This is an archive of the discontinued LLVM Phabricator instance.

elmcdonough retitled this revision from [openmp] Parallel reduction LLVM IR generation to [openmp][mlir] Parallel reduction LLVM IR generation.Jul 12 2023, 11:35 PM

Harbormaster completed remote builds in B244999: Diff 539861.Jul 13 2023, 12:32 AM

Thanks @elmcdonough for the patch.

The reduction for the parallel construct should happen inside the parallel region (outlined function). At the moment, the code generated does this outside it.

Could you also check the case where the reduction happens in a nested region?

      subroutine diff(nelt)
!     Reproducer: the REDUCTION clause is on the PARALLEL construct while
!     the accumulation happens inside a nested worksharing DO loop.
!     Sums the integers 1..nelt into r1.
      implicit none

!     ie   - loop index (PRIVATE in the parallel region)
!     nelt - upper bound of the loop (dummy argument)
!     r1   - reduction accumulator; needs an explicit declaration
!            because IMPLICIT NONE is in effect.
      integer :: ie, nelt, r1

      r1 = 0
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ie) REDUCTION(+:r1)
!$OMP DO
       do ie=1,nelt
               r1            = r1 + ie
       end do
!$OMP END DO
!$OMP END PARALLEL

      end

This revision now requires changes to proceed.Jul 13 2023, 6:32 AM

Ensure that reduction happens in outlined function + account for nested WsLoopOp.

Harbormaster completed remote builds in B249603: Diff 546229.Aug 1 2023, 4:03 PM

LG.

Could you check that the tests in the gfortran test suite pass for parallel reduction before submitting?
https://github.com/llvm/llvm-test-suite/blob/745f3fdfe3f2f28dcea34e3e55fdc55aca06c00d/Fortran/gfortran/regression/gomp/DisabledFiles.cmake#L111

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
355–368	Nit: Would this change work?

This revision is now accepted and ready to land.Aug 3 2023, 6:26 AM

elmcdonough updated this revision to Diff 550501.Aug 15 2023, 3:16 PM

Harbormaster completed remote builds in B252768: Diff 550501.Aug 15 2023, 4:21 PM

This revision was landed with ongoing or failed builds.Aug 15 2023, 10:59 PM

Closed by commit rGde7224399acd: [openmp][mlir] Parallel reduction LLVM IR generation (authored by elmcdonough). · Explain Why

This revision was automatically updated to reflect the committed changes.

elmcdonough added a commit: rGde7224399acd: [openmp][mlir] Parallel reduction LLVM IR generation.

elmcdonough mentioned this in D158054: [Fortran/gfortran][OpenMP] Enable parallel reduction tests.Aug 15 2023, 11:43 PM

elmcdonough mentioned this in rTdf04fba12583: [Fortran/gfortran][OpenMP] Enable parallel reduction tests.Aug 16 2023, 9:11 AM

Revision Contents

Path

Size

mlir/

include/

mlir/

Dialect/

OpenMP/

OpenMPOps.td

4 lines

lib/

Target/

LLVMIR/

Dialect/

OpenMP/

OpenMPToLLVMIRTranslation.cpp

352 lines

test/

Target/

LLVMIR/

openmp-reduction.mlir

56 lines

Diff 539861

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

Show First 20 Lines • Show All 182 Lines • ▼ Show 20 Lines	let arguments = (ins Optional<I1>:$if_expr_var,
OptionalAttr<SymbolRefArrayAttr>:$reductions,		OptionalAttr<SymbolRefArrayAttr>:$reductions,
OptionalAttr<ProcBindKindAttr>:$proc_bind_val);		OptionalAttr<ProcBindKindAttr>:$proc_bind_val);

let regions = (region AnyRegion:$region);		let regions = (region AnyRegion:$region);

let builders = [		let builders = [
OpBuilder<(ins CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>		OpBuilder<(ins CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>
];		];
		let extraClassDeclaration = [{
		/// Returns the number of reduction variables.
		unsigned getNumReductionVars() { return getReductionVars().size(); }
		}];
let assemblyFormat = [{		let assemblyFormat = [{
oilist( `reduction` `(`		oilist( `reduction` `(`
custom<ReductionVarList>(		custom<ReductionVarList>(
$reduction_vars, type($reduction_vars), $reductions		$reduction_vars, type($reduction_vars), $reductions
) `)`		) `)`
\| `if` `(` $if_expr_var `:` type($if_expr_var) `)`		\| `if` `(` $if_expr_var `:` type($if_expr_var) `)`
\| `num_threads` `(` $num_threads_var `:` type($num_threads_var) `)`		\| `num_threads` `(` $num_threads_var `:` type($num_threads_var) `)`
\| `allocate` `(`		\| `allocate` `(`
▲ Show 20 Lines • Show All 1,588 Lines • Show Last 20 Lines

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Show First 20 Lines • Show All 246 Lines • ▼ Show 20 Lines

static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {

case omp::ClauseProcBindKind::Primary:

return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary;

case omp::ClauseProcBindKind::Spread:

return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;

}

llvm_unreachable("Unknown ClauseProcBindKind kind");

}

/// Lowers an `omp.parallel` operation to LLVM IR by forwarding its region and
/// its `if`, `num_threads` and `proc_bind` clause values to
/// OpenMPIRBuilder::createParallel. Returns the status recorded while
/// translating the region.
static LogicalResult
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
                   LLVM::ModuleTranslation &moduleTranslation) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
  // relying on captured variables.
  LogicalResult regionStatus = success();

  // Body callback: translates the (single) region of the parallel op at the
  // code-generation point handed in by the OpenMPIRBuilder.
  auto emitBody = [&](InsertPointTy bodyAllocaIP,
                      InsertPointTy bodyCodeGenIP) {
    // Keep the alloca insertion point on the ModuleTranslation stack so that
    // nested regions can pick it up.
    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> allocaFrame(
        moduleTranslation, bodyAllocaIP);

    builder.restoreIP(bodyCodeGenIP);
    convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
                        moduleTranslation, regionStatus);
  };

  // Privatization callback.
  // TODO: Perform appropriate actions according to the data-sharing
  // attribute (shared, private, firstprivate, ...) of variables.
  // Currently every variable is treated as shared: the replacement is the
  // incoming pointer itself.
  auto shareAll = [&](InsertPointTy, InsertPointTy bodyCodeGenIP,
                      llvm::Value &, llvm::Value &innerPtr,
                      llvm::Value *&replacementValue) -> InsertPointTy {
    replacementValue = &innerPtr;
    return bodyCodeGenIP;
  };

  // Finalization callback.
  // TODO: Perform finalization actions for variables. This has to be
  // called for variables which have destructors/finalizers.
  auto noFinalization = [&](InsertPointTy) {};

  // Optional clause operands translate to nullptr when absent.
  auto ifExprVar = opInst.getIfExprVar();
  llvm::Value *ifCond =
      ifExprVar ? moduleTranslation.lookupValue(ifExprVar) : nullptr;

  auto numThreadsVar = opInst.getNumThreadsVar();
  llvm::Value *numThreads =
      numThreadsVar ? moduleTranslation.lookupValue(numThreadsVar) : nullptr;

  auto bind = opInst.getProcBindVal();
  auto pbKind =
      bind ? getProcBindKind(*bind) : llvm::omp::OMP_PROC_BIND_default;

  InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

  // TODO: Is the Parallel construct cancellable?
  InsertPointTy afterParallelIP =
      moduleTranslation.getOpenMPBuilder()->createParallel(
          ompLoc, allocaIP, emitBody, shareAll, noFinalization, ifCond,
          numThreads, pbKind, /*IsCancellable=*/false);
  builder.restoreIP(afterParallelIP);

  return regionStatus;
}

/// Converts an OpenMP 'master' operation into LLVM IR using OpenMPIRBuilder.

static LogicalResult

convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,

LLVM::ModuleTranslation &moduleTranslation) {

using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

// TODO: support error propagation in OpenMPIRBuilder and use it instead of

// relying on captured variables.

LogicalResult bodyGenStatus = success();

▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines

convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,

}

builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(

ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));

return success();

}

/// Returns a reduction declaration that corresponds to the given reduction

/// operation in the given container. Currently only supports reductions inside

/// WsLoopOp but can be easily extended.

/// WsLoopOp and ParallelOp but can be easily extended.

static omp::ReductionDeclareOp findReductionDecl(omp::WsLoopOp container,

template <typename T>

static omp::ReductionDeclareOp findReductionDecl(T container,

omp::ReductionOp reduction) {

SymbolRefAttr reductionSymbol;

for (unsigned i = 0, e = container.getNumReductionVars(); i < e; ++i) {

if (container.getReductionVars()[i] != reduction.getAccumulator())

continue;

reductionSymbol = cast<SymbolRefAttr>((*container.getReductions())[i]);

break;

}

assert(reductionSymbol &&

"reduction operation must be associated with a declaration");

return SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(

container, reductionSymbol);

}

/// Populates `reductions` with reduction declarations used in the given loop.

template <typename T>

static void

collectReductionDecls(omp::WsLoopOp loop,

collectReductionDecls(T loop,

SmallVectorImpl<omp::ReductionDeclareOp> &reductions) {

std::optional<ArrayAttr> attr = loop.getReductions();

if (!attr)

return;

reductions.reserve(reductions.size() + loop.getNumReductionVars());

for (auto symbolRef : attr->getAsRange<SymbolRefAttr>()) {

reductions.push_back(

SymbolTable::lookupNearestSymbolFrom<omp::ReductionDeclareOp>(

loop, symbolRef));

}

/// Translates the blocks contained in the given region and appends them at

/// the current insertion point of `builder`. The operations of the entry block

/// are appended to the current insertion block, which is not expected to have a

/// terminator. If set, `continuationBlockArgs` is populated with translated

/// values that correspond to the values omp.yield'ed from the region.

static LogicalResult inlineConvertOmpRegions(

Region &region, StringRef blockName, llvm::IRBuilderBase &builder,

kiranchandramohanUnsubmitted

Not Done

while (!declareOp.has_value() && container) {

// Check if current container is supported for reductions searches

if (auto par = dyn_cast<omp::ParallelOp>(*container)) {

declareOp = findReductionDeclInContainer(par, reduction);

} else if (auto loop = dyn_cast<omp::WsLoopOp>(*container)) {

declareOp = findReductionDeclInContainer(loop, reduction);

- }

- // See if we can search parent for reductions as well

- Operation *parent = containerOp.getParentOp();

- if (dyn_cast<omp::ParallelOp>(parent) || dyn_cast<omp::WsLoopOp>(parent)) {

- container = parent;

} else {

break;

}

+ // See if we can search parent for reductions as well

+ container = containerOp.getParentOp();

}

assert(declareOp.has_value() &&

Nit: Would this change work?

kiranchandramohan: Nit: Would this change work?

LLVM::ModuleTranslation &moduleTranslation,

SmallVectorImpl<llvm::Value *> *continuationBlockArgs = nullptr) {

if (region.empty())

return success();

// Special case for single-block regions that don't create additional blocks:

// insert operations without creating additional blocks.

if (llvm::hasSingleElement(region)) {

▲ Show 20 Lines • Show All 320 Lines • ▼ Show 20 Lines

convertOmpTaskgroupOp(omp::TaskGroupOp tgOp, llvm::IRBuilderBase &builder,

};

InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);

llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(

ompLoc, allocaIP, bodyCB));

return bodyGenStatus;

}

/// Allocate space for privatized reduction variables.

template <typename T>

static void

allocReductionVars(T loop, llvm::IRBuilderBase &builder,

LLVM::ModuleTranslation &moduleTranslation,

llvm::OpenMPIRBuilder::InsertPointTy &allocaIP,

SmallVector<omp::ReductionDeclareOp> &reductionDecls,

SmallVector<llvm::Value *> &privateReductionVariables,

DenseMap<Value, llvm::Value *> &reductionVariableMap) {

unsigned numReductions = loop.getNumReductionVars();

privateReductionVariables.reserve(numReductions);

if (numReductions != 0) {

llvm::IRBuilderBase::InsertPointGuard guard(builder);

builder.restoreIP(allocaIP);

for (unsigned i = 0; i < numReductions; ++i) {

llvm::Value *var = builder.CreateAlloca(

moduleTranslation.convertType(reductionDecls[i].getType()));

privateReductionVariables.push_back(var);

reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);

}

/// Stores the initial value of every reduction of `loop` into the matching
/// entry of `privateReductionVariables`.
///
/// For reduction `i`, the initializer region of `reductionDecls[i]` is inlined
/// at the current insertion point of `builder` and the single value it yields
/// is stored to `privateReductionVariables[i]`. Returns failure if inlining an
/// initializer region fails.
template <typename T>
static LogicalResult
initReductionVars(T loop, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation,
                  SmallVector<omp::ReductionDeclareOp> &reductionDecls,
                  SmallVector<llvm::Value *> &privateReductionVariables) {
  // The stores are emitted here, ahead of the construct body, instead of
  // right after the allocas so that the alloca insertion point is not
  // disturbed.
  for (unsigned redIdx = 0; redIdx < loop.getNumReductionVars(); ++redIdx) {
    SmallVector<llvm::Value *> yieldedValues;
    LogicalResult inlined = inlineConvertOmpRegions(
        reductionDecls[redIdx].getInitializerRegion(), "omp.reduction.neutral",
        builder, moduleTranslation, &yieldedValues);
    if (failed(inlined))
      return failure();

    assert(yieldedValues.size() == 1 &&
           "expected one value to be yielded from the "
           "reduction neutral element declaration region");
    llvm::Value *privateCopy = privateReductionVariables[redIdx];
    builder.CreateStore(yieldedValues[0], privateCopy);
  }

  return success();
}

/// Collect reduction info

template <typename T>

static void collectReductionInfo(

T loop, llvm::IRBuilderBase &builder,

LLVM::ModuleTranslation &moduleTranslation,

SmallVector<omp::ReductionDeclareOp> &reductionDecls,

SmallVector<OwningReductionGen> &owningReductionGens,

SmallVector<OwningAtomicReductionGen> &owningAtomicReductionGens,

const SmallVector<llvm::Value *> &privateReductionVariables,

SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos) {

unsigned numReductions = loop.getNumReductionVars();

// Create the reduction generators. We need to own them here because

// ReductionInfo only accepts references to the generators.

for (unsigned i = 0; i < numReductions; ++i) {

owningReductionGens.push_back(

makeReductionGen(reductionDecls[i], builder, moduleTranslation));

owningAtomicReductionGens.push_back(

makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));

}

// Collect the reduction information.

reductionInfos.reserve(numReductions);

for (unsigned i = 0; i < numReductions; ++i) {

llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;

if (owningAtomicReductionGens[i])

atomicGen = owningAtomicReductionGens[i];

llvm::Value *variable =

moduleTranslation.lookupValue(loop.getReductionVars()[i]);

reductionInfos.push_back(

{moduleTranslation.convertType(reductionDecls[i].getType()), variable,

privateReductionVariables[i], owningReductionGens[i], atomicGen});

}

/// Emits the reductions described by `reductionInfos` at the current insertion
/// point of `builder`, followed by a barrier for directive `D`.
///
/// `loop` is only used to report an error when the OpenMPIRBuilder fails to
/// create the reductions; `nowait` is forwarded to `createReductions`. On
/// success the builder is left at the insertion point returned by the barrier.
template <typename T>
static LogicalResult genReductionFromInfo(
    T loop, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder,
    SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> &reductionInfos,
    llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, llvm::omp::Directive D,
    bool nowait = false) {
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  // createReductions expects the block to already have a terminator, so a
  // placeholder `unreachable` is inserted first and erased again once the
  // reduction and barrier code has been emitted in front of it.
  llvm::UnreachableInst *placeholder = builder.CreateUnreachable();
  builder.SetInsertPoint(placeholder);

  InsertPointTy afterReductions = ompBuilder->createReductions(
      builder.saveIP(), allocaIP, reductionInfos, nowait);
  if (afterReductions.getBlock() == nullptr)
    return loop->emitOpError() << "failed to convert reductions";

  auto afterBarrier = ompBuilder->createBarrier(afterReductions, D);
  placeholder->eraseFromParent();
  builder.restoreIP(afterBarrier);
  return success();
}

/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.

static LogicalResult

convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,

LLVM::ModuleTranslation &moduleTranslation) {

auto loop = cast<omp::WsLoopOp>(opInst);

// TODO: this should be in the op verifier instead.

if (loop.getLowerBound().empty())

return failure();

Show All 12 Lines

if (loop.getScheduleChunkVar()) {

chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);

}

SmallVector<omp::ReductionDeclareOp> reductionDecls;

collectReductionDecls(loop, reductionDecls);

llvm::OpenMPIRBuilder::InsertPointTy allocaIP =

findAllocaInsertPoint(builder, moduleTranslation);

// Allocate space for privatized reduction variables.

SmallVector<llvm::Value *> privateReductionVariables;

DenseMap<Value, llvm::Value *> reductionVariableMap;

unsigned numReductions = loop.getNumReductionVars();

allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls,

privateReductionVariables.reserve(numReductions);

privateReductionVariables, reductionVariableMap);

if (numReductions != 0) {

llvm::IRBuilderBase::InsertPointGuard guard(builder);

builder.restoreIP(allocaIP);

for (unsigned i = 0; i < numReductions; ++i) {

llvm::Value *var = builder.CreateAlloca(

moduleTranslation.convertType(reductionDecls[i].getType()));

privateReductionVariables.push_back(var);

reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);

}

// Store the mapping between reduction variables and their private copies on

// ModuleTranslation stack. It can be then recovered when translating

// omp.reduce operations in a separate call.

LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(

moduleTranslation, reductionVariableMap);

// Before the loop, store the initial values of reductions into reduction

if (failed(initReductionVars(loop, builder, moduleTranslation, reductionDecls,

// variables. Although this could be done after allocas, we don't want to mess

privateReductionVariables))) {

// up with the alloca insertion point.

for (unsigned i = 0; i < numReductions; ++i) {

SmallVector<llvm::Value *> phis;

if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(),

"omp.reduction.neutral", builder,

moduleTranslation, &phis)))

return failure();

assert(phis.size() == 1 && "expected one value to be yielded from the "

"reduction neutral element declaration region");

builder.CreateStore(phis[0], privateReductionVariables[i]);

}

// Set up the source location value for OpenMP runtime.

llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

// Generator of the canonical loop body.

// TODO: support error propagation in OpenMPIRBuilder and use it instead of

// relying on captured variables.

▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines

convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,

// Continue building IR after the loop. Note that the LoopInfo returned by

// `collapseLoops` points inside the outermost loop and is intended for

// potential further loop transformations. Use the insertion point stored

// before collapsing loops instead.

builder.restoreIP(afterIP);

// Process the reductions if required.

if (numReductions == 0)

if (loop.getNumReductionVars() == 0)

return success();

// Create the reduction generators. We need to own them here because

// ReductionInfo only accepts references to the generators.

SmallVector<OwningReductionGen> owningReductionGens;

SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;

for (unsigned i = 0; i < numReductions; ++i) {

SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;

owningReductionGens.push_back(

collectReductionInfo(loop, builder, moduleTranslation, reductionDecls,

makeReductionGen(reductionDecls[i], builder, moduleTranslation));

owningReductionGens, owningAtomicReductionGens,

owningAtomicReductionGens.push_back(

privateReductionVariables, reductionInfos);

makeAtomicReductionGen(reductionDecls[i], builder, moduleTranslation));

return genReductionFromInfo(loop, builder, ompBuilder, reductionInfos,

allocaIP, llvm::omp::OMPD_for, loop.getNowait());

}

// Collect the reduction information.

/// Converts the OpenMP parallel operation to LLVM IR.

SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;

static LogicalResult

reductionInfos.reserve(numReductions);

convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,

for (unsigned i = 0; i < numReductions; ++i) {

LLVM::ModuleTranslation &moduleTranslation) {

llvm::OpenMPIRBuilder::AtomicReductionGenTy atomicGen = nullptr;

using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

if (owningAtomicReductionGens[i])

// TODO: support error propagation in OpenMPIRBuilder and use it instead of

atomicGen = owningAtomicReductionGens[i];

// relying on captured variables.

llvm::Value *variable =

LogicalResult bodyGenStatus = success();

moduleTranslation.lookupValue(loop.getReductionVars()[i]);

reductionInfos.push_back(

auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {

{moduleTranslation.convertType(reductionDecls[i].getType()), variable,

// Save the alloca insertion point on ModuleTranslation stack for use in

privateReductionVariables[i], owningReductionGens[i], atomicGen});

// nested regions.

LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(

moduleTranslation, allocaIP);

// ParallelOp has only one region associated with it.

builder.restoreIP(codeGenIP);

convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,

moduleTranslation, bodyGenStatus);

};

// TODO: Perform appropriate actions according to the data-sharing

// attribute (shared, private, firstprivate, ...) of variables.

// Currently defaults to shared.

auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,

llvm::Value &, llvm::Value &vPtr,

llvm::Value *&replacementValue) -> InsertPointTy {

replacementValue = &vPtr;

return codeGenIP;

};

// TODO: Perform finalization actions for variables. This has to be

// called for variables which have destructors/finalizers.

auto finiCB = [&](InsertPointTy codeGenIP) {};

llvm::Value *ifCond = nullptr;

if (auto ifExprVar = opInst.getIfExprVar())

ifCond = moduleTranslation.lookupValue(ifExprVar);

llvm::Value *numThreads = nullptr;

if (auto numThreadsVar = opInst.getNumThreadsVar())

numThreads = moduleTranslation.lookupValue(numThreadsVar);

auto pbKind = llvm::omp::OMP_PROC_BIND_default;

if (auto bind = opInst.getProcBindVal())

pbKind = getProcBindKind(*bind);

// TODO: Is the Parallel construct cancellable?

bool isCancellable = false;

llvm::OpenMPIRBuilder::InsertPointTy allocaIP =

findAllocaInsertPoint(builder, moduleTranslation);

llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);

// Collect reduction declarations

SmallVector<omp::ReductionDeclareOp> reductionDecls;

collectReductionDecls(opInst, reductionDecls);

// Allocate reduction vars

SmallVector<llvm::Value *> privateReductionVariables;

DenseMap<Value, llvm::Value *> reductionVariableMap;

allocReductionVars(opInst, builder, moduleTranslation, allocaIP,

reductionDecls, privateReductionVariables,

reductionVariableMap);

// Store the mapping between reduction variables and their private copies on

// ModuleTranslation stack. It can be then recovered when translating

// omp.reduce operations in a separate call.

LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(

moduleTranslation, reductionVariableMap);

// Set reduction vars to initial values

if (failed(initReductionVars(opInst, builder, moduleTranslation,

reductionDecls, privateReductionVariables))) {

return failure();

}

// The call to createReductions below expects the block to have a

llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

// terminator. Create an unreachable instruction to serve as terminator

builder.restoreIP(

// and remove it later.

ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,

llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();

ifCond, numThreads, pbKind, isCancellable));

builder.SetInsertPoint(tempTerminator);

llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =

ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,

loop.getNowait());

if (!contInsertPoint.getBlock())

return loop->emitOpError() << "failed to convert reductions";

auto nextInsertionPoint =

ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);

tempTerminator->eraseFromParent();

builder.restoreIP(nextInsertionPoint);

return success();

// Process the reductions if required.

if (opInst.getNumReductionVars() > 0) {

SmallVector<OwningReductionGen> owningReductionGens;

SmallVector<OwningAtomicReductionGen> owningAtomicReductionGens;

SmallVector<llvm::OpenMPIRBuilder::ReductionInfo> reductionInfos;

collectReductionInfo(opInst, builder, moduleTranslation, reductionDecls,

owningReductionGens, owningAtomicReductionGens,

privateReductionVariables, reductionInfos);

if (failed(genReductionFromInfo(opInst, builder, ompBuilder, reductionInfos,

allocaIP, llvm::omp::OMPD_parallel))) {

return failure();

}

return bodyGenStatus;

}

/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.

static LogicalResult

convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,

LLVM::ModuleTranslation &moduleTranslation) {

auto loop = cast<omp::SimdLoopOp>(opInst);

▲ Show 20 Lines • Show All 320 Lines • ▼ Show 20 Lines

/// mapping between reduction variables and their private equivalents to have

/// been stored on the ModuleTranslation stack. Currently only supports

/// reduction within WsLoopOp and ParallelOp, but can be easily extended.

static LogicalResult

convertOmpReductionOp(omp::ReductionOp reductionOp,

llvm::IRBuilderBase &builder,

LLVM::ModuleTranslation &moduleTranslation) {

// Find the declaration that corresponds to the reduction op.

auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>();

omp::ReductionDeclareOp declaration;

omp::ReductionDeclareOp declaration =

if (auto reductionContainer = reductionOp->getParentOfType<omp::WsLoopOp>()) {

findReductionDecl(reductionContainer, reductionOp);

declaration = findReductionDecl(reductionContainer, reductionOp);

} else if (auto reductionContainer =

reductionOp->getParentOfType<omp::ParallelOp>()) {

declaration = findReductionDecl(reductionContainer, reductionOp);

} else {

llvm_unreachable("Unhandled reduction container");

}

assert(declaration && "could not find reduction declaration");

// Retrieve the mapping between reduction variables and their private

// equivalents.

const DenseMap<Value, llvm::Value *> *reductionVariableMap = nullptr;

moduleTranslation.stackWalk<OpenMPVarMappingStackFrame>(

[&](const OpenMPVarMappingStackFrame &frame) {

reductionVariableMap = &frame.mapping;

▲ Show 20 Lines • Show All 530 Lines • Show Last 20 Lines

mlir/test/Target/LLVMIR/openmp-reduction.mlir

	Show First 20 Lines • Show All 410 Lines • ▼ Show 20 Lines
	// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]			// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
	// CHECK: %[[UPDATED2:.+]] = fmul float %[[PARTIAL2]], 2.000000e+00			// CHECK: %[[UPDATED2:.+]] = fmul float %[[PARTIAL2]], 2.000000e+00
	// CHECK: store float %[[UPDATED2]], ptr %[[PRIVATE2]]			// CHECK: store float %[[UPDATED2]], ptr %[[PRIVATE2]]

	// Reduction function.			// Reduction function.
	// CHECK: define internal void @[[REDFUNC]]			// CHECK: define internal void @[[REDFUNC]]
	// CHECK: fadd float			// CHECK: fadd float
	// CHECK: fmul float			// CHECK: fmul float

				// -----

				omp.reduction.declare @add_f32 : f32
				init {
				^bb0(%arg: f32):
				%0 = llvm.mlir.constant(0.0 : f32) : f32
				omp.yield (%0 : f32)
				}
				combiner {
				^bb1(%arg0: f32, %arg1: f32):
				%1 = llvm.fadd %arg0, %arg1 : f32
				omp.yield (%1 : f32)
				}
				atomic {
				^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
				%2 = llvm.load %arg3 : !llvm.ptr -> f32
				llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
				omp.yield
				}

				// CHECK-LABEL: @simple_reduction_parallel
				llvm.func @simple_reduction_parallel() {
				%c1 = llvm.mlir.constant(1 : i32) : i32
				%0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
				omp.parallel reduction(@add_f32 -> %0 : !llvm.ptr) {
				%1 = llvm.mlir.constant(2.0 : f32) : f32
				omp.reduction %1, %0 : f32, !llvm.ptr
				omp.terminator
				}
				llvm.return
				}

				// Call to the outlined function.
				// CHECK: call void {{.*}} @__kmpc_fork_call
				// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

				// Call to the reduction function.
				// CHECK: call i32 @__kmpc_reduce
				// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

				// CHECK: [[FINALIZE:.+]]:
				// CHECK: call void @__kmpc_barrier

				// Non-atomic reduction:
				// CHECK: fadd float
				// CHECK: call void @__kmpc_end_reduce
				// CHECK: br label %[[FINALIZE]]

				// Atomic reduction.
				// CHECK: %[[PARTIAL:.+]] = load float, ptr
				// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

				// Reduction function.
				// CHECK: define internal void @[[REDFUNC]]
				// CHECK: fadd float

This is an archive of the discontinued LLVM Phabricator instance.

[openmp][mlir] Parallel reduction LLVM IR generationClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 539861

mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

mlir/test/Target/LLVMIR/openmp-reduction.mlir

[openmp][mlir] Parallel reduction LLVM IR generation
ClosedPublic