diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -339,9 +339,50 @@
   let verifier = [{ return ::verifyWsLoopOp(*this); }];
 }
 
+//===----------------------------------------------------------------------===//
+// 2.9.3 SIMD Directive
+//===----------------------------------------------------------------------===//
+
+def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments,
+                         AllTypesMatch<["lowerBound", "upperBound", "step"]>]> {
+ let summary = "simd loop construct";
+  let description = [{
+    The simd construct can be applied to a loop to indicate that the loop can be
+    transformed into a SIMD loop (that is, multiple iterations of the loop can
+    be executed concurrently using SIMD instructions). The lower and upper
+    bounds specify a half-open range: the range includes the lower bound but does
+    not include the upper bound.
+
+    The body region can contain any number of blocks. The region is terminated
+    by "omp.yield" instruction without operands.
+    ```
+    omp.simdloop (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
+      // block operations
+      omp.yield
+    }
+    ```
+  }];
+
+  // TODO: Add other clauses
+  let arguments = (ins Variadic<IntLikeType>:$lowerBound,
+             Variadic<IntLikeType>:$upperBound,
+             Variadic<IntLikeType>:$step);
+
+  let regions = (region AnyRegion:$region);
+
+  let extraClassDeclaration = [{
+    /// Returns the number of loops in the simd loop nest.
+    unsigned getNumLoops() { return lowerBound().size(); }
+
+  }];
+
+  let parser = [{ return parseSimdLoopOp(parser, result); }];
+  let printer = [{ return printSimdLoopOp(p, *this); }];
+}
+
 def YieldOp : OpenMP_Op<"yield", [NoSideEffect, ReturnLike, Terminator,
-    ParentOneOf<["WsLoopOp", "ReductionDeclareOp"]>]> {
+    ParentOneOf<["WsLoopOp", "ReductionDeclareOp", "SimdLoopOp"]>]> {
   let summary = "loop yield and termination operation";
   let description = [{
     "omp.yield" yields SSA values from the OpenMP dialect op region and
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1133,6 +1133,71 @@
   p.printRegion(op.region(), /*printEntryBlockArgs=*/false);
 }
 
+//===----------------------------------------------------------------------===//
+// SimdLoopOp
+//===----------------------------------------------------------------------===//
+/// Parses an OpenMP Simd Loop operation
+///
+/// simdloop ::= `omp.simdloop` loop-control clause-list
+/// loop-control ::= `(` ssa-id-list `)` `:` type `=` loop-bounds
+/// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` steps
+/// steps := `step` `(`ssa-id-list`)`
+/// clause-list ::= clause clause-list | empty
+/// clause ::= TODO
+static ParseResult parseSimdLoopOp(OpAsmParser &parser,
+                                   OperationState &result) {
+  // Parse an opening `(` followed by induction variables followed by `)`
+  SmallVector<OpAsmParser::OperandType> ivs;
+  if (parser.parseRegionArgumentList(ivs, /*requiredOperandCount=*/-1,
+                                     OpAsmParser::Delimiter::Paren))
+    return failure();
+  int numIVs = static_cast<int>(ivs.size());
+  Type loopVarType;
+  if (parser.parseColonType(loopVarType))
+    return failure();
+  // Parse loop bounds.
+  SmallVector<OpAsmParser::OperandType> lower;
+  if (parser.parseEqual() ||
+      parser.parseOperandList(lower, numIVs, OpAsmParser::Delimiter::Paren) ||
+      parser.resolveOperands(lower, loopVarType, result.operands))
+    return failure();
+  SmallVector<OpAsmParser::OperandType> upper;
+  if (parser.parseKeyword("to") ||
+      parser.parseOperandList(upper, numIVs, OpAsmParser::Delimiter::Paren) ||
+      parser.resolveOperands(upper, loopVarType, result.operands))
+    return failure();
+
+  // Parse step values.
+  SmallVector<OpAsmParser::OperandType> steps;
+  if (parser.parseKeyword("step") ||
+      parser.parseOperandList(steps, numIVs, OpAsmParser::Delimiter::Paren) ||
+      parser.resolveOperands(steps, loopVarType, result.operands))
+    return failure();
+
+  SmallVector<int> segments{numIVs, numIVs, numIVs};
+  // TODO: Add parseClauses() when we support clauses
+  result.addAttribute("operand_segment_sizes",
+                      parser.getBuilder().getI32VectorAttr(segments));
+
+  // Now parse the body.
+  Region *body = result.addRegion();
+  SmallVector<Type> ivTypes(numIVs, loopVarType);
+  SmallVector<OpAsmParser::OperandType> blockArgs(ivs);
+  if (parser.parseRegion(*body, blockArgs, ivTypes))
+    return failure();
+  return success();
+}
+
+/// Prints an OpenMP Simd Loop operation in the form parseSimdLoopOp accepts.
+static void printSimdLoopOp(OpAsmPrinter &p, SimdLoopOp op) {
+  auto args = op.getRegion().front().getArguments();
+  p << " (" << args << ") : " << args[0].getType() << " = (" << op.lowerBound()
+    << ") to (" << op.upperBound() << ") ";
+  p << "step (" << op.step() << ") ";
+
+  p.printRegion(op.region(), /*printEntryBlockArgs=*/false);
+}
+
 //===----------------------------------------------------------------------===//
 // ReductionOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -891,6 +891,90 @@
   return success();
 }
 
+/// Converts an OpenMP simd loop into LLVM IR using OpenMPIRBuilder.
+static LogicalResult
+convertOmpSimdLoop(Operation &opInst, llvm::IRBuilderBase &builder,
+                   LLVM::ModuleTranslation &moduleTranslation) {
+  auto loop = cast<omp::SimdLoopOp>(opInst);
+  // TODO: this should be in the op verifier instead.
+  if (loop.lowerBound().empty())
+    return failure();
+
+  llvm::DISubprogram *subprogram =
+      builder.GetInsertBlock()->getParent()->getSubprogram();
+  const llvm::DILocation *diLoc =
+      moduleTranslation.translateLoc(opInst.getLoc(), subprogram);
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder.saveIP(),
+                                                    llvm::DebugLoc(diLoc));
+
+  // Generator of the canonical loop body.
+  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
+  // relying on captured variables.
+  SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
+  SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
+  LogicalResult bodyGenStatus = success();
+  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
+    // Make sure further conversions know about the induction variable.
+    moduleTranslation.mapValue(
+        loop.getRegion().front().getArgument(loopInfos.size()), iv);
+
+    // Capture the body insertion point for use in nested loops. BodyIP of the
+    // CanonicalLoopInfo always points to the beginning of the entry block of
+    // the body.
+    bodyInsertPoints.push_back(ip);
+
+    if (loopInfos.size() != loop.getNumLoops() - 1)
+      return;
+
+    // Convert the body of the loop.
+    llvm::BasicBlock *entryBlock = ip.getBlock();
+    llvm::BasicBlock *exitBlock =
+        entryBlock->splitBasicBlock(ip.getPoint(), "omp.simdloop.exit");
+    convertOmpOpRegions(loop.region(), "omp.simdloop.region", *entryBlock,
+                        *exitBlock, builder, moduleTranslation, bodyGenStatus);
+  };
+
+  // Delegate actual loop construction to the OpenMP IRBuilder.
+  // TODO: this currently assumes SimdLoop is semantically similar to SCF loop,
+  // i.e. it has a positive step, a half-open range and signed integer
+  // semantics. Reconsider this code when SimdLoop supports more cases.
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  for (unsigned i = 0, e = loop.getNumLoops(); i < e; ++i) {
+    llvm::Value *lowerBound =
+        moduleTranslation.lookupValue(loop.lowerBound()[i]);
+    llvm::Value *upperBound =
+        moduleTranslation.lookupValue(loop.upperBound()[i]);
+    llvm::Value *step = moduleTranslation.lookupValue(loop.step()[i]);
+
+    // Make sure loop trip counts are emitted in the preheader of the outermost
+    // loop at the latest so that they are all available for the new collapsed
+    // loop that will be created below.
+    llvm::OpenMPIRBuilder::LocationDescription loc = ompLoc;
+    llvm::OpenMPIRBuilder::InsertPointTy computeIP = ompLoc.IP;
+    if (i != 0) {
+      loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back(),
+                                                       llvm::DebugLoc(diLoc));
+      computeIP = loopInfos.front()->getPreheaderIP();
+    }
+    loopInfos.push_back(ompBuilder->createCanonicalLoop(
+        loc, bodyGen, lowerBound, upperBound, step,
+        /*IsSigned=*/true, /*Inclusive=*/false, computeIP));
+
+    if (failed(bodyGenStatus))
+      return failure();
+  }
+
+  // Collapse loops.
+  llvm::IRBuilderBase::InsertPoint afterIP = loopInfos.front()->getAfterIP();
+  llvm::CanonicalLoopInfo *loopInfo =
+      ompBuilder->collapseLoops(diLoc, loopInfos, {});
+
+  ompBuilder->applySimd(ompLoc.DL, loopInfo);
+
+  builder.restoreIP(afterIP);
+  return success();
+}
+
 // Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
 llvm::AtomicOrdering
 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
@@ -1079,6 +1163,9 @@
       .Case([&](omp::WsLoopOp) {
         return convertOmpWsLoop(*op, builder, moduleTranslation);
       })
+      .Case([&](omp::SimdLoopOp) {
+        return convertOmpSimdLoop(*op, builder, moduleTranslation);
+      })
       .Case([&](omp::AtomicReadOp) {
         return convertOmpAtomicRead(*op, builder, moduleTranslation);
       })
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -217,6 +217,19 @@
 
 // -----
 
+func @omp_simdloop(%lb : index, %ub : index, %step : i32) -> () {
+  // expected-error @below {{op failed to verify that all of {lowerBound, upperBound, step} have same type}}
+  "omp.simdloop" (%lb, %ub, %step) ({
+    ^bb0(%iv: index):
+      omp.yield
+  }) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
+    (index, index, i32) -> ()
+
+  return
+}
+
+// -----
+
 // expected-error @below {{op expects initializer region with one argument of the reduction type}}
 omp.reduction.declare @add_f32 : f64
 init {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -327,6 +327,27 @@
   return
 }
 
+// CHECK-LABEL: omp_simdloop
+func @omp_simdloop(%lb : index, %ub : index, %step : index) -> () {
+  // CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+  "omp.simdloop" (%lb, %ub, %step) ({
+    ^bb0(%iv: index):
+      omp.yield
+  }) {operand_segment_sizes = dense<[1,1,1]> : vector<3xi32>} :
+    (index, index, index) -> ()
+
+  return
+}
+
+// CHECK-LABEL: omp_simdloop_pretty
+func @omp_simdloop_pretty(%lb : index, %ub : index, %step : index) -> () {
+  // CHECK: omp.simdloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
+  omp.simdloop (%iv) : index = (%lb) to (%ub) step (%step) {
+    omp.yield
+  }
+  return
+}
+
 // CHECK-LABEL: omp_target
 func @omp_target(%if_cond : i1, %device : si32,
                  %num_threads : si32) -> () {