diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -120,6 +120,10 @@ Runtime = 37, Auto = 38, // auto + StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd) + GuidedSimd = 46, // guided with chunk adjustment + RuntimeSimd = 47, // runtime with chunk adjustment + ModifierMonotonic = (1 << 29), // Set if the monotonic schedule modifier was present ModifierNonmonotonic = diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -134,6 +134,20 @@ let assemblyFormat = "attr-dict"; } +def OMP_SCHEDULE_MOD_None : StrEnumAttrCase<"none", 0>; +def OMP_SCHEDULE_MOD_Monotonic : StrEnumAttrCase<"monotonic", 1>; +def OMP_SCHEDULE_MOD_Nonmonotonic : StrEnumAttrCase<"nonmonotonic", 2>; +def OMP_SCHEDULE_MOD_SIMD : StrEnumAttrCase<"simd", 3>; + +def ScheduleModifier : StrEnumAttr<"ScheduleModifier", "OpenMP Schedule Modifier", + [OMP_SCHEDULE_MOD_None, + OMP_SCHEDULE_MOD_Monotonic, + OMP_SCHEDULE_MOD_Nonmonotonic, + OMP_SCHEDULE_MOD_SIMD]> +{ + let cppNamespace = "::mlir::omp"; +} + //===----------------------------------------------------------------------===// // 2.9.2 Workshare Loop Construct //===----------------------------------------------------------------------===// @@ -213,6 +227,8 @@ "array of symbol references">>:$reductions, OptionalAttr:$schedule_val, Optional:$schedule_chunk_var, + OptionalAttr:$schedule_modifiers, + OptionalAttr:$simd_modifier, Confined, [IntMinValue<0>]>:$collapse_val, UnitAttr:$nowait, Confined, [IntMinValue<0>]>:$ordered_val, diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -413,6 +413,7 @@ /// sched-wo-chunk ::= `auto` | `runtime` static ParseResult parseScheduleClause(OpAsmParser &parser, SmallString<8> &schedule, + SmallVectorImpl> &modifiers, Optional &chunkSize) { if (parser.parseLParen()) return failure(); @@ -436,6 +437,14 @@ return parser.emitError(parser.getNameLoc()) << " expected schedule kind"; } + // If there is a comma, we have one or more modifiers. + while (succeeded(parser.parseOptionalComma())) { + StringRef mod; + if (parser.parseKeyword(&mod)) + return failure(); + modifiers.push_back(mod); + } + if (parser.parseRParen()) return failure(); @@ -531,6 +540,7 @@ SmallVector reductionVars; SmallVector reductionVarTypes; SmallString<8> schedule; + SmallVector> modifiers; Optional scheduleChunkSize; const StringRef opName = result.name.getStringRef(); @@ -582,7 +592,7 @@ } else if (keyword == "schedule") { if (!schedule.empty()) return allowedOnce(parser, "schedule", opName); - if (parseScheduleClause(parser, schedule, scheduleChunkSize)) + if (parseScheduleClause(parser, schedule, modifiers, scheduleChunkSize)) return failure(); if (scheduleChunkSize) { segments[scheduleClausePos] = 1; @@ -669,6 +679,14 @@ schedule[0] = llvm::toUpper(schedule[0]); auto attr = parser.getBuilder().getStringAttr(schedule); result.addAttribute("schedule_val", attr); + if (modifiers.size() > 0) { + auto mod = parser.getBuilder().getStringAttr(modifiers[0]); + result.addAttribute("schedule_modifiers", mod); + if (modifiers.size() > 1) { + mod = parser.getBuilder().getStringAttr(modifiers[1]); + result.addAttribute("simd_modifier", mod); + } + } if (scheduleChunkSize) { auto chunkSizeType = parser.getBuilder().getI32Type(); parser.resolveOperand(*scheduleChunkSize, chunkSizeType, result.operands); @@ -727,6 +745,13 @@ if (auto chunk = op.schedule_chunk_var()) { p << " = " << chunk; } + if (auto modifier = op.schedule_modifiers()) { + p << ", " << modifier; + } + auto simd = op.simd_modifier(); 
+ if (simd.hasValue() && *simd != "none") { + p << ", " << simd; + } p << ")"; } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -612,6 +612,15 @@ ompBuilder->collapseLoops(diLoc, loopInfos, {}); allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + llvm::OpenMPIRBuilder::InsertPointTy afterIP; + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + bool isSimd = false; + if (auto simd = loop.simd_modifier()) { + omp::ScheduleModifier modifier = *omp::symbolizeScheduleModifier(*simd); + isSimd = (modifier == omp::ScheduleModifier::simd); + } + if (schedule == omp::ClauseScheduleKind::Static) { ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, !loop.nowait(), chunk); @@ -622,21 +631,43 @@ schedType = llvm::omp::OMPScheduleType::DynamicChunked; break; case omp::ClauseScheduleKind::Guided: - schedType = llvm::omp::OMPScheduleType::GuidedChunked; + if (isSimd) + schedType = llvm::omp::OMPScheduleType::GuidedSimd; + else + schedType = llvm::omp::OMPScheduleType::GuidedChunked; break; case omp::ClauseScheduleKind::Auto: schedType = llvm::omp::OMPScheduleType::Auto; break; case omp::ClauseScheduleKind::Runtime: - schedType = llvm::omp::OMPScheduleType::Runtime; + if (isSimd) + schedType = llvm::omp::OMPScheduleType::RuntimeSimd; + else + schedType = llvm::omp::OMPScheduleType::Runtime; break; default: llvm_unreachable("Unknown schedule value"); break; } - ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP, - schedType, !loop.nowait(), chunk); + if (loop.schedule_modifiers().hasValue()) { + omp::ScheduleModifier modifier = + *omp::symbolizeScheduleModifier( + loop.schedule_modifiers().getValue()); + switch (modifier) { + case 
omp::ScheduleModifier::monotonic: + schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; + break; + case omp::ScheduleModifier::nonmonotonic: + schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic; + break; + default: + // Nothing to do here. + break; + } + } + afterIP = ompBuilder->applyDynamicWorkshareLoop( + ompLoc, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); } // Continue building IR after the loop. Note that the LoopInfo returned by diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -176,15 +176,29 @@ omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref) schedule(static) - omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) schedule(static) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) { + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref) schedule(static, none) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) schedule(static, none) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) { + omp.yield + } + + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(static = %{{.*}}, none) collapse(3) ordered(2) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) + schedule(static = %chunk_var, none) collapse(3) { + omp.yield + } + + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) 
schedule(dynamic = %{{.*}}, nonmonotonic) collapse(3) ordered(2) + omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) + firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) + schedule(dynamic = %chunk_var, nonmonotonic) collapse(3) { omp.yield } - // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(static = %{{.*}}) collapse(3) ordered(2) + // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}, monotonic) collapse(3) ordered(2) omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) ordered(2) private(%data_var : memref) firstprivate(%data_var : memref) lastprivate(%data_var : memref) linear(%data_var = %linear_var : memref) - schedule(static = %chunk_var) collapse(3) { + schedule(dynamic = %chunk_var, monotonic) collapse(3) { omp.yield } diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -420,7 +420,7 @@ llvm.func @body(i64) llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -432,7 +432,7 @@ } llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto) { + omp.wsloop (%iv) : i64 = 
(%lb) to (%ub) step (%step) schedule(auto, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -444,7 +444,7 @@ } llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -456,7 +456,7 @@ } llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () { - omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided) { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided, none) { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 @@ -497,6 +497,32 @@ // ----- +llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, nonmonotonic) { + // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859 + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +} + +llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, monotonic) { + // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870947 + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: 
%[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +} + +// ----- + // Check that the loop bounds are emitted in the correct location in case of // collapse. This only checks the overall shape of the IR, detailed checking // is done by the OpenMPIRBuilder.