diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -113,8 +113,6 @@ enum class OMPScheduleType {
   StaticChunked = 33,
   Static = 34, // static unspecialized
-  DistributeChunked = 91,
-  Distribute = 92,
   DynamicChunked = 35,
   GuidedChunked = 36, // guided unspecialized
   Runtime = 37,
@@ -124,6 +122,16 @@
   GuidedSimd = 46,  // guided with chunk adjustment
   RuntimeSimd = 47, // runtime with chunk adjustment

+  OrderedStaticChunked = 65,
+  OrderedStatic = 66, // ordered static unspecialized
+  OrderedDynamicChunked = 67,
+  OrderedGuidedChunked = 68,
+  OrderedRuntime = 69,
+  OrderedAuto = 70, // ordered auto
+
+  DistributeChunked = 91, // distribute static chunked
+  Distribute = 92,        // distribute static unspecialized
+
   ModifierMonotonic =
       (1 << 29), // Set if the monotonic schedule modifier was present
   ModifierNonmonotonic =
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -399,13 +399,16 @@
   ///                 the loop.
   /// \param Chunk   The size of loop chunk considered as a unit when
   ///                 scheduling. If \p nullptr, defaults to 1.
+  /// \param Ordered Indicates whether the ordered clause is specified
+  ///                 without a parameter.
   ///
   /// \returns Point where to insert code after the workshare construct.
   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                           InsertPointTy AllocaIP,
                                           omp::OMPScheduleType SchedType,
                                           bool NeedsBarrier,
-                                          Value *Chunk = nullptr);
+                                          Value *Chunk = nullptr,
+                                          bool Ordered = false);

   /// Modifies the canonical loop to be a workshare loop.
   ///
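For orientation, the extended entry point would be driven roughly as follows. This is a minimal sketch, not code from the patch: the helper name `lowerOrderedDynamicLoop` is hypothetical, and the builder, canonical loop, and alloca insertion point are assumed to come from the surrounding frontend codegen.

    #include "llvm/Frontend/OpenMP/OMPConstants.h"
    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

    // Hypothetical call site for a `schedule(dynamic) ordered` worksharing
    // loop: OrderedDynamicChunked selects the ordered dispatch variant, and
    // Ordered=true requests the per-iteration __kmpc_dispatch_fini_* call.
    static llvm::OpenMPIRBuilder::InsertPointTy
    lowerOrderedDynamicLoop(llvm::OpenMPIRBuilder &OMPBuilder, llvm::DebugLoc DL,
                            llvm::CanonicalLoopInfo *CLI,
                            llvm::OpenMPIRBuilder::InsertPointTy AllocaIP) {
      return OMPBuilder.applyDynamicWorkshareLoop(
          DL, CLI, AllocaIP, llvm::omp::OMPScheduleType::OrderedDynamicChunked,
          /*NeedsBarrier=*/true, /*Chunk=*/nullptr, /*Ordered=*/true);
    }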
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1603,9 +1603,24 @@
   llvm_unreachable("unknown OpenMP loop iterator bitwidth");
 }

+/// Returns an LLVM function to call for finalizing the dynamic loop,
+/// depending on `type`. Only i32 and i64 are supported by the runtime. Always
+/// interpret integers as unsigned similarly to CanonicalLoopInfo.
+static FunctionCallee
+getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
+  unsigned Bitwidth = Ty->getIntegerBitWidth();
+  if (Bitwidth == 32)
+    return OMPBuilder.getOrCreateRuntimeFunction(
+        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
+  if (Bitwidth == 64)
+    return OMPBuilder.getOrCreateRuntimeFunction(
+        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
+  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
     DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
-    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
+    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk, bool Ordered) {
   assert(CLI->isValid() && "Requires a valid canonical loop");

   // Set up the source location value for OpenMP runtime.
@@ -1643,6 +1658,7 @@
   BasicBlock *Header = CLI->getHeader();
   BasicBlock *Exit = CLI->getExit();
   BasicBlock *Cond = CLI->getCond();
+  BasicBlock *Latch = CLI->getLatch();
   InsertPointTy AfterIP = CLI->getAfterIP();

   // The CLI will be "broken" in the code below, as the loop is no longer
@@ -1702,6 +1718,13 @@
   assert(BI->getSuccessor(1) == Exit);
   BI->setSuccessor(1, OuterCond);

+  // Call the "fini" function if "ordered" is present in the wsloop directive.
+  if (Ordered) {
+    Builder.SetInsertPoint(&Latch->back());
+    FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this);
+    Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
+  }
+
   // Add the barrier if requested.
   if (NeedsBarrier) {
     Builder.SetInsertPoint(&Exit->back());
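The emitted control flow follows libomp's dynamic dispatch protocol; the only change here is the extra fini call in the loop latch. A C++ sketch of the shape of the generated code for a 32-bit IV, assuming `loc`, `gtid`, and `body` stand in for values the real IR supplies (the extern declarations mirror the libomp entry points referenced above):

    #include <cstdint>

    extern "C" {
    struct ident_t; // opaque source-location descriptor used by libomp
    void __kmpc_dispatch_init_4u(ident_t *, int32_t, int32_t sched, uint32_t lb,
                                 uint32_t ub, int32_t st, uint32_t chunk);
    int __kmpc_dispatch_next_4u(ident_t *, int32_t, int32_t *last, uint32_t *lb,
                                uint32_t *ub, int32_t *st);
    void __kmpc_dispatch_fini_4u(ident_t *, int32_t);
    }

    void orderedDispatchLoop(ident_t *loc, int32_t gtid, uint32_t tripCount,
                             void (*body)(uint32_t)) {
      // 66 = OrderedStatic from OMPConstants.h (schedule constant).
      __kmpc_dispatch_init_4u(loc, gtid, /*sched=*/66, 1, tripCount, 1, 1);
      int32_t last, stride;
      uint32_t lb, ub;
      while (__kmpc_dispatch_next_4u(loc, gtid, &last, &lb, &ub, &stride)) {
        for (uint32_t iv = lb; iv <= ub; ++iv) {
          body(iv);
          // New in this patch: finalize the ordered iteration from the latch.
          __kmpc_dispatch_fini_4u(loc, gtid);
        }
      }
    }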
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1883,11 +1883,13 @@
   InsertPointTy AfterIP = CLI->getAfterIP();
   BasicBlock *Preheader = CLI->getPreheader();
   BasicBlock *ExitBlock = CLI->getExit();
+  BasicBlock *LatchBlock = CLI->getLatch();
   Value *IV = CLI->getIndVar();

   InsertPointTy EndIP =
       OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
-                                           /*NeedsBarrier=*/true, ChunkVal);
+                                           /*NeedsBarrier=*/true, ChunkVal,
+                                           /*Ordered=*/true);

   // The returned value should be the "after" point.
   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
@@ -1940,6 +1942,15 @@
   EXPECT_EQ(OrigUpperBound->getValue(), 21);
   EXPECT_EQ(OrigStride->getValue(), 1);

+  CallInst *FiniCall = dyn_cast<CallInst>(
+      &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
+  ASSERT_NE(FiniCall, nullptr);
+  EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
+            "__kmpc_dispatch_fini_4u");
+  EXPECT_EQ(FiniCall->arg_size(), 2U);
+  EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
+  EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
+
   // The original loop iterator should only be used in the condition, in the
   // increment and in the statement that adds the lower bound to it.
   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -218,9 +218,9 @@
 def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
                          AllTypesMatch<["lowerBound", "upperBound", "step"]>]> {
-  let summary = "workshare loop construct";
+  let summary = "worksharing-loop construct";
   let description = [{
-    The workshare loop construct specifies that the iterations of the loop(s)
+    The worksharing-loop construct specifies that the iterations of the loop(s)
     will be executed in parallel by threads in the current context. These
     iterations are spread across threads that already exist in the enclosing
     parallel region. The lower and upper bounds specify a half-open range: the
@@ -271,7 +271,8 @@
     implicit barrier at the end of the loop.

     The optional `ordered_val` attribute specifies how many loops are associated
-    with the do loop construct.
+    with the worksharing-loop construct. A value of zero indicates that the
+    `ordered` clause was specified without a parameter.

     The optional `order` attribute specifies which order the iterations of the
     associated loops are executed in. Currently the only option for this
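To ground the `ordered_val = 0` encoding: it models a source-level `ordered` clause with no parameter, which lets iterations be dispatched dynamically while the `ordered` region itself runs in strict iteration order. An illustrative C++ equivalent, not taken from the patch (compile with -fopenmp):

    #include <cstdio>

    void scale(int n, const float *in, float *out) {
    #pragma omp parallel for schedule(dynamic) ordered
      for (int i = 0; i < n; ++i) {
        float v = in[i] * 2.0f; // may run out of order, in parallel
    #pragma omp ordered
        std::printf("%d: %f\n", i, v); // printed strictly in iteration order
        out[i] = v;
      }
    }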
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -687,29 +687,48 @@
   bool isSimd = loop.simd_modifier();

-  if (schedule == omp::ClauseScheduleKind::Static) {
-    ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
-                                         !loop.nowait(), chunk);
-  } else {
+  std::int64_t orderedVal =
+      loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1;
+  if (schedule != omp::ClauseScheduleKind::Static || orderedVal == 0) {
     llvm::omp::OMPScheduleType schedType;
     switch (schedule) {
+    case omp::ClauseScheduleKind::Static:
+      if (loop.schedule_chunk_var())
+        schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked;
+      else
+        schedType = llvm::omp::OMPScheduleType::OrderedStatic;
+      break;
     case omp::ClauseScheduleKind::Dynamic:
-      schedType = llvm::omp::OMPScheduleType::DynamicChunked;
+      if (orderedVal == 0)
+        schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked;
+      else
+        schedType = llvm::omp::OMPScheduleType::DynamicChunked;
       break;
     case omp::ClauseScheduleKind::Guided:
-      if (isSimd)
-        schedType = llvm::omp::OMPScheduleType::GuidedSimd;
-      else
-        schedType = llvm::omp::OMPScheduleType::GuidedChunked;
+      if (orderedVal == 0) {
+        schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked;
+      } else {
+        if (isSimd)
+          schedType = llvm::omp::OMPScheduleType::GuidedSimd;
+        else
+          schedType = llvm::omp::OMPScheduleType::GuidedChunked;
+      }
       break;
     case omp::ClauseScheduleKind::Auto:
-      schedType = llvm::omp::OMPScheduleType::Auto;
+      if (orderedVal == 0)
+        schedType = llvm::omp::OMPScheduleType::OrderedAuto;
+      else
+        schedType = llvm::omp::OMPScheduleType::Auto;
       break;
     case omp::ClauseScheduleKind::Runtime:
-      if (isSimd)
-        schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
-      else
-        schedType = llvm::omp::OMPScheduleType::Runtime;
+      if (orderedVal == 0) {
+        schedType = llvm::omp::OMPScheduleType::OrderedRuntime;
+      } else {
+        if (isSimd)
+          schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
+        else
+          schedType = llvm::omp::OMPScheduleType::Runtime;
+      }
       break;
     default:
       llvm_unreachable("Unknown schedule value");
@@ -730,9 +749,23 @@
       // Nothing to do here.
       break;
     }
+    } else {
+      // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
+      // If the static schedule kind is specified or if the ordered clause is
+      // specified, and if the nonmonotonic modifier is not specified, the
+      // effect is as if the monotonic modifier is specified. Otherwise, unless
+      // the monotonic modifier is specified, the effect is as if the
+      // nonmonotonic modifier is specified.
+      if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic ||
+            schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked))
+        schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
+    }

     afterIP = ompBuilder->applyDynamicWorkshareLoop(
-        ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk);
+        ompLoc.DL, loopInfo, allocaIP, schedType, !loop.nowait(), chunk,
+        /*Ordered=*/orderedVal == 0);
+  } else {
+    ompBuilder->applyStaticWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
+                                         !loop.nowait(), chunk);
   }

   // Continue building IR after the loop. Note that the LoopInfo returned by
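The updated CHECK lines below now pin down the full `__kmpc_dispatch_init_*` argument list, including the encoded schedule constant. Each constant is just the base enum value OR'ed with modifier bits; a self-contained sketch of the arithmetic (base values from OMPConstants.h; `Auto = 38` is in the full header, outside the hunk shown above):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t DynamicChunked = 35, GuidedChunked = 36, Runtime = 37,
                     Auto = 38;
      const uint32_t ModifierNonmonotonic = 1u << 30; // 1073741824
      // With no modifier and no ordered clause, the OpenMP 5.1 rule defaults
      // these schedules to nonmonotonic, hence the large test constants:
      std::printf("dynamic: %u\n", DynamicChunked | ModifierNonmonotonic); // 1073741859
      std::printf("guided:  %u\n", GuidedChunked | ModifierNonmonotonic);  // 1073741860
      std::printf("runtime: %u\n", Runtime | ModifierNonmonotonic);        // 1073741861
      std::printf("auto:    %u\n", Auto | ModifierNonmonotonic);           // 1073741862
      return 0;
    }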
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -427,7 +427,7 @@
 llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic) {
-    // CHECK: call void @__kmpc_dispatch_init_8u
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -443,7 +443,7 @@
 llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto) {
-    // CHECK: call void @__kmpc_dispatch_init_8u
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741862, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -459,7 +459,7 @@
 llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime) {
-    // CHECK: call void @__kmpc_dispatch_init_8u
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741861, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -475,7 +475,7 @@
 llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided) {
-    // CHECK: call void @__kmpc_dispatch_init_8u
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741860, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -491,7 +491,7 @@
 llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, nonmonotonic) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -507,7 +507,7 @@
 llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, monotonic) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870947
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870947, i64 1, i64 %{{.*}}, i64 1, i64 1)
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -517,6 +517,10 @@
   llvm.return
 }

+// -----
+
+llvm.func @body(i64)
+
 llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime, simd) {
     // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 47
@@ -529,6 +533,10 @@
   llvm.return
 }

+// -----
+
+llvm.func @body(i64)
+
 llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided, simd) {
     // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 46
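The new `ordered(0)` tests that follow encode the OpenMP 5.1 defaulting rule quoted in the translation code: with no explicit modifier, ordered static keeps no modifier bit (monotonic by default), every other ordered kind gets the nonmonotonic bit, and an explicit `monotonic` modifier wins. A sketch of that rule as a pure function (values from OMPConstants.h; the function name is illustrative):

    #include <cstdint>

    uint32_t withDefaultOrderedModifier(uint32_t schedType) {
      const uint32_t OrderedStaticChunked = 65, OrderedStatic = 66;
      const uint32_t ModifierNonmonotonic = 1u << 30;
      if (schedType == OrderedStatic || schedType == OrderedStaticChunked)
        return schedType; // 65/66: monotonic by default, no bit emitted
      return schedType | ModifierNonmonotonic; // e.g. 67 -> 1073741891
    }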
@@ -543,6 +551,143 @@

 // -----

+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(static) ordered(0) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i32)
+
+llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i32) -> () {
+  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
+  omp.wsloop (%iv) : i32 = (%lb) to (%ub) step (%step) schedule(static = %static_chunk_size) ordered(0) {
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
+    // CHECK: call void @__kmpc_dispatch_fini_4u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i32) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic) ordered(0) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto) ordered(0) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741894, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime) ordered(0) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741893, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided) ordered(0) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741892, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, nonmonotonic) ordered(0) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic, monotonic) ordered(0) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
 omp.critical.declare @mutex hint(contended)

 // CHECK-LABEL: @omp_critical