Index: llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -78,8 +78,6 @@
 enum class OMPScheduleType {
   StaticChunked = 33,
   Static = 34, // static unspecialized
-  DistributeChunked = 91,
-  Distribute = 92,
   DynamicChunked = 35,
   GuidedChunked = 36, // guided unspecialized
   Runtime = 37,
@@ -89,6 +87,16 @@
   GuidedSimd = 46,  // guided with chunk adjustment
   RuntimeSimd = 47, // runtime with chunk adjustment
 
+  OrderedStaticChunked = 65,
+  OrderedStatic = 66, // ordered static unspecialized
+  OrderedDynamicChunked = 67,
+  OrderedGuidedChunked = 68,
+  OrderedRuntime = 69,
+  OrderedAuto = 70, // ordered auto
+
+  DistributeChunked = 91, // distribute static chunked
+  Distribute = 92,        // distribute static unspecialized
+
   ModifierMonotonic =
       (1 << 29), // Set if the monotonic schedule modifier was present
   ModifierNonmonotonic =
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -403,13 +403,16 @@
   ///                 the loop.
   /// \param Chunk The size of loop chunk considered as a unit when
   ///              scheduling. If \p nullptr, defaults to 1.
+  /// \param Ordered Indicates whether the ordered clause was specified without
+  ///                a parameter.
   ///
   /// \returns Point where to insert code after the workshare construct.
   InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                           InsertPointTy AllocaIP,
                                           omp::OMPScheduleType SchedType,
                                           bool NeedsBarrier,
-                                          Value *Chunk = nullptr);
+                                          Value *Chunk = nullptr,
+                                          bool Ordered = false);
 
   /// Modifies the canonical loop to be a workshare loop.
   ///
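For reference, a minimal caller-side sketch of the extended entry point. This is illustrative only; `OMPBuilder`, `DL`, `CLI`, and `AllocaIP` are assumed to come from the usual OpenMPIRBuilder setup, as in the unit test further down:

    // Lower a `schedule(dynamic) ordered` worksharing loop: the schedule type
    // selects the dispatch variant, and Ordered=true requests the
    // per-iteration __kmpc_dispatch_fini_* call in the loop latch.
    OpenMPIRBuilder::InsertPointTy AfterIP =
        OMPBuilder.applyDynamicWorkshareLoop(
            DL, CLI, AllocaIP, omp::OMPScheduleType::OrderedDynamicChunked,
            /*NeedsBarrier=*/true, /*Chunk=*/nullptr, /*Ordered=*/true);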
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1903,9 +1903,24 @@
   llvm_unreachable("unknown OpenMP loop iterator bitwidth");
 }
 
+/// Returns an LLVM function to call for finalizing the dynamic loop,
+/// depending on `Ty`. Only i32 and i64 are supported by the runtime. Always
+/// interpret integers as unsigned similarly to CanonicalLoopInfo.
+static FunctionCallee
+getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
+  unsigned Bitwidth = Ty->getIntegerBitWidth();
+  if (Bitwidth == 32)
+    return OMPBuilder.getOrCreateRuntimeFunction(
+        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
+  if (Bitwidth == 64)
+    return OMPBuilder.getOrCreateRuntimeFunction(
+        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
+  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
     DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
-    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
+    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk, bool Ordered) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
   assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
          "Require dedicated allocate IP");
@@ -1946,6 +1961,7 @@
   BasicBlock *Header = CLI->getHeader();
   BasicBlock *Exit = CLI->getExit();
   BasicBlock *Cond = CLI->getCond();
+  BasicBlock *Latch = CLI->getLatch();
   InsertPointTy AfterIP = CLI->getAfterIP();
 
   // The CLI will be "broken" in the code below, as the loop is no longer
@@ -2005,6 +2021,13 @@
   assert(BI->getSuccessor(1) == Exit);
   BI->setSuccessor(1, OuterCond);
 
+  // Call the "fini" function if "ordered" is present in the wsloop directive.
+  if (Ordered) {
+    Builder.SetInsertPoint(&Latch->back());
+    FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this);
+    Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
+  }
+
   // Add the barrier if requested.
   if (NeedsBarrier) {
     Builder.SetInsertPoint(&Exit->back());
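Placing the fini call in the latch mirrors the protocol expected by the libomp dispatch interface: every iteration handed out by `__kmpc_dispatch_next_*` must be finalized so that threads blocked in ordered regions can make progress. A self-contained sketch of that contract follows (unsigned 32-bit variant; `Body` and `orderedLoopShape` are placeholders, and the generated IR follows this shape rather than this exact C++):

    #include <cstdint>

    extern "C" {
    struct ident_t; // opaque source-location descriptor used by the runtime
    int32_t __kmpc_dispatch_next_4u(ident_t *Loc, int32_t GTid, int32_t *PLast,
                                    uint32_t *PLB, uint32_t *PUB, int32_t *PSt);
    void __kmpc_dispatch_fini_4u(ident_t *Loc, int32_t GTid);
    }

    static void orderedLoopShape(ident_t *Loc, int32_t GTid,
                                 void (*Body)(uint32_t)) {
      int32_t Last, Stride;
      uint32_t LB, UB;
      // Outer loop: fetch chunks until the runtime reports none are left.
      while (__kmpc_dispatch_next_4u(Loc, GTid, &Last, &LB, &UB, &Stride)) {
        // Inner loop: the canonical loop; the fini call sits in its latch.
        for (uint32_t IV = LB; IV <= UB; ++IV) {
          Body(IV);
          __kmpc_dispatch_fini_4u(Loc, GTid);
        }
      }
    }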
Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
===================================================================
--- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -2089,11 +2089,13 @@
   InsertPointTy AfterIP = CLI->getAfterIP();
   BasicBlock *Preheader = CLI->getPreheader();
   BasicBlock *ExitBlock = CLI->getExit();
+  BasicBlock *LatchBlock = CLI->getLatch();
   Value *IV = CLI->getIndVar();
 
   InsertPointTy EndIP =
       OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
-                                           /*NeedsBarrier=*/true, ChunkVal);
+                                           /*NeedsBarrier=*/true, ChunkVal,
+                                           /*Ordered=*/false);
   // The returned value should be the "after" point.
   ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
   ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
@@ -2146,6 +2148,10 @@
   EXPECT_EQ(OrigUpperBound->getValue(), 21);
   EXPECT_EQ(OrigStride->getValue(), 1);
 
+  CallInst *FiniCall = dyn_cast<CallInst>(
+      &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
+  EXPECT_EQ(FiniCall, nullptr);
+
   // The original loop iterator should only be used in the condition, in the
   // increment and in the statement that adds the lower bound to it.
   EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
@@ -2181,6 +2187,83 @@
                       omp::OMPScheduleType::Runtime |
                           omp::OMPScheduleType::ModifierMonotonic));
 
+TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  IRBuilder<> Builder(BB);
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  omp::OMPScheduleType SchedType = omp::OMPScheduleType::OrderedStaticChunked;
+  uint32_t ChunkSize = 1;
+  Type *LCTy = Type::getInt32Ty(Ctx);
+  Value *StartVal = ConstantInt::get(LCTy, 10);
+  Value *StopVal = ConstantInt::get(LCTy, 52);
+  Value *StepVal = ConstantInt::get(LCTy, 2);
+  Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
+  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
+
+  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
+      Loc, LoopBodyGen, StartVal, StopVal, StepVal,
+      /*IsSigned=*/false, /*InclusiveStop=*/false);
+
+  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
+  InsertPointTy AllocaIP = Builder.saveIP();
+
+  // Collect all the info from CLI, as it isn't usable after the call to
+  // applyDynamicWorkshareLoop.
+  InsertPointTy AfterIP = CLI->getAfterIP();
+  BasicBlock *Preheader = CLI->getPreheader();
+  BasicBlock *ExitBlock = CLI->getExit();
+  BasicBlock *LatchBlock = CLI->getLatch();
+  Value *IV = CLI->getIndVar();
+
+  InsertPointTy EndIP =
+      OMPBuilder.applyDynamicWorkshareLoop(DL, CLI, AllocaIP, SchedType,
+                                           /*NeedsBarrier=*/true, ChunkVal,
+                                           /*Ordered=*/true);
+
+  CallInst *InitCall = nullptr;
+  for (Instruction &EI : *Preheader) {
+    Instruction *Cur = &EI;
+    if (isa<CallInst>(Cur)) {
+      InitCall = cast<CallInst>(Cur);
+      if (InitCall->getCalledFunction()->getName() == "__kmpc_dispatch_init_4u")
+        break;
+      InitCall = nullptr;
+    }
+  }
+  EXPECT_NE(InitCall, nullptr);
+  EXPECT_EQ(InitCall->arg_size(), 7U);
+  ConstantInt *SchedVal = cast<ConstantInt>(InitCall->getArgOperand(2));
+  EXPECT_EQ(SchedVal->getValue(), static_cast<uint64_t>(SchedType));
+
+  CallInst *FiniCall = dyn_cast<CallInst>(
+      &*(LatchBlock->getTerminator()->getPrevNonDebugInstruction(true)));
+  ASSERT_NE(FiniCall, nullptr);
+  EXPECT_EQ(FiniCall->getCalledFunction()->getName(),
+            "__kmpc_dispatch_fini_4u");
+  EXPECT_EQ(FiniCall->arg_size(), 2U);
+  EXPECT_EQ(InitCall->getArgOperand(0), FiniCall->getArgOperand(0));
+  EXPECT_EQ(InitCall->getArgOperand(1), FiniCall->getArgOperand(1));
+
+  // The original loop iterator should only be used in the condition, in the
+  // increment and in the statement that adds the lower bound to it.
+  EXPECT_EQ(std::distance(IV->use_begin(), IV->use_end()), 3);
+
+  // The exit block should contain the barrier call, plus the call to obtain
+  // the thread ID.
+  size_t NumCallsInExitBlock =
+      count_if(*ExitBlock, [](Instruction &I) { return isa<CallInst>(I); });
+  EXPECT_EQ(NumCallsInExitBlock, 2u);
+
+  // Add a termination to our block and check that it is internally consistent.
+  Builder.restoreIP(EndIP);
+  Builder.CreateRetVoid();
+  OMPBuilder.finalize();
+  EXPECT_FALSE(verifyModule(*M, &errs()));
+}
+
 TEST_F(OpenMPIRBuilderTest, MasterDirective) {
   using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
   OpenMPIRBuilder OMPBuilder(*M);
Index: mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
===================================================================
--- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -280,9 +280,9 @@
 def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
                          AllTypesMatch<["lowerBound", "upperBound", "step"]>,
                          RecursiveSideEffects, ReductionClauseInterface]> {
-  let summary = "workshare loop construct";
+  let summary = "worksharing-loop construct";
   let description = [{
-    The workshare loop construct specifies that the iterations of the loop(s)
+    The worksharing-loop construct specifies that the iterations of the loop(s)
     will be executed in parallel by threads in the current context. These
     iterations are spread across threads that already exist in the enclosing
     parallel region. The lower and upper bounds specify a half-open range: the
@@ -332,7 +332,8 @@
     implicit barrier at the end of the loop.
 
    The optional `ordered_val` attribute specifies how many loops are associated
-    with the do loop construct.
+    with the worksharing-loop construct. A value of zero indicates that the
+    ordered clause was specified without a parameter.
 
    The optional `order` attribute specifies which order the iterations of the
    associate loops are executed in. Currently the only option for this
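The translation below keys off `ordered_val`: a frontend that wants the parameterless `ordered` behavior sets the attribute to zero. A hypothetical builder-side sketch (the helper name is made up; `setAttr` and `getI64IntegerAttr` are standard MLIR APIs, and the attribute is an `I64Attr` per the op definition above):

    #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
    #include "mlir/IR/Builders.h"

    // Mark an existing omp.wsloop as `ordered` with no parameter, i.e.
    // ordered_val = 0, which the translation treats as "emit ordered
    // dispatch" rather than as a doacross loop nest.
    static void markOrderedWithoutParameter(mlir::omp::WsLoopOp loop,
                                            mlir::OpBuilder &builder) {
      loop->setAttr("ordered_val", builder.getI64IntegerAttr(0));
    }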
Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
===================================================================
--- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -799,32 +799,63 @@
   bool isSimd = loop.simd_modifier();
 
-  if (schedule == omp::ClauseScheduleKind::Static) {
+  // The orderedVal refers to the value obtained from the ordered[(n)] clause.
+  //   orderedVal == -1: No ordered[(n)] clause specified.
+  //   orderedVal ==  0: The ordered clause specified without a parameter.
+  //   orderedVal  >  0: The ordered clause specified with a parameter (n).
+  // TODO: Handle doacross loop init when orderedVal is greater than 0.
+  int64_t orderedVal =
+      loop.ordered_val().hasValue() ? loop.ordered_val().getValue() : -1;
+  if (schedule == omp::ClauseScheduleKind::Static && orderedVal != 0) {
     ompBuilder->applyWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
                                    !loop.nowait(),
                                    llvm::omp::OMP_SCHEDULE_Static, chunk);
   } else {
     llvm::omp::OMPScheduleType schedType;
     switch (schedule) {
+    case omp::ClauseScheduleKind::Static:
+      if (loop.schedule_chunk_var())
+        schedType = llvm::omp::OMPScheduleType::OrderedStaticChunked;
+      else
+        schedType = llvm::omp::OMPScheduleType::OrderedStatic;
+      break;
     case omp::ClauseScheduleKind::Dynamic:
-      schedType = llvm::omp::OMPScheduleType::DynamicChunked;
+      if (orderedVal == 0)
+        schedType = llvm::omp::OMPScheduleType::OrderedDynamicChunked;
+      else
+        schedType = llvm::omp::OMPScheduleType::DynamicChunked;
       break;
     case omp::ClauseScheduleKind::Guided:
-      if (isSimd)
-        schedType = llvm::omp::OMPScheduleType::GuidedSimd;
-      else
-        schedType = llvm::omp::OMPScheduleType::GuidedChunked;
+      if (orderedVal == 0) {
+        schedType = llvm::omp::OMPScheduleType::OrderedGuidedChunked;
+      } else {
+        if (isSimd)
+          schedType = llvm::omp::OMPScheduleType::GuidedSimd;
+        else
+          schedType = llvm::omp::OMPScheduleType::GuidedChunked;
+      }
       break;
     case omp::ClauseScheduleKind::Auto:
-      schedType = llvm::omp::OMPScheduleType::Auto;
+      if (orderedVal == 0)
+        schedType = llvm::omp::OMPScheduleType::OrderedAuto;
+      else
+        schedType = llvm::omp::OMPScheduleType::Auto;
       break;
     case omp::ClauseScheduleKind::Runtime:
-      if (isSimd)
-        schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
-      else
-        schedType = llvm::omp::OMPScheduleType::Runtime;
+      if (orderedVal == 0) {
+        schedType = llvm::omp::OMPScheduleType::OrderedRuntime;
+      } else {
+        if (isSimd)
+          schedType = llvm::omp::OMPScheduleType::RuntimeSimd;
+        else
+          schedType = llvm::omp::OMPScheduleType::Runtime;
+      }
      break;
    default:
+      if (orderedVal == 0) {
+        schedType = llvm::omp::OMPScheduleType::OrderedStatic;
+        break;
+      }
      llvm_unreachable("Unknown schedule value");
      break;
    }
@@ -841,9 +872,21 @@
       // Nothing to do here.
       break;
     }
+  } else {
+    // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
+    // If the static schedule kind is specified or if the ordered clause is
+    // specified, and if the nonmonotonic modifier is not specified, the
+    // effect is as if the monotonic modifier is specified. Otherwise, unless
+    // the monotonic modifier is specified, the effect is as if the
+    // nonmonotonic modifier is specified.
+    if (!(schedType == llvm::omp::OMPScheduleType::OrderedStatic ||
+          schedType == llvm::omp::OMPScheduleType::OrderedStaticChunked))
+      schedType |= llvm::omp::OMPScheduleType::ModifierNonmonotonic;
   }
+
   ompBuilder->applyDynamicWorkshareLoop(ompLoc.DL, loopInfo, allocaIP,
-                                        schedType, !loop.nowait(), chunk);
+                                        schedType, !loop.nowait(), chunk,
+                                        /*Ordered=*/orderedVal == 0);
 }
 
 // Continue building IR after the loop. Note that the LoopInfo returned by
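The scheduling constants in the updated CHECK lines below decompose into a base `OMPScheduleType` value plus a modifier bit from OMPConstants.h (`ModifierNonmonotonic` is 1 << 30, `ModifierMonotonic` is 1 << 29); ordered static (65/66) intentionally carries no modifier bit, per the OpenMP 5.1 rule quoted above. A compile-time sanity check of the values used in the tests:

    // Base schedule value OR'd with a modifier bit, matching the CHECK lines.
    static_assert((35u | (1u << 30)) == 1073741859u,
                  "DynamicChunked | ModifierNonmonotonic");
    static_assert((67u | (1u << 30)) == 1073741891u,
                  "OrderedDynamicChunked | ModifierNonmonotonic");
    static_assert((67u | (1u << 29)) == 536870979u,
                  "OrderedDynamicChunked | ModifierMonotonic");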
Index: mlir/test/Target/LLVMIR/openmp-llvm.mlir
===================================================================
--- mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -496,7 +496,7 @@
   %chunk_size_const = llvm.mlir.constant(2 : i16) : i16
   omp.wsloop schedule(dynamic = %chunk_size_const : i16)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i64 {{.*}}, i64 %{{.*}}, i64 {{.*}}, i64 2)
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -517,7 +517,7 @@
   omp.wsloop schedule(dynamic = %chunk_size_var : i16)
   for (%iv) : i32 = (%lb) to (%ub) step (%step) {
     // CHECK: %[[CHUNK_SIZE:.*]] = sext i16 %{{.*}} to i32
-    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -538,7 +538,7 @@
   omp.wsloop schedule(dynamic = %chunk_size_var : i64)
   for (%iv) : i32 = (%lb) to (%ub) step (%step) {
     // CHECK: %[[CHUNK_SIZE:.*]] = trunc i64 %{{.*}} to i32
-    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %[[CHUNK_SIZE]])
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -555,7 +555,7 @@
 llvm.func @test_omp_wsloop_dynamic_chunk_var3(%lb : i32, %ub : i32, %step : i32, %chunk_size : i32) -> () {
   omp.wsloop schedule(dynamic = %chunk_size : i32)
   for (%iv) : i32 = (%lb) to (%ub) step (%step) {
-    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 35, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741859, i32 {{.*}}, i32 %{{.*}}, i32 {{.*}}, i32 %{{.*}})
     // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
     // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
     // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
@@ -650,6 +650,10 @@
   llvm.return
 }
 
+// -----
+
+llvm.func @body(i64)
+
 llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(runtime, simd)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
@@ -663,6 +667,10 @@
   llvm.return
 }
 
+// -----
+
+llvm.func @body(i64)
+
 llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () {
   omp.wsloop schedule(guided, simd)
   for (%iv) : i64 = (%lb) to (%ub) step (%step) {
@@ -720,6 +728,169 @@
 
 // -----
 
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(static) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 66, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i32)
+
+llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i32) -> () {
+  %static_chunk_size = llvm.mlir.constant(1 : i32) : i32
+  omp.wsloop schedule(static = %static_chunk_size : i32) ordered(0)
+  for (%iv) : i32 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 65, i32 1, i32 %{{.*}}, i32 1, i32 1)
+    // CHECK: call void @__kmpc_dispatch_fini_4u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i32) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(dynamic) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(auto) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741894, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(runtime) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741893, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(guided) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741892, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(dynamic, nonmonotonic) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 1073741891, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
+llvm.func @body(i64)
+
+llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
+  omp.wsloop schedule(dynamic, monotonic) ordered(0)
+  for (%iv) : i64 = (%lb) to (%ub) step (%step) {
+    // CHECK: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @{{.*}}, i32 %{{.*}}, i32 536870979, i64 1, i64 %{{.*}}, i64 1, i64 1)
+    // CHECK: call void @__kmpc_dispatch_fini_8u
+    // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u
+    // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0
+    // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}}
+    llvm.call @body(%iv) : (i64) -> ()
+    omp.yield
+  }
+  llvm.return
+}
+
+// -----
+
 omp.critical.declare @mutex hint(contended)
 
 // CHECK-LABEL: @omp_critical