diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -111,10 +111,15 @@ /// Todo: Update kmp.h to include this file, and remove the enums in kmp.h /// To complete this, more enum values will need to be moved here. enum class OMPScheduleType { - Static = 34, /**< static unspecialized */ + Static = 34, ///< static unspecialized DynamicChunked = 35, + GuidedChunked = 36, ///< guided unspecialized + Runtime = 37, + Auto = 38, ///< auto + ModifierNonmonotonic = (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierNonmonotonic) }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -387,6 +387,7 @@ /// \param CLI A descriptor of the canonical loop to workshare. /// \param AllocaIP An insertion point for Alloca instructions usable in the /// preheader of the loop. + /// \param SchedType Type of scheduling to be passed to the init function. /// \param NeedsBarrier Indicates whether a barrier must be insterted after /// the loop. 
/// \param Chunk The size of loop chunk considered as a unit when @@ -396,6 +397,7 @@ InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, + omp::OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk = nullptr); diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1384,7 +1384,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop( const LocationDescription &Loc, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) { + InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier, + Value *Chunk) { // Set up the source location value for OpenMP runtime. Builder.SetCurrentDebugLocation(Loc.DL); @@ -1431,7 +1432,7 @@ Value *ThreadNum = getOrCreateThreadID(SrcLoc); OMPScheduleType DynamicSchedType = - OMPScheduleType::DynamicChunked | OMPScheduleType::ModifierNonmonotonic; + SchedType | OMPScheduleType::ModifierNonmonotonic; Constant *SchedulingType = ConstantInt::get(I32Type, static_cast<int>(DynamicSchedType)); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -149,6 +149,10 @@ DebugLoc DL; }; +class OpenMPIRBuilderTestWithParams + : public OpenMPIRBuilderTest, + public ::testing::WithParamInterface<omp::OMPScheduleType> {}; + // Returns the value stored in the given allocation. Returns null if the given // value is not a result of an allocation, if no value is stored or if there is // more than one store. 
@@ -1708,18 +1712,34 @@ EXPECT_EQ(NumCallsInExitBlock, 3u); } -TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoop) { +TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + omp::OMPScheduleType SchedType = GetParam(); + uint32_t ChunkSize = 1; + switch (SchedType) { + case omp::OMPScheduleType::DynamicChunked: + case omp::OMPScheduleType::GuidedChunked: + ChunkSize = 7; + break; + case omp::OMPScheduleType::Auto: + case omp::OMPScheduleType::Runtime: + ChunkSize = 1; + break; + default: + assert(0 && "unknown type for this test"); + break; + } + Type *LCTy = Type::getInt32Ty(Ctx); Value *StartVal = ConstantInt::get(LCTy, 10); Value *StopVal = ConstantInt::get(LCTy, 52); Value *StepVal = ConstantInt::get(LCTy, 2); - Value *ChunkVal = ConstantInt::get(LCTy, 7); + Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize); auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( @@ -1737,7 +1757,7 @@ Value *IV = CLI->getIndVar(); InsertPointTy EndIP = - OMPBuilder.createDynamicWorkshareLoop(Loc, CLI, AllocaIP, + OMPBuilder.createDynamicWorkshareLoop(Loc, CLI, AllocaIP, SchedType, /*NeedsBarrier=*/true, ChunkVal); // The returned value should be the "after" point. 
ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock()); @@ -1775,7 +1795,7 @@ "__kmpc_dispatch_init_4u"); EXPECT_EQ(InitCall->getNumArgOperands(), 7U); EXPECT_EQ(InitCall->getArgOperand(6), - ConstantInt::get(Type::getInt32Ty(Ctx), 7)); + ConstantInt::get(Type::getInt32Ty(Ctx), ChunkSize)); ConstantInt *OrigLowerBound = dyn_cast<ConstantInt>(LowerBoundStore->getValueOperand()); @@ -1807,6 +1827,13 @@ EXPECT_FALSE(verifyModule(*M, &errs())); } +INSTANTIATE_TEST_CASE_P(OpenMPWSLoopSchedulingTypes, + OpenMPIRBuilderTestWithParams, + ::testing::Values(omp::OMPScheduleType::DynamicChunked, + omp::OMPScheduleType::GuidedChunked, + omp::OMPScheduleType::Auto, + omp::OMPScheduleType::Runtime)); + TEST_F(OpenMPIRBuilderTest, MasterDirective) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -216,17 +216,11 @@ if (loop.getNumLoops() != 1) return opInst.emitOpError("collapsed loops not yet supported"); - bool isStatic = true; - - if (loop.schedule_val().hasValue()) { - auto schedule = - omp::symbolizeClauseScheduleKind(loop.schedule_val().getValue()); - if (schedule != omp::ClauseScheduleKind::Static && - schedule != omp::ClauseScheduleKind::Dynamic) - return opInst.emitOpError("only static (default) and dynamic loop " - "schedule is currently supported"); - isStatic = (schedule == omp::ClauseScheduleKind::Static); - } + // Static is the default. + omp::ClauseScheduleKind schedule = omp::ClauseScheduleKind::Static; + if (loop.schedule_val().hasValue()) + schedule = + *omp::symbolizeClauseScheduleKind(loop.schedule_val().getValue()); // Find the loop configuration. 
llvm::Value *lowerBound = moduleTranslation.lookupValue(loop.lowerBound()[0]); @@ -281,13 +275,32 @@ findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::InsertPointTy afterIP; llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - if (isStatic) { + if (schedule == omp::ClauseScheduleKind::Static) { loopInfo = ompBuilder->createStaticWorkshareLoop(ompLoc, loopInfo, allocaIP, !loop.nowait(), chunk); afterIP = loopInfo->getAfterIP(); } else { - afterIP = ompBuilder->createDynamicWorkshareLoop(ompLoc, loopInfo, allocaIP, - !loop.nowait(), chunk); + llvm::omp::OMPScheduleType schedType; + switch (schedule) { + case omp::ClauseScheduleKind::Dynamic: + schedType = llvm::omp::OMPScheduleType::DynamicChunked; + break; + case omp::ClauseScheduleKind::Guided: + schedType = llvm::omp::OMPScheduleType::GuidedChunked; + break; + case omp::ClauseScheduleKind::Auto: + schedType = llvm::omp::OMPScheduleType::Auto; + break; + case omp::ClauseScheduleKind::Runtime: + schedType = llvm::omp::OMPScheduleType::Runtime; + break; + default: + llvm_unreachable("Unknown schedule value"); + break; + } + + afterIP = ompBuilder->createDynamicWorkshareLoop( + ompLoc, loopInfo, allocaIP, schedType, !loop.nowait(), chunk); } // Continue building IR after the loop. 
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -416,3 +416,53 @@ }) {inclusive, operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0]> : vector<9xi32>} : (i64, i64, i64) -> () llvm.return } + +llvm.func @body(i64) + +llvm.func @test_omp_wsloop_dynamic(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(dynamic) { + // CHECK: call void @__kmpc_dispatch_init_8u + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +} + +llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(auto) { + // CHECK: call void @__kmpc_dispatch_init_8u + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +} + +llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(runtime) { + // CHECK: call void @__kmpc_dispatch_init_8u + // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +} + +llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () { + omp.wsloop (%iv) : i64 = (%lb) to (%ub) step (%step) schedule(guided) { + // CHECK: call void @__kmpc_dispatch_init_8u + 
// CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u + // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + llvm.call @body(%iv) : (i64) -> () + omp.yield + } + llvm.return +}