diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -122,7 +122,9 @@ The workshare loop construct specifies that the iterations of the loop(s) will be executed in parallel by threads in the current context. These iterations are spread across threads that already exist in the enclosing - parallel region. + parallel region. The lower and upper bounds specify a half-open range: the + range includes the lower bound but does not include the upper bound. If the + `inclusive` attribute is specified then the upper bound is also included. The body region can contain any number of blocks. The region is terminated by "omp.yield" instruction without operands. @@ -174,9 +176,10 @@ OptionalAttr:$schedule_val, Optional:$schedule_chunk_var, Confined, [IntMinValue<0>]>:$collapse_val, - OptionalAttr:$nowait, + UnitAttr:$nowait, Confined, [IntMinValue<0>]>:$ordered_val, - OptionalAttr:$order_val); + OptionalAttr:$order_val, + UnitAttr:$inclusive); let builders = [ OpBuilderDAG<(ins "ValueRange":$lowerBound, "ValueRange":$upperBound, diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -386,7 +386,8 @@ /*linear_vars=*/ValueRange(), /*linear_step_vars=*/ValueRange(), /*schedule_val=*/nullptr, /*schedule_chunk_var=*/nullptr, /*collapse_val=*/nullptr, - /*nowait=*/nullptr, /*ordered_val=*/nullptr, /*order_val=*/nullptr); + /*nowait=*/false, /*ordered_val=*/nullptr, /*order_val=*/nullptr, + /*inclusive=*/false); state.addAttributes(attributes); } diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -596,7 +596,7 @@ llvm::BasicBlock *insertBlock = builder.GetInsertBlock(); llvm::CanonicalLoopInfo *loopInfo = ompBuilder->createCanonicalLoop( ompLoc, bodyGen, lowerBound, upperBound, step, /*IsSigned=*/true, - /*InclusiveStop=*/false); + /*InclusiveStop=*/loop.inclusive()); if (failed(bodyGenStatus)) return failure(); @@ -606,9 +606,8 @@ // Put them at the start of the current block for now. llvm::OpenMPIRBuilder::InsertPointTy allocaIP( insertBlock, insertBlock->getFirstInsertionPt()); - loopInfo = ompBuilder->createStaticWorkshareLoop( - ompLoc, loopInfo, allocaIP, - !loop.nowait().hasValue() || loop.nowait().getValue(), chunk); + loopInfo = ompBuilder->createStaticWorkshareLoop(ompLoc, loopInfo, allocaIP, + !loop.nowait(), chunk); // Continue building IR after the loop. builder.restoreIP(loopInfo->getAfterIP()); diff --git a/mlir/test/Target/openmp-llvm.mlir b/mlir/test/Target/openmp-llvm.mlir --- a/mlir/test/Target/openmp-llvm.mlir +++ b/mlir/test/Target/openmp-llvm.mlir @@ -323,3 +323,35 @@ } llvm.return } + +// CHECK-LABEL: @wsloop_inclusive_1 +llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) { + %0 = llvm.mlir.constant(42 : index) : !llvm.i64 + %1 = llvm.mlir.constant(10 : index) : !llvm.i64 + %2 = llvm.mlir.constant(1 : index) : !llvm.i64 + // CHECK: store i64 31, i64* %{{.*}}upperbound + "omp.wsloop"(%1, %0, %2) ( { + ^bb0(%arg1: !llvm.i64): + %3 = llvm.mlir.constant(2.000000e+00 : f32) : !llvm.float + %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %3, %4 : !llvm.ptr + omp.yield + }) {operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0]> : vector<9xi32>} : (!llvm.i64, !llvm.i64, !llvm.i64) -> () + llvm.return +} + +// CHECK-LABEL: @wsloop_inclusive_2 +llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) { + %0 = llvm.mlir.constant(42 : index) : !llvm.i64 + %1 = llvm.mlir.constant(10 : index) : !llvm.i64 + %2 = llvm.mlir.constant(1 : index) : !llvm.i64 + // CHECK: store i64 32, i64* %{{.*}}upperbound + "omp.wsloop"(%1, %0, %2) ( { + ^bb0(%arg1: !llvm.i64): + %3 = llvm.mlir.constant(2.000000e+00 : f32) : !llvm.float + %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, !llvm.i64) -> !llvm.ptr + llvm.store %3, %4 : !llvm.ptr + omp.yield + }) {inclusive, operand_segment_sizes = dense<[1, 1, 1, 0, 0, 0, 0, 0, 0]> : vector<9xi32>} : (!llvm.i64, !llvm.i64, !llvm.i64) -> () + llvm.return +}