diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -132,7 +132,10 @@ // TODO: remove this once emitAccessorPrefix is set to // kEmitAccessorPrefix_Prefixed for the dialect. /// Returns the reduction variables - operand_range getReductionVars() { return reduction_vars(); } + SmallVector<Value> getReductionVars() { + return SmallVector<Value>(reduction_vars().begin(), + reduction_vars().end()); + } }]; } @@ -237,7 +240,10 @@ // TODO: remove this once emitAccessorPrefix is set to // kEmitAccessorPrefix_Prefixed for the dialect. /// Returns the reduction variables - operand_range getReductionVars() { return reduction_vars(); } + SmallVector<Value> getReductionVars() { + return SmallVector<Value>(reduction_vars().begin(), + reduction_vars().end()); + } }]; } @@ -375,7 +381,10 @@ // TODO: remove this once emitAccessorPrefix is set to // kEmitAccessorPrefix_Prefixed for the dialect. 
/// Returns the reduction variables - operand_range getReductionVars() { return reduction_vars(); } + SmallVector<Value> getReductionVars() { + return SmallVector<Value>(reduction_vars().begin(), + reduction_vars().end()); + } }]; let hasCustomAssemblyFormat = 1; let assemblyFormat = [{ @@ -393,7 +402,7 @@ custom( $reduction_vars, type($reduction_vars), $reductions ) `)` - ) `for` custom<WsLoopControl>($region, $lowerBound, $upperBound, $step, + ) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step, type($step), $inclusive) attr-dict }]; let hasVerifier = 1; @@ -542,11 +551,172 @@ }]; let extraClassDeclaration = [{ /// Returns the reduction variables - operand_range getReductionVars() { return in_reduction_vars(); } + SmallVector<Value> getReductionVars() { + return SmallVector<Value>(in_reduction_vars().begin(), + in_reduction_vars().end()); + } }]; let hasVerifier = 1; } +def TaskLoopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments, + AutomaticAllocationScope, RecursiveSideEffects, + AllTypesMatch<["lowerBound", "upperBound", "step"]>, + ReductionClauseInterface]> { + let summary = "taskloop construct"; + let description = [{ + The taskloop construct specifies that the iterations of one or more + associated loops will be executed in parallel using explicit tasks. The + iterations are distributed across tasks generated by the construct and + scheduled to be executed. + + The `lowerBound` and `upperBound` specify a half-open range: the range + includes the lower bound but does not include the upper bound. If the + `inclusive` attribute is specified then the upper bound is also included. + The `step` specifies the loop step. + + The body region can contain any number of blocks. 
+ + ``` + omp.taskloop + for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) { + %a = load %arrA[%i1, %i2] : memref + %b = load %arrB[%i1, %i2] : memref + %sum = arith.addf %a, %b : f32 + store %sum, %arrC[%i1, %i2] : memref + omp.terminator + } + ``` + + For definitions of "undeferred task", "included task", "final task" and + "mergeable task", please check the OpenMP Specification. + + When an `if` clause is present on a taskloop construct, and if the `if` + clause expression evaluates to `false`, undeferred tasks are generated. The + use of a variable in an `if` clause expression of a taskloop construct + causes an implicit reference to the variable in all enclosing constructs. + + When a `final` clause is present on a taskloop construct and the `final` + clause expression evaluates to `true`, the generated tasks will be final + tasks. The use of a variable in a `final` clause expression of a taskloop + construct causes an implicit reference to the variable in all enclosing + constructs. + + If the `untied` clause is specified, all tasks generated by the taskloop + construct are untied tasks. + + When the `mergeable` clause is present on a taskloop construct, each + generated task is a mergeable task. + + Reductions can be performed in a loop by specifying reduction accumulator + variables in `reduction_vars` or `in_reduction_vars` and symbols referring + to reduction declarations in the `reductions` or `in_reductions` attribute. + Each reduction is identified by the accumulator it uses and accumulators + must not be repeated in the same reduction. The `omp.reduction` operation + accepts the accumulator and a partial value which is considered to be + produced by the current loop iteration for the given reduction. If multiple + values are produced for the same accumulator, i.e. there are multiple + `omp.reduction`s, the last value is taken. 
The reduction declaration + specifies how to combine the values from each iteration into the final + value, which is available in the accumulator after the loop completes. + + If an `in_reduction` clause is present on the taskloop construct, the + behavior is as if each generated task was defined by a task construct on + which an `in_reduction` clause with the same reduction operator and list + items is present. Thus, the generated tasks are participants of a reduction + previously defined by a reduction scoping clause. + + If a `reduction` clause is present on the taskloop construct, the behavior + is as if a `task_reduction` clause with the same reduction operator and list + items was applied to the implicit taskgroup construct enclosing the taskloop + construct. The taskloop construct executes as if each generated task was + defined by a task construct on which an `in_reduction` clause with the same + reduction operator and list items is present. Thus, the generated tasks are + participants of the reduction defined by the `task_reduction` clause that + was applied to the implicit taskgroup construct. + + When a `priority` clause is present on a taskloop construct, the generated + tasks use the `priority-value` as if it was specified for each individual + task. If the `priority` clause is not specified, tasks generated by the + taskloop construct have the default task priority (zero). + + The `allocators_vars` and `allocate_vars` arguments are a variadic list of + values that specify the memory allocator to be used to obtain storage for + private values. + + If a `grainsize` clause is present on the taskloop construct, the number of + logical loop iterations assigned to each generated task is greater than or + equal to the minimum of the value of the grain-size expression and the + number of logical loop iterations, but less than two times the value of the + grain-size expression. 
+ + If `num_tasks` is specified, the taskloop construct creates as many tasks as + the minimum of the num-tasks expression and the number of logical loop + iterations. Each task must have at least one logical loop iteration. + + By default, the taskloop construct executes as if it was enclosed in a + taskgroup construct with no statements or directives outside of the taskloop + construct. Thus, the taskloop construct creates an implicit taskgroup + region. If the `nogroup` clause is present, no implicit taskgroup region is + created. + }]; + + let arguments = (ins Variadic:$lowerBound, + Variadic:$upperBound, + Variadic:$step, + UnitAttr:$inclusive, + Optional:$if_expr, + Optional:$final_expr, + UnitAttr:$untied, + UnitAttr:$mergeable, + Variadic:$in_reduction_vars, + OptionalAttr:$in_reductions, + Variadic:$reduction_vars, + OptionalAttr:$reductions, + Optional:$priority, + Variadic:$allocate_vars, + Variadic:$allocators_vars, + Optional: $grain_size, + Optional: $num_tasks, + UnitAttr: $nogroup); + + let regions = (region AnyRegion:$region); + + let assemblyFormat = [{ + oilist(`if` `(` $if_expr `)` + |`final` `(` $final_expr `)` + |`untied` $untied + |`mergeable` $mergeable + |`in_reduction` `(` + custom( + $in_reduction_vars, type($in_reduction_vars), $in_reductions + ) `)` + |`reduction` `(` + custom( + $reduction_vars, type($reduction_vars), $reductions + ) `)` + |`priority` `(` $priority `:` type($priority) `)` + |`allocate` `(` + custom( + $allocate_vars, type($allocate_vars), + $allocators_vars, type($allocators_vars) + ) `)` + |`grain_size` `(` $grain_size `:` type($grain_size) `)` + |`num_tasks` `(` $num_tasks `:` type($num_tasks) `)` + |`nogroup` $nogroup + ) `for` custom<LoopControl>($region, $lowerBound, $upperBound, $step, + type($step), $inclusive) attr-dict + }]; + + let extraClassDeclaration = [{ + /// Returns the reduction variables + SmallVector<Value> getReductionVars(); + void getEffects(SmallVectorImpl &effects); + }]; + + let hasVerifier = 1; +} + def 
TaskGroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments, ReductionClauseInterface, AutomaticAllocationScope]> { diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -40,7 +40,7 @@ let methods = [ InterfaceMethod< - "Get reduction vars", "::mlir::Operation::operand_range", + "Get reduction vars", "::mlir::SmallVector<::mlir::Value>", "getReductionVars">, ]; } diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -523,11 +523,11 @@ /// loop-bounds := `(` ssa-id-list `)` to `(` ssa-id-list `)` inclusive? steps /// steps := `step` `(`ssa-id-list`)` ParseResult -parseWsLoopControl(OpAsmParser &parser, Region &region, - SmallVectorImpl &lowerBound, - SmallVectorImpl &upperBound, - SmallVectorImpl &steps, - SmallVectorImpl &loopVarTypes, UnitAttr &inclusive) { +parseLoopControl(OpAsmParser &parser, Region &region, + SmallVectorImpl &lowerBound, + SmallVectorImpl &upperBound, + SmallVectorImpl &steps, + SmallVectorImpl &loopVarTypes, UnitAttr &inclusive) { // Parse an opening `(` followed by induction variables followed by `)` SmallVector ivs; Type loopVarType; @@ -557,10 +557,10 @@ return parser.parseRegion(region, ivs); } -void printWsLoopControl(OpAsmPrinter &p, Operation *op, Region &region, - ValueRange lowerBound, ValueRange upperBound, - ValueRange steps, TypeRange loopVarTypes, - UnitAttr inclusive) { +void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region, + ValueRange lowerBound, ValueRange upperBound, + ValueRange steps, TypeRange loopVarTypes, + UnitAttr inclusive) { auto args = region.front().getArguments(); p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound << ") to (" << 
upperBound << ") "; @@ -736,6 +736,43 @@ task_reduction_vars()); } +//===----------------------------------------------------------------------===// +// TaskLoopOp +//===----------------------------------------------------------------------===// +SmallVector<Value> TaskLoopOp::getReductionVars() { + SmallVector<Value> all_reduction_nvars(in_reduction_vars().begin(), + in_reduction_vars().end()); + all_reduction_nvars.insert(all_reduction_nvars.end(), + reduction_vars().begin(), reduction_vars().end()); + return all_reduction_nvars; +} + +LogicalResult TaskLoopOp::verify() { + if (allocate_vars().size() != allocators_vars().size()) + return emitError( + "expected equal sizes for allocate and allocator variables"); + if (failed(verifyReductionVarList(*this, reductions(), reduction_vars())) || + failed( + verifyReductionVarList(*this, in_reductions(), in_reduction_vars()))) + return failure(); + + if (reduction_vars().size() > 0 && nogroup()) + return emitError("if a reduction clause is present on the taskloop " + "directive, the nogroup clause must not be specified"); + for (auto var : reduction_vars()) { + if (llvm::is_contained(in_reduction_vars(), var)) + return emitError("the same list item cannot appear in both a reduction " + "and an in_reduction clause"); + } + + if (grain_size() && num_tasks()) { + return emitError( + "the grainsize clause and num_tasks clause are mutually exclusive and " + "may not appear on the same taskloop directive"); + } + return success(); +} + //===----------------------------------------------------------------------===// // WsLoopOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -1294,3 +1294,128 @@ } return } + +// ----- + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + %testmemref = "test.memref"() : () -> (memref) + // 
expected-error @below {{expected equal sizes for allocate and allocator variables}} + "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testmemref) ({ + ^bb0(%arg3: i32, %arg4: i32): + "omp.terminator"() : () -> () + }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, memref) -> () + return +} + +// ----- + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + %testf32 = "test.f32"() : () -> (!llvm.ptr) + %testf32_2 = "test.f32"() : () -> (!llvm.ptr) + // expected-error @below {{expected as many reduction symbol references as reduction variables}} + "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({ + ^bb0(%arg3: i32, %arg4: i32): + "omp.terminator"() : () -> () + }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> () + return +} + +// ----- + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + %testf32 = "test.f32"() : () -> (!llvm.ptr) + %testf32_2 = "test.f32"() : () -> (!llvm.ptr) + // expected-error @below {{expected as many reduction symbol references as reduction variables}} + "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({ + ^bb0(%arg3: i32, %arg4: i32): + "omp.terminator"() : () -> () + }) {operand_segment_sizes = dense<[2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0]> : vector<12xi32>, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> () + return +} + +// ----- + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + %testf32 = "test.f32"() : () -> (!llvm.ptr) + %testf32_2 = "test.f32"() : () -> (!llvm.ptr) + // expected-error @below {{expected as many reduction symbol references as reduction variables}} + "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({ + ^bb0(%arg3: i32, %arg4: i32): + "omp.terminator"() : () -> () + }) {in_reductions = [@add_f32], operand_segment_sizes = 
dense<[2, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> () + return +} + +// ----- + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + %testf32 = "test.f32"() : () -> (!llvm.ptr) + %testf32_2 = "test.f32"() : () -> (!llvm.ptr) + // expected-error @below {{expected as many reduction symbol references as reduction variables}} + "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32_2) ({ + ^bb0(%arg3: i32, %arg4: i32): + "omp.terminator"() : () -> () + }) {in_reductions = [@add_f32, @add_f32], operand_segment_sizes = dense<[2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0]> : vector<12xi32>} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> () + return +} + +// ----- + +omp.reduction.declare @add_f32 : f32 +init { +^bb0(%arg: f32): + %0 = arith.constant 0.0 : f32 + omp.yield (%0 : f32) +} +combiner { +^bb1(%arg0: f32, %arg1: f32): + %1 = arith.addf %arg0, %arg1 : f32 + omp.yield (%1 : f32) +} + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + %testf32 = "test.f32"() : () -> (!llvm.ptr) + %testf32_2 = "test.f32"() : () -> (!llvm.ptr) + // expected-error @below {{if a reduction clause is present on the taskloop directive, the nogroup clause must not be specified}} + omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) nogroup + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.terminator + } + return +} + +// ----- + +omp.reduction.declare @add_f32 : f32 +init { +^bb0(%arg: f32): + %0 = arith.constant 0.0 : f32 + omp.yield (%0 : f32) +} +combiner { +^bb1(%arg0: f32, %arg1: f32): + %1 = arith.addf %arg0, %arg1 : f32 + omp.yield (%1 : f32) +} + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + %testf32 = "test.f32"() : () -> (!llvm.ptr) + // expected-error @below {{the same list item cannot appear in both a reduction and an in_reduction clause}} + omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr) in_reduction(@add_f32 -> %testf32 
: !llvm.ptr) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.terminator + } + return +} + +// ----- + +func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { + %testi64 = "test.i64"() : () -> (i64) + // expected-error @below {{the grainsize clause and num_tasks clause are mutually exclusive and may not appear on the same taskloop directive}} + omp.taskloop grain_size(%testi64: i64) num_tasks(%testi64: i64) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + omp.terminator + } + return +} diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s | mlir-opt | FileCheck %s func.func @omp_barrier() -> () { // CHECK: omp.barrier @@ -1394,8 +1394,6 @@ return } -// ----- - func.func @omp_threadprivate() { %0 = arith.constant 1 : i32 %1 = arith.constant 2 : i32 @@ -1528,3 +1526,141 @@ } return } + +// CHECK-LABEL: @omp_taskloop +func.func @omp_taskloop(%lb: i32, %ub: i32, %step: i32) -> () { + + // CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) { + omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop for (%{{.+}}) : i32 = (%{{.+}}) to (%{{.+}}) step (%{{.+}}) { + omp.taskloop for (%i) : i32 = (%lb) to (%ub) step (%step) { + // CHECK: test.op1 + "test.op1"(%lb) : (i32) -> () + // CHECK: test.op2 + "test.op2"() : () -> () + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, 
%{{.+}}) inclusive step (%{{.+}}, %{{.+}}) { + omp.taskloop for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) inclusive step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + %testbool = "test.bool"() : () -> (i1) + + // CHECK: omp.taskloop if(%{{[^)]+}}) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop if(%testbool) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop final(%{{[^)]+}}) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop final(%testbool) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop untied + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop untied + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop mergeable + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop mergeable + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + %testf32 = "test.f32"() : () -> (!llvm.ptr) + %testf32_2 = "test.f32"() : () -> (!llvm.ptr) + // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop 
reduction(@add_f32 -> %{{.+}} : !llvm.ptr, @add_f32 -> %{{.+}} : !llvm.ptr) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop reduction(@add_f32 -> %testf32 : !llvm.ptr, @add_f32 -> %testf32_2 : !llvm.ptr) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop in_reduction(@add_f32 -> %{{.+}} : !llvm.ptr) reduction(@add_f32 -> %{{.+}} : !llvm.ptr) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop in_reduction(@add_f32 -> %testf32 : !llvm.ptr) reduction(@add_f32 -> %testf32_2 : !llvm.ptr) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + %testi32 = "test.i32"() : () -> (i32) + // CHECK: omp.taskloop priority(%{{[^:]+}}: i32) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop priority(%testi32: i32) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + %testmemref = "test.memref"() : () -> (memref) + // CHECK: omp.taskloop allocate(%{{.+}} : memref -> %{{.+}} : memref) + omp.taskloop allocate(%testmemref : memref -> %testmemref : memref) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + %testi64 = "test.i64"() : () -> (i64) + // CHECK: omp.taskloop grain_size(%{{[^:]+}}: i64) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop grain_size(%testi64: i64) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator 
+ omp.terminator + } + + // CHECK: omp.taskloop num_tasks(%{{[^:]+}}: i64) + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop num_tasks(%testi64: i64) + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.taskloop nogroup + // CHECK-SAME: for (%{{.+}}, %{{.+}}) : i32 = (%{{.+}}, %{{.+}}) to (%{{.+}}, %{{.+}}) step (%{{.+}}, %{{.+}}) { + omp.taskloop nogroup + for (%i, %j) : i32 = (%lb, %ub) to (%ub, %lb) step (%step, %step) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: return + return +}