diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -23,6 +23,7 @@ #include "mlir/Dialect/OpenACC/OpenACCOpsEnums.h.inc" #include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.h.inc" #include "mlir/Interfaces/ControlFlowInterfaces.h" +#include "mlir/Interfaces/LoopLikeInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #define GET_TYPEDEF_CLASSES diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -14,6 +14,7 @@ #define OPENACC_OPS include "mlir/Interfaces/ControlFlowInterfaces.td" +include "mlir/Interfaces/LoopLikeInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/IR/BuiltinTypes.td" include "mlir/IR/EnumAttr.td" @@ -1111,45 +1112,52 @@ //===----------------------------------------------------------------------===// def OpenACC_LoopOp : OpenACC_Op<"loop", - [AttrSizedOperandSegments, RecursiveMemoryEffects]> { + [AttrSizedOperandSegments, RecursiveMemoryEffects, + DeclareOpInterfaceMethods]> { let summary = "loop construct"; let description = [{ - The "acc.loop" operation represents the OpenACC loop construct. + The "acc.loop" operation represents the OpenACC loop construct. The lower + and upper bounds specify a half-open range: the range includes the lower + bound but does not include the upper bound. If the `inclusiveUpperbound` attribute is + set then the upper bound is included. Example: ```mlir - acc.loop gang vector { - scf.for %arg3 = %c0 to %c10 step %c1 { - scf.for %arg4 = %c0 to %c10 step %c1 { - scf.for %arg5 = %c0 to %c10 step %c1 { - // ... 
body - } - } - } + acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = + (%c0, %c0, %c0 : index, index, index) to + (%c10, %c10, %c10 : index, index, index) step + (%c1, %c1, %c1 : index, index, index) { + // Loop body acc.yield } attributes { collapse = 3 } ``` }]; - let arguments = (ins OptionalAttr:$collapse, - Optional:$gangNum, - Optional:$gangDim, - Optional:$gangStatic, - Optional:$workerNum, - Optional:$vectorLength, - UnitAttr:$seq, - UnitAttr:$independent, - UnitAttr:$auto_, - UnitAttr:$hasGang, - UnitAttr:$hasWorker, - UnitAttr:$hasVector, - Variadic:$tileOperands, - Variadic:$privateOperands, - OptionalAttr:$privatizations, - Variadic:$reductionOperands, - OptionalAttr:$reductionRecipes); + let arguments = (ins + Variadic:$lowerbound, + Variadic:$upperbound, + Variadic:$step, + DenseBoolArrayAttr:$inclusiveUpperbound, + OptionalAttr:$collapse, + Optional:$gangNum, + Optional:$gangDim, + Optional:$gangStatic, + Optional:$workerNum, + Optional:$vectorLength, + UnitAttr:$seq, + UnitAttr:$independent, + UnitAttr:$auto_, + UnitAttr:$hasGang, + UnitAttr:$hasWorker, + UnitAttr:$hasVector, + Variadic:$tileOperands, + Variadic:$privateOperands, + OptionalAttr:$privatizations, + Variadic:$reductionOperands, + OptionalAttr:$reductionRecipes + ); let results = (outs Variadic:$results); @@ -1165,9 +1173,12 @@ let hasCustomAssemblyFormat = 1; let assemblyFormat = [{ oilist( - `gang` `` custom($gangNum, type($gangNum), $gangDim, type($gangDim), $gangStatic, type($gangStatic), $hasGang) - | `worker` `` custom($workerNum, type($workerNum), $hasWorker) - | `vector` `` custom($vectorLength, type($vectorLength), $hasVector) + `gang` `` custom($gangNum, type($gangNum), $gangDim, + type($gangDim), $gangStatic, type($gangStatic), $hasGang) + | `worker` `` + custom($workerNum, type($workerNum), $hasWorker) + | `vector` `` + custom($vectorLength, type($vectorLength), $hasVector) | `private` `(` custom( $privateOperands, type($privateOperands), 
$privatizations) `)` @@ -1176,7 +1187,8 @@ $reductionOperands, type($reductionOperands), $reductionRecipes) `)` ) - $region + custom($region, $lowerbound, type($lowerbound), $upperbound, + type($upperbound), $step, type($step)) ( `(` type($results)^ `)` )? attr-dict-with-keyword }]; diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -712,6 +712,9 @@ // optional gang operands if (succeeded(parser.parseOptionalLParen())) { + // Allow empty parenthesis. + if (succeeded(parser.parseOptionalRParen())) + return success(); while (true) { bool newValue = false; bool needValue = false; @@ -758,8 +761,8 @@ void printGangClause(OpAsmPrinter &p, Operation *op, Value gangNum, Type gangNumType, Value gangDim, Type gangDimType, Value gangStatic, Type gangStaticType, UnitAttr hasGang) { + p << "("; if (gangNum || gangStatic || gangDim) { - p << "("; if (gangNum) { p << LoopOp::getGangNumKeyword() << "=" << gangNum << " : " << gangNumType; @@ -775,8 +778,8 @@ if (gangStatic) p << LoopOp::getGangStaticKeyword() << "=" << gangStatic << " : " << gangStaticType; - p << ")"; } + p << ")"; } static ParseResult @@ -785,6 +788,9 @@ Type &workerNumType, UnitAttr &hasWorker) { hasWorker = UnitAttr::get(parser.getBuilder().getContext()); if (succeeded(parser.parseOptionalLParen())) { + // Allow empty parenthesis. 
+ if (succeeded(parser.parseOptionalRParen())) + return success(); workerNum = OpAsmParser::UnresolvedOperand{}; if (parser.parseOperand(*workerNum) || parser.parseColonType(workerNumType) || parser.parseRParen()) @@ -795,8 +801,10 @@ void printWorkerClause(OpAsmPrinter &p, Operation *op, Value workerNum, Type workerNumType, UnitAttr hasWorker) { + p << "("; if (workerNum) - p << "(" << workerNum << " : " << workerNumType << ")"; + p << workerNum << " : " << workerNumType; + p << ")"; } static ParseResult @@ -805,6 +813,9 @@ Type &vectorLengthType, UnitAttr &hasVector) { hasVector = UnitAttr::get(parser.getBuilder().getContext()); if (succeeded(parser.parseOptionalLParen())) { + // Allow empty parenthesis. + if (succeeded(parser.parseOptionalRParen())) + return success(); vectorLength = OpAsmParser::UnresolvedOperand{}; if (parser.parseOperand(*vectorLength) || parser.parseColonType(vectorLengthType) || parser.parseRParen()) @@ -815,11 +826,17 @@ void printVectorClause(OpAsmPrinter &p, Operation *op, Value vectorLength, Type vectorLengthType, UnitAttr hasVector) { + p << "("; if (vectorLength) - p << "(" << vectorLength << " : " << vectorLengthType << ")"; + p << vectorLength << " : " << vectorLengthType; + p << ")"; } LogicalResult acc::LoopOp::verify() { + if (getUpperbound().size() != getInclusiveUpperbound().size()) + return emitError() << "inclusiveUpperbound size is expected to be the same" + << " as upperbound size"; + // auto, independent and seq attribute are mutually exclusive. 
if ((getAuto_() && (getIndependent() || getSeq())) || (getIndependent() && getSeq())) { @@ -850,6 +867,52 @@ return success(); } +Region &acc::LoopOp::getLoopBody() { return getRegion(); } + +/// loop-control ::= `(` ssa-id-and-type-list `)` `=` `(` ssa-id-and-type-list +/// `)` `to` `(` ssa-id-and-type-list `)` `step` `(` ssa-id-and-type-list `)` +ParseResult +parseLoopControl(OpAsmParser &parser, Region &region, + SmallVectorImpl &lowerbound, + SmallVectorImpl &lowerboundType, + SmallVectorImpl &upperbound, + SmallVectorImpl &upperboundType, + SmallVectorImpl &step, + SmallVectorImpl &stepType) { + + SmallVector inductionVars; + if (parser.parseArgumentList(inductionVars, OpAsmParser::Delimiter::Paren, + /*allowType=*/true) || + parser.parseEqual() || parser.parseLParen() || + parser.parseOperandList(lowerbound, inductionVars.size(), + OpAsmParser::Delimiter::None) || + parser.parseColonTypeList(lowerboundType) || parser.parseRParen() || + parser.parseKeyword("to") || parser.parseLParen() || + parser.parseOperandList(upperbound, inductionVars.size(), + OpAsmParser::Delimiter::None) || + parser.parseColonTypeList(upperboundType) || parser.parseRParen() || + parser.parseKeyword("step") || parser.parseLParen() || + parser.parseOperandList(step, inductionVars.size(), + OpAsmParser::Delimiter::None) || + parser.parseColonTypeList(stepType) || parser.parseRParen()) + return failure(); + return parser.parseRegion(region, inductionVars); +} + +void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region, + ValueRange lowerbound, TypeRange lowerboundType, + ValueRange upperbound, TypeRange upperboundType, + ValueRange steps, TypeRange stepType) { + ValueRange regionArgs = region.front().getArguments(); + p << "("; + llvm::interleaveComma(regionArgs, p, + [&p](Value v) { p << v << " : " << v.getType(); }); + p << ") = (" << lowerbound << " : " << lowerboundType << ") to (" + << upperbound << " : " << upperboundType << ") " + << " step (" << steps << " : " << stepType << 
") "; + p.printRegion(region, /*printEntryBlockArgs=*/false); +} + //===----------------------------------------------------------------------===// // DataOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/OpenACC/canonicalize.mlir b/mlir/test/Dialect/OpenACC/canonicalize.mlir --- a/mlir/test/Dialect/OpenACC/canonicalize.mlir +++ b/mlir/test/Dialect/OpenACC/canonicalize.mlir @@ -110,14 +110,16 @@ func.func @testhostdataop(%a: memref, %ifCond: i1) -> () { %0 = acc.use_device varPtr(%a : memref) -> memref + %1 = arith.constant 1 : i32 + %2 = arith.constant 10 : i32 %false = arith.constant false acc.host_data dataOperands(%0 : memref) if(%false) { - acc.loop { + acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { acc.yield - } - acc.loop { + } attributes { inclusiveUpperbound = array } + acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { acc.yield - } + } attributes { inclusiveUpperbound = array } acc.terminator } return diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -1,71 +1,81 @@ // RUN: mlir-opt -split-input-file -verify-diagnostics %s +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} -acc.loop gang { +acc.loop gang() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield -} attributes {seq} +} attributes {seq, inclusiveUpperbound = array} // ----- +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} -acc.loop worker { +acc.loop worker() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield -} attributes {seq} +} attributes {seq, inclusiveUpperbound = array} // ----- 
+%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} -acc.loop vector { +acc.loop vector() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield -} attributes {seq} +} attributes {seq, inclusiveUpperbound = array} // ----- +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} -acc.loop gang worker { +acc.loop gang() worker() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield -} attributes {seq} +} attributes {seq, inclusiveUpperbound = array} // ----- +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} -acc.loop gang vector { +acc.loop gang() vector() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield -} attributes {seq} +} attributes {seq, inclusiveUpperbound = array} // ----- +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} -acc.loop worker vector { +acc.loop worker() vector() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield -} attributes {seq} +} attributes {seq, inclusiveUpperbound = array} // ----- +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{gang, worker or vector cannot appear with the seq attr}} -acc.loop gang worker vector { +acc.loop gang() worker() vector() (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield -} attributes {seq} - -// ----- - -// expected-error@+1 {{expected non-empty body.}} -acc.loop { -} +} attributes {seq, inclusiveUpperbound = array} // ----- +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : 
i32 // expected-error@+1 {{only one of "auto", "independent", "seq" can be present at the same time}} -acc.loop { +acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { acc.yield -} attributes {auto_, seq} +} attributes {auto_, seq, inclusiveUpperbound = array} // ----- @@ -133,11 +143,13 @@ // ----- -acc.loop { +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 +acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32){ // expected-error@+1 {{'acc.init' op cannot be nested in a compute operation}} acc.init acc.yield -} +} attributes {inclusiveUpperbound = array} // ----- @@ -149,21 +161,25 @@ // ----- -acc.loop { +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 +acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { // expected-error@+1 {{'acc.shutdown' op cannot be nested in a compute operation}} acc.shutdown acc.yield -} +} attributes {inclusiveUpperbound = array} // ----- -acc.loop { +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 +acc.loop (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() ({ // expected-error@+1 {{'acc.shutdown' op cannot be nested in a compute operation}} acc.shutdown }) : () -> () acc.yield -} +} attributes {inclusiveUpperbound = array} // ----- @@ -395,8 +411,10 @@ // ----- +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{expected ')'}} -acc.loop gang(static=%i64Value: i64, num=%i64Value: i64 { +acc.loop gang(static=%i64Value: i64, num=%i64Value: i64 (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield } @@ -464,8 +482,10 @@ // ----- +%1 = arith.constant 1 : i32 +%2 = arith.constant 10 : i32 // expected-error@+1 {{new value expected after comma}} -acc.loop gang(static=%i64Value: i64, ) { +acc.loop gang(static=%i64Value: i64, ) (%iv : i32) = (%1 : i32) to (%2 : i32) step (%1 : i32) { "test.openacc_dummy_op"() : () -> () acc.yield } @@ -481,14 +501,6 @@ // ----- -// 
expected-error@+1 {{expect at least one of num, dim or static values}} -acc.loop gang() { - "test.openacc_dummy_op"() : () -> () - acc.yield -} - -// ----- - %i64value = arith.constant 1 : i64 // expected-error@+1 {{num_gangs expects a maximum of 3 values}} acc.parallel num_gangs(%i64value, %i64value, %i64value, %i64value : i64, i64, i64, i64) { diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -11,52 +11,40 @@ %async = arith.constant 1 : i64 acc.parallel async(%async: i64) { - acc.loop gang vector { - scf.for %arg3 = %c0 to %c10 step %c1 { - scf.for %arg4 = %c0 to %c10 step %c1 { - scf.for %arg5 = %c0 to %c10 step %c1 { - %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32> - %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32> - %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32> - %p = arith.mulf %a, %b : f32 - %co = arith.addf %cij, %p : f32 - memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32> - } - } - } + acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) = (%c0, %c0, %c0 : index, index, index) to (%c10, %c10, %c10 : index, index, index) step (%c1, %c1, %c1 : index, index, index) { + %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32> + %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32> + %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32> + %p = arith.mulf %a, %b : f32 + %co = arith.addf %cij, %p : f32 + memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32> acc.yield - } attributes { collapse = 3 } + } attributes { collapse = 3, inclusiveUpperbound = array } acc.yield } return %C : memref<10x10xf32> } -// CHECK-LABEL: func @compute1( +// CHECK-LABEL: func @compute1 // CHECK-NEXT: %{{.*}} = arith.constant 0 : index // CHECK-NEXT: %{{.*}} = arith.constant 10 : index // CHECK-NEXT: %{{.*}} = arith.constant 1 : index // CHECK-NEXT: [[ASYNC:%.*]] = arith.constant 1 : i64 // CHECK-NEXT: acc.parallel 
async([[ASYNC]] : i64) { -// CHECK-NEXT: acc.loop gang vector { -// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32 -// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32 -// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> -// CHECK-NEXT: } -// CHECK-NEXT: } -// CHECK-NEXT: } +// CHECK-NEXT: acc.loop gang() vector() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> +// CHECK-NEXT: %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32 +// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32 +// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: acc.yield -// CHECK-NEXT: } attributes {collapse = 3 : i64} +// CHECK-NEXT: } attributes {collapse = 3 : i64, inclusiveUpperbound = array} // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK-NEXT: return %{{.*}} : memref<10x10xf32> -// CHECK-NEXT: } + // ----- @@ -66,21 +54,19 @@ %c1 = arith.constant 1 : index acc.parallel { - acc.loop { - scf.for %arg3 = %c0 to %c10 step %c1 { - scf.for %arg4 = %c0 to %c10 step %c1 { - scf.for %arg5 = %c0 to %c10 step %c1 { - %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32> - %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32> - %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32> - %p = arith.mulf %a, %b 
: f32 - %co = arith.addf %cij, %p : f32 - memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32> - } + acc.loop (%arg3 : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { + scf.for %arg4 = %c0 to %c10 step %c1 { + scf.for %arg5 = %c0 to %c10 step %c1 { + %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32> + %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32> + %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32> + %p = arith.mulf %a, %b : f32 + %co = arith.addf %cij, %p : f32 + memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32> } } acc.yield - } attributes {seq} + } attributes { seq, inclusiveUpperbound = array } acc.yield } @@ -92,8 +78,7 @@ // CHECK-NEXT: %{{.*}} = arith.constant 10 : index // CHECK-NEXT: %{{.*}} = arith.constant 1 : index // CHECK-NEXT: acc.parallel { -// CHECK-NEXT: acc.loop { -// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { +// CHECK-NEXT: acc.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { // CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> @@ -104,9 +89,8 @@ // CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: } // CHECK-NEXT: } -// CHECK-NEXT: } // CHECK-NEXT: acc.yield -// CHECK-NEXT: } attributes {seq} +// CHECK-NEXT: } attributes {inclusiveUpperbound = array, seq} // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK-NEXT: return %{{.*}} : memref<10x10xf32> @@ -138,32 +122,25 @@ acc.data dataOperands(%pa, %pb, %pc, %pd: memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) { %private = acc.private varPtr(%c : memref<10xf32>) -> memref<10xf32> acc.parallel num_gangs(%numGangs: i64) num_workers(%numWorkers: i64) private(@privatization_memref_10_f32 -> %private : memref<10xf32>) { - acc.loop gang { - scf.for %x = %lb to %c10 step %st { - acc.loop worker { - scf.for %y = %lb 
to %c10 step %st { - %axy = memref.load %a[%x, %y] : memref<10x10xf32> - %bxy = memref.load %b[%x, %y] : memref<10x10xf32> - %tmp = arith.addf %axy, %bxy : f32 - memref.store %tmp, %c[%y] : memref<10xf32> - } - acc.yield - } - - acc.loop { - // for i = 0 to 10 step 1 - // d[x] += c[i] - scf.for %i = %lb to %c10 step %st { - %ci = memref.load %c[%i] : memref<10xf32> - %dx = memref.load %d[%x] : memref<10xf32> - %z = arith.addf %ci, %dx : f32 - memref.store %z, %d[%x] : memref<10xf32> - } - acc.yield - } attributes {seq} - } + acc.loop gang() (%x : index) = (%lb : index) to (%c10 : index) step (%st : index) { + acc.loop worker() (%y : index) = (%lb : index) to (%c10 : index) step (%st : index) { + %axy = memref.load %a[%x, %y] : memref<10x10xf32> + %bxy = memref.load %b[%x, %y] : memref<10x10xf32> + %tmp = arith.addf %axy, %bxy : f32 + memref.store %tmp, %c[%y] : memref<10xf32> + acc.yield + } attributes {inclusiveUpperbound = array} + acc.loop (%i : index) = (%lb : index) to (%c10 : index) step (%st : index) { + // for i = 0 to 10 step 1 + // d[x] += c[i] + %ci = memref.load %c[%i] : memref<10xf32> + %dx = memref.load %d[%x] : memref<10xf32> + %z = arith.addf %ci, %dx : f32 + memref.store %z, %d[%x] : memref<10xf32> + acc.yield + } attributes {seq, inclusiveUpperbound = array} acc.yield - } + } attributes {inclusiveUpperbound = array} acc.yield } acc.terminator @@ -181,27 +158,21 @@ // CHECK: acc.data dataOperands(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) { // CHECK-NEXT: %[[P_ARG2:.*]] = acc.private varPtr([[ARG2]] : memref<10xf32>) -> memref<10xf32> // CHECK-NEXT: acc.parallel num_gangs([[NUMGANG]] : i64) num_workers([[NUMWORKERS]] : i64) private(@privatization_memref_10_f32 -> %[[P_ARG2]] : memref<10xf32>) { -// CHECK-NEXT: acc.loop gang { -// CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] { -// CHECK-NEXT: acc.loop worker { -// CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step 
[[C1]] { +// CHECK-NEXT: acc.loop gang() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) +// CHECK-NEXT: acc.loop worker() (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) // CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> // CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32 // CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> -// CHECK-NEXT: } // CHECK-NEXT: acc.yield // CHECK-NEXT: } -// CHECK-NEXT: acc.loop { -// CHECK-NEXT: scf.for %{{.*}} = [[C0]] to [[C10]] step [[C1]] { +// CHECK-NEXT: acc.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) // CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32> // CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32 // CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> -// CHECK-NEXT: } // CHECK-NEXT: acc.yield -// CHECK-NEXT: } attributes {seq} -// CHECK-NEXT: } +// CHECK-NEXT: } attributes {inclusiveUpperbound = array, seq} // CHECK-NEXT: acc.yield // CHECK-NEXT: } // CHECK-NEXT: acc.yield @@ -217,161 +188,151 @@ %i64Value = arith.constant 1 : i64 %i32Value = arith.constant 128 : i32 %idxValue = arith.constant 8 : index + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index - acc.loop gang worker vector { + acc.loop gang() worker() vector() (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop gang(num=%i64Value: i64) { + } attributes {inclusiveUpperbound = array} + acc.loop gang(num=%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop gang(static=%i64Value: i64) { + } attributes {inclusiveUpperbound = array} + acc.loop gang(static=%i64Value: i64) (%iv : index) = 
(%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop worker(%i64Value: i64) { + } attributes {inclusiveUpperbound = array} + acc.loop worker(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop worker(%i32Value: i32) { + } attributes {inclusiveUpperbound = array} + acc.loop worker(%i32Value: i32) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop worker(%idxValue: index) { + } attributes {inclusiveUpperbound = array} + acc.loop worker(%idxValue: index) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop vector(%i64Value: i64) { + } attributes {inclusiveUpperbound = array} + acc.loop vector(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop vector(%i32Value: i32) { + } attributes {inclusiveUpperbound = array} + acc.loop vector(%i32Value: i32) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop vector(%idxValue: index) { + } attributes {inclusiveUpperbound = array} + acc.loop vector(%idxValue: index) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop gang(num=%i64Value: i64) worker vector { + } attributes {inclusiveUpperbound = array} + acc.loop gang(num=%i64Value: i64) worker() vector() (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop gang(num=%i64Value: i64, static=%i64Value: i64) worker(%i64Value: i64) vector(%i64Value: i64) { + } attributes {inclusiveUpperbound = array} + acc.loop gang(num=%i64Value: i64, static=%i64Value: 
i64) worker(%i64Value: i64) vector(%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop gang(num=%i32Value: i32, static=%idxValue: index) { + } attributes {inclusiveUpperbound = array} + acc.loop gang(num=%i32Value: i32, static=%idxValue: index) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop tile(%i64Value, %i64Value : i64, i64) { + } attributes {inclusiveUpperbound = array} + acc.loop tile(%i64Value, %i64Value : i64, i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop tile(%i32Value, %i32Value : i32, i32) { + } attributes {inclusiveUpperbound = array} + acc.loop tile(%i32Value, %i32Value : i32, i32) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop gang(static=%i64Value: i64, num=%i64Value: i64) { + } attributes {inclusiveUpperbound = array} + acc.loop gang(static=%i64Value: i64, num=%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } - acc.loop gang(dim=%i64Value : i64, static=%i64Value: i64) { + } attributes {inclusiveUpperbound = array} + acc.loop gang(dim=%i64Value : i64, static=%i64Value: i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { "test.openacc_dummy_op"() : () -> () acc.yield - } + } attributes {inclusiveUpperbound = array} return } // CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64 // CHECK-NEXT: [[I32VALUE:%.*]] = arith.constant 128 : i32 // CHECK-NEXT: [[IDXVALUE:%.*]] = arith.constant 8 : index -// CHECK: acc.loop gang worker vector { +// CHECK: acc.loop gang() worker() vector() // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop 
gang(num=[[I64VALUE]] : i64) { +// CHECK: acc.loop gang(num=[[I64VALUE]] : i64) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop gang(static=[[I64VALUE]] : i64) { +// CHECK: acc.loop gang(static=[[I64VALUE]] : i64) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop worker([[I64VALUE]] : i64) { +// CHECK: acc.loop worker([[I64VALUE]] : i64) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop worker([[I32VALUE]] : i32) { +// CHECK: acc.loop worker([[I32VALUE]] : i32) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop worker([[IDXVALUE]] : index) { +// CHECK: acc.loop worker([[IDXVALUE]] : index) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop vector([[I64VALUE]] : i64) { +// CHECK: acc.loop vector([[I64VALUE]] : i64) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop vector([[I32VALUE]] : i32) { +// CHECK: acc.loop vector([[I32VALUE]] : i32) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop vector([[IDXVALUE]] : index) { +// CHECK: acc.loop vector([[IDXVALUE]] : index) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop gang(num=[[I64VALUE]] : i64) worker vector { +// CHECK: acc.loop gang(num=[[I64VALUE]] : i64) worker() vector() // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop gang(num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) worker([[I64VALUE]] : i64) vector([[I64VALUE]] : i64) { +// CHECK: acc.loop gang(num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) worker([[I64VALUE]] : i64) 
vector([[I64VALUE]] : i64) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop gang(num=[[I32VALUE]] : i32, static=[[IDXVALUE]] : index) { +// CHECK: acc.loop gang(num=[[I32VALUE]] : i32, static=[[IDXVALUE]] : index) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop tile([[I64VALUE]], [[I64VALUE]] : i64, i64) { +// CHECK: acc.loop tile([[I64VALUE]], [[I64VALUE]] : i64, i64) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop tile([[I32VALUE]], [[I32VALUE]] : i32, i32) { +// CHECK: acc.loop tile([[I32VALUE]], [[I32VALUE]] : i32, i32) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop gang(num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) { +// CHECK: acc.loop gang(num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } -// CHECK: acc.loop gang(dim=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) { +// CHECK: acc.loop gang(dim=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64) // CHECK-NEXT: "test.openacc_dummy_op"() : () -> () // CHECK-NEXT: acc.yield -// CHECK-NEXT: } // ----- func.func @acc_loop_multiple_block() { + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index acc.parallel { - acc.loop { - %c1 = arith.constant 1 : index - cf.br ^bb1(%c1 : index) + acc.loop (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { + %c1_1 = arith.constant 1 : index + cf.br ^bb1(%c1_1 : index) ^bb1(%9: index): - %c0 = arith.constant 0 : index - %12 = arith.cmpi sgt, %9, %c0 : index + %c0_1 = arith.constant 0 : index + %12 = arith.cmpi sgt, %9, %c0_1 : index cf.cond_br %12, ^bb2, ^bb3 ^bb2: %c1_0 = arith.constant 1 : index - %c10 = arith.constant 10 : index - %22 = arith.subi %c10, 
%c1_0 : index + %c10_1 = arith.constant 10 : index + %22 = arith.subi %c10_1, %c1_0 : index cf.br ^bb1(%22 : index) ^bb3: acc.yield - } + } attributes {inclusiveUpperbound = array} acc.yield } return @@ -1503,10 +1464,13 @@ // CHECK: } func.func @acc_reduc_test(%a : i64) -> () { + %c0 = arith.constant 0 : index + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index acc.parallel reduction(@reduction_add_i64 -> %a : i64) { - acc.loop reduction(@reduction_add_i64 -> %a : i64) { + acc.loop reduction(@reduction_add_i64 -> %a : i64) (%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { acc.yield - } + } attributes { inclusiveUpperbound = array } acc.yield } return