diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -16,6 +16,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" +#include "mlir/IR/PatternMatch.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Bytecode/BytecodeOpInterface.h" diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -1191,13 +1191,14 @@ // Yield operation for the acc.loop and acc.parallel operations. def OpenACC_YieldOp : OpenACC_Op<"yield", [ReturnLike, Terminator, - ParentOneOf<["FirstprivateRecipeOp, LoopOp, ParallelOp, PrivateRecipeOp, ReductionRecipeOp, SerialOp"]>]> { + ParentOneOf<["FirstprivateRecipeOp, LoopOp, ParallelOp, PrivateRecipeOp," + "ReductionRecipeOp, SerialOp, AtomicUpdateOp"]>]> { let summary = "Acc yield and termination operation"; let description = [{ `acc.yield` is a special terminator operation for block inside regions in - acc ops (parallel and loop). It returns values to the immediately enclosing - acc op. + various acc ops (including parallel, loop, atomic.update). It returns values + to the immediately enclosing acc op. }]; let arguments = (ins Variadic:$operands); @@ -1207,6 +1208,165 @@ let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; } +//===----------------------------------------------------------------------===// +// 2.12 atomic construct +//===----------------------------------------------------------------------===// + +def AtomicReadOp : OpenACC_Op<"atomic.read", [AllTypesMatch<["x", "v"]>]> { + + let summary = "performs an atomic read"; + + let description = [{ + This operation performs an atomic read. + + The operand `x` is the address from where the value is atomically read. 
+ The operand `v` is the address where the value is stored after reading. + }]; + + let arguments = (ins OpenACC_PointerLikeType:$x, + OpenACC_PointerLikeType:$v, + TypeAttr:$element_type); + let assemblyFormat = [{ + $v `=` $x + `:` type($x) `,` $element_type attr-dict + }]; + let hasVerifier = 1; +} + +def AtomicWriteOp : OpenACC_Op<"atomic.write"> { + + let summary = "performs an atomic write"; + + let description = [{ + This operation performs an atomic write. + + The operand `address` is the address to where the `value` is atomically + written w.r.t. multiple threads. The evaluation of `value` need not be + atomic w.r.t. the write to address. In general, the type(address) must + dereference to type(value). + }]; + + let arguments = (ins OpenACC_PointerLikeType:$address, + AnyType:$value); + let assemblyFormat = [{ + $address `=` $value + `:` type($address) `,` type($value) + attr-dict + }]; + let hasVerifier = 1; +} + +def AtomicUpdateOp : OpenACC_Op<"atomic.update", + [SingleBlockImplicitTerminator<"YieldOp">, + RecursiveMemoryEffects]> { + + let summary = "performs an atomic update"; + + let description = [{ + This operation performs an atomic update. + + The operand `x` is exactly the same as the operand `x` in the OpenACC + Standard (OpenACC 3.3, section 2.12). It is the address of the variable + that is being updated. `x` is atomically read/written. + + The region describes how to update the value of `x`. It takes the value at + `x` as an input and must yield the updated value. Only the update to `x` is + atomic. Generally the region must have only one instruction, but can + potentially have more than one instruction too. The update is semantically + similar to a compare-exchange loop based atomic update. + + The syntax of atomic update operation is different from atomic read and + atomic write operations. This is because only the host dialect knows how to + appropriately update a value.
For example, while generating LLVM IR, if + there are no special `atomicrmw` instructions for the operation-type + combination in atomic update, a compare-exchange loop is generated, where + the core update operation is directly translated like regular operations by + the host dialect. The front-end must handle semantic checks for allowed + operations. + }]; + + let arguments = (ins Arg:$x); + let regions = (region SizedRegion<1>:$region); + let assemblyFormat = [{ + $x `:` type($x) $region attr-dict + }]; + let hasVerifier = 1; + let hasRegionVerifier = 1; + let hasCanonicalizeMethod = 1; + let extraClassDeclaration = [{ + Operation* getFirstOp() { + return &getRegion().front().getOperations().front(); + } + + /// Returns true if the new value is same as old value and the operation is + /// a no-op, false otherwise. + bool isNoOp(); + + /// Returns the new value if the operation is equivalent to just a write + /// operation. Otherwise, returns nullptr. + Value getWriteOpVal(); + }]; +} + +def AtomicCaptureOp : OpenACC_Op<"atomic.capture", + [SingleBlockImplicitTerminator<"TerminatorOp">]> { + let summary = "performs an atomic capture"; + let description = [{ + This operation performs an atomic capture. + + The region has the following allowed forms: + + ``` + acc.atomic.capture { + acc.atomic.update ... + acc.atomic.read ... + acc.terminator + } + + acc.atomic.capture { + acc.atomic.read ... + acc.atomic.update ... + acc.terminator + } + + acc.atomic.capture { + acc.atomic.read ... + acc.atomic.write ... 
+ acc.terminator + } + ``` + + }]; + + let regions = (region SizedRegion<1>:$region); + let assemblyFormat = [{ + $region attr-dict + }]; + let hasRegionVerifier = 1; + let hasVerifier = 1; + let extraClassDeclaration = [{ + /// Returns the first operation in atomic capture region + Operation* getFirstOp(); + + /// Returns the second operation in atomic capture region + Operation* getSecondOp(); + + /// Returns the `atomic.read` operation inside the region, if any. + /// Otherwise, it returns nullptr. + AtomicReadOp getAtomicReadOp(); + + /// Returns the `atomic.write` operation inside the region, if any. + /// Otherwise, it returns nullptr. + AtomicWriteOp getAtomicWriteOp(); + + /// Returns the `atomic.update` operation inside the region, if any. + /// Otherwise, it returns nullptr. + AtomicUpdateOp getAtomicUpdateOp(); + }]; +} + //===----------------------------------------------------------------------===// // 2.13 Declare Directive //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -25,6 +25,17 @@ #include "mlir/Dialect/OpenACC/OpenACCOpsEnums.cpp.inc" #include "mlir/Dialect/OpenACC/OpenACCTypeInterfaces.cpp.inc" +namespace { +/// Model for pointer-like types that already provide a `getElementType` method. +template +struct PointerLikeModel + : public PointerLikeType::ExternalModel, T> { + Type getElementType(Type pointer) const { + return llvm::cast(pointer).getElementType(); + } +}; +} // namespace + //===----------------------------------------------------------------------===// // OpenACC operations //===----------------------------------------------------------------------===// @@ -46,8 +57,9 @@ // By attaching interfaces here, we make the OpenACC dialect dependent on // the other dialects. 
This is probably better than having dialects like LLVM // and memref be dependent on OpenACC. - LLVM::LLVMPointerType::attachInterface(*getContext()); - MemRefType::attachInterface(*getContext()); + LLVM::LLVMPointerType::attachInterface< + PointerLikeModel>(*getContext()); + MemRefType::attachInterface>(*getContext()); } //===----------------------------------------------------------------------===// @@ -975,6 +987,157 @@ results.add>(context); } +//===----------------------------------------------------------------------===// +// AtomicReadOp +//===----------------------------------------------------------------------===// + +LogicalResult AtomicReadOp::verify() { + if (getX() == getV()) + return emitError( + "read and write must not be to the same location for atomic reads"); + + return success(); +} + +//===----------------------------------------------------------------------===// +// AtomicWriteOp +//===----------------------------------------------------------------------===// + +LogicalResult AtomicWriteOp::verify() { + Type elementType = getAddress().getType().getElementType(); + if (elementType && elementType != getValue().getType()) + return emitError("address must dereference to value type"); + return success(); +} + +//===----------------------------------------------------------------------===// +// AtomicUpdateOp +//===----------------------------------------------------------------------===// + +bool AtomicUpdateOp::isNoOp() { + YieldOp yieldOp = dyn_cast(getFirstOp()); + return (yieldOp && + yieldOp.getOperands().front() == getRegion().front().getArgument(0)); +} + +Value AtomicUpdateOp::getWriteOpVal() { + YieldOp yieldOp = dyn_cast(getFirstOp()); + if (yieldOp && + yieldOp.getOperands().front() != getRegion().front().getArgument(0)) + return yieldOp.getOperands().front(); + return nullptr; +} + +LogicalResult AtomicUpdateOp::canonicalize(AtomicUpdateOp op, + PatternRewriter &rewriter) { + if (op.isNoOp()) { + rewriter.eraseOp(op); + return 
success(); + } + + if (Value writeVal = op.getWriteOpVal()) { + rewriter.replaceOpWithNewOp(op, op.getX(), writeVal); + return success(); + } + + return failure(); +} + +LogicalResult AtomicUpdateOp::verify() { + if (getRegion().getNumArguments() != 1) + return emitError("the region must accept exactly one argument"); + + Type elementType = getX().getType().getElementType(); + if (elementType && elementType != getRegion().getArgument(0).getType()) { + return emitError("the type of the operand must be a pointer type whose " + "element type is the same as that of the region argument"); + } + + return success(); +} + +LogicalResult AtomicUpdateOp::verifyRegions() { + YieldOp yieldOp = *getRegion().getOps().begin(); + + if (yieldOp.getOperands().size() != 1) + return emitError("only updated value must be returned"); + if (yieldOp.getOperands().front().getType() != + getRegion().getArgument(0).getType()) + return emitError("input and yielded value must have the same type"); + return success(); +} + +//===----------------------------------------------------------------------===// +// AtomicCaptureOp +//===----------------------------------------------------------------------===// + +Operation *AtomicCaptureOp::getFirstOp() { + return &getRegion().front().getOperations().front(); +} + +Operation *AtomicCaptureOp::getSecondOp() { + auto &ops = getRegion().front().getOperations(); + return ops.getNextNode(ops.front()); +} + +AtomicReadOp AtomicCaptureOp::getAtomicReadOp() { + if (auto op = dyn_cast(getFirstOp())) + return op; + return dyn_cast(getSecondOp()); +} + +AtomicWriteOp AtomicCaptureOp::getAtomicWriteOp() { + if (auto op = dyn_cast(getFirstOp())) + return op; + return dyn_cast(getSecondOp()); +} + +AtomicUpdateOp AtomicCaptureOp::getAtomicUpdateOp() { + if (auto op = dyn_cast(getFirstOp())) + return op; + return dyn_cast(getSecondOp()); +} + +LogicalResult AtomicCaptureOp::verify() { return success(); } + +LogicalResult AtomicCaptureOp::verifyRegions() { + 
Block::OpListType &ops = getRegion().front().getOperations(); + if (ops.size() != 3) + return emitError() + << "expected three operations in acc.atomic.capture region (one " + "terminator, and two atomic ops)"; + auto &firstOp = ops.front(); + auto &secondOp = *ops.getNextNode(firstOp); + auto firstReadStmt = dyn_cast(firstOp); + auto firstUpdateStmt = dyn_cast(firstOp); + auto secondReadStmt = dyn_cast(secondOp); + auto secondUpdateStmt = dyn_cast(secondOp); + auto secondWriteStmt = dyn_cast(secondOp); + + if (!((firstUpdateStmt && secondReadStmt) || + (firstReadStmt && secondUpdateStmt) || + (firstReadStmt && secondWriteStmt))) + return ops.front().emitError() + << "invalid sequence of operations in the capture region"; + if (firstUpdateStmt && secondReadStmt && + firstUpdateStmt.getX() != secondReadStmt.getX()) + return firstUpdateStmt.emitError() + << "updated variable in acc.atomic.update must be captured in " + "second operation"; + if (firstReadStmt && secondUpdateStmt && + firstReadStmt.getX() != secondUpdateStmt.getX()) + return firstReadStmt.emitError() + << "captured variable in acc.atomic.read must be updated in second " + "operation"; + if (firstReadStmt && secondWriteStmt && + firstReadStmt.getX() != secondWriteStmt.getAddress()) + return firstReadStmt.emitError() + << "captured variable in acc.atomic.read must be updated in " + "second operation"; + + return success(); +} + //===----------------------------------------------------------------------===// // DeclareEnterOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -1726,3 +1726,149 @@ // CHECK: acc.set device_num([[IDXVALUE]] : index) // CHECK: acc.set device_num([[IDXVALUE]] : index) if([[IFCOND]]) // CHECK: acc.set default_async([[I32VALUE]] : i32) + +// ----- + +// CHECK-LABEL: func.func 
@acc_atomic_read +// CHECK-SAME: (%[[v:.*]]: memref, %[[x:.*]]: memref) +func.func @acc_atomic_read(%v: memref, %x: memref) { + // CHECK: acc.atomic.read %[[v]] = %[[x]] : memref, i32 + acc.atomic.read %v = %x : memref, i32 + return +} + +// ----- + +// CHECK-LABEL: func.func @acc_atomic_write +// CHECK-SAME: (%[[ADDR:.*]]: memref, %[[VAL:.*]]: i32) +func.func @acc_atomic_write(%addr : memref, %val : i32) { + // CHECK: acc.atomic.write %[[ADDR]] = %[[VAL]] : memref, i32 + acc.atomic.write %addr = %val : memref, i32 + return +} + +// ----- + +// CHECK-LABEL: func.func @acc_atomic_update +// CHECK-SAME: (%[[X:.*]]: memref, %[[EXPR:.*]]: i32, %[[XBOOL:.*]]: memref, %[[EXPRBOOL:.*]]: i1) +func.func @acc_atomic_update(%x : memref, %expr : i32, %xBool : memref, %exprBool : i1) { + // CHECK: acc.atomic.update %[[X]] : memref + // CHECK-NEXT: (%[[XVAL:.*]]: i32): + // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.add %[[XVAL]], %[[EXPR]] : i32 + // CHECK-NEXT: acc.yield %[[NEWVAL]] : i32 + acc.atomic.update %x : memref { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + acc.yield %newval : i32 + } + // CHECK: acc.atomic.update %[[XBOOL]] : memref + // CHECK-NEXT: (%[[XVAL:.*]]: i1): + // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.and %[[XVAL]], %[[EXPRBOOL]] : i1 + // CHECK-NEXT: acc.yield %[[NEWVAL]] : i1 + acc.atomic.update %xBool : memref { + ^bb0(%xval: i1): + %newval = llvm.and %xval, %exprBool : i1 + acc.yield %newval : i1 + } + // CHECK: acc.atomic.update %[[X]] : memref + // CHECK-NEXT: (%[[XVAL:.*]]: i32): + // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.shl %[[XVAL]], %[[EXPR]] : i32 + // CHECK-NEXT: acc.yield %[[NEWVAL]] : i32 + // CHECK-NEXT: } + acc.atomic.update %x : memref { + ^bb0(%xval: i32): + %newval = llvm.shl %xval, %expr : i32 + acc.yield %newval : i32 + } + // CHECK: acc.atomic.update %[[X]] : memref + // CHECK-NEXT: (%[[XVAL:.*]]: i32): + // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.intr.smax(%[[XVAL]], %[[EXPR]]) : (i32, i32) -> i32 + // CHECK-NEXT: acc.yield %[[NEWVAL]] : 
i32 + // CHECK-NEXT: } + acc.atomic.update %x : memref { + ^bb0(%xval: i32): + %newval = llvm.intr.smax(%xval, %expr) : (i32, i32) -> i32 + acc.yield %newval : i32 + } + + // CHECK: acc.atomic.update %[[XBOOL]] : memref + // CHECK-NEXT: (%[[XVAL:.*]]: i1): + // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.icmp "eq" %[[XVAL]], %[[EXPRBOOL]] : i1 + // CHECK-NEXT: acc.yield %[[NEWVAL]] : i1 + // } + acc.atomic.update %xBool : memref { + ^bb0(%xval: i1): + %newval = llvm.icmp "eq" %xval, %exprBool : i1 + acc.yield %newval : i1 + } + + // CHECK: acc.atomic.update %[[X]] : memref { + // CHECK-NEXT: (%[[XVAL:.*]]: i32): + // CHECK-NEXT: acc.yield %[[XVAL]] : i32 + // CHECK-NEXT: } + acc.atomic.update %x : memref { + ^bb0(%xval:i32): + acc.yield %xval : i32 + } + + // CHECK: acc.atomic.update %[[X]] : memref { + // CHECK-NEXT: (%[[XVAL:.*]]: i32): + // CHECK-NEXT: acc.yield %{{.+}} : i32 + // CHECK-NEXT: } + %const = arith.constant 42 : i32 + acc.atomic.update %x : memref { + ^bb0(%xval:i32): + acc.yield %const : i32 + } + + return +} + +// ----- + +// CHECK-LABEL: func.func @acc_atomic_capture +// CHECK-SAME: (%[[v:.*]]: memref, %[[x:.*]]: memref, %[[expr:.*]]: i32) +func.func @acc_atomic_capture(%v: memref, %x: memref, %expr: i32) { + // CHECK: acc.atomic.capture { + // CHECK-NEXT: acc.atomic.update %[[x]] : memref + // CHECK-NEXT: (%[[xval:.*]]: i32): + // CHECK-NEXT: %[[newval:.*]] = llvm.add %[[xval]], %[[expr]] : i32 + // CHECK-NEXT: acc.yield %[[newval]] : i32 + // CHECK-NEXT: } + // CHECK-NEXT: acc.atomic.read %[[v]] = %[[x]] : memref, i32 + // CHECK-NEXT: } + acc.atomic.capture { + acc.atomic.update %x : memref { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + acc.yield %newval : i32 + } + acc.atomic.read %v = %x : memref, i32 + } + // CHECK: acc.atomic.capture { + // CHECK-NEXT: acc.atomic.read %[[v]] = %[[x]] : memref, i32 + // CHECK-NEXT: acc.atomic.update %[[x]] : memref + // CHECK-NEXT: (%[[xval:.*]]: i32): + // CHECK-NEXT: %[[newval:.*]] = llvm.add 
%[[xval]], %[[expr]] : i32 + // CHECK-NEXT: acc.yield %[[newval]] : i32 + // CHECK-NEXT: } + // CHECK-NEXT: } + acc.atomic.capture { + acc.atomic.read %v = %x : memref, i32 + acc.atomic.update %x : memref { + ^bb0(%xval: i32): + %newval = llvm.add %xval, %expr : i32 + acc.yield %newval : i32 + } + } + // CHECK: acc.atomic.capture { + // CHECK-NEXT: acc.atomic.read %[[v]] = %[[x]] : memref, i32 + // CHECK-NEXT: acc.atomic.write %[[x]] = %[[expr]] : memref, i32 + // CHECK-NEXT: } + acc.atomic.capture { + acc.atomic.read %v = %x : memref, i32 + acc.atomic.write %x = %expr : memref, i32 + } + + return +}