diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3586,6 +3586,8 @@
     InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
     AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
     AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
+  // TODO: handle the case where XElemTy is not byte-sized or not a power of 2
+  // or a complex datatype.
   bool DoCmpExch =
       (RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd) ||
       (RMWOp == AtomicRMWInst::FSub) ||
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -650,6 +650,15 @@
     atomic. Generally the region must have only one instruction, but can
     potentially have more than one instructions too. The update is sematically
     similar to a compare-exchange loop based atomic update.
+
+    The syntax of the atomic update operation differs from that of the atomic
+    read and atomic write operations. This is because only the host dialect
+    knows how to appropriately update a value. For example, while generating
+    LLVM IR, if there is no special `atomicrmw` instruction for the
+    operation-type combination in an atomic update, a compare-exchange loop is
+    generated, where the core update operation is translated like a regular
+    operation by the host dialect. The front-end must handle semantic checks
+    for the allowed operations.
   }];

   let arguments = (ins OpenMP_PointerLikeType:$x,
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1047,6 +1047,10 @@
                        "element type is the same as that of the region argument");
   }

+  if (region().front().getOperations().size() < 2)
+    return emitError() << "the update region must have at least two operations "
+                          "(binop and terminator)";
+
   YieldOp yieldOp = *region().getOps<YieldOp>().begin();

   if (yieldOp.results().size() != 1)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -942,6 +942,103 @@
   return success();
 }

+/// Converts an LLVM dialect binary operation to the corresponding enum value
+/// for an `atomicrmw`-supported binary operation.
+llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
+  return llvm::TypeSwitch<Operation *, llvm::AtomicRMWInst::BinOp>(&op)
+      .Case([&](LLVM::AddOp) { return llvm::AtomicRMWInst::BinOp::Add; })
+      .Case([&](LLVM::SubOp) { return llvm::AtomicRMWInst::BinOp::Sub; })
+      .Case([&](LLVM::AndOp) { return llvm::AtomicRMWInst::BinOp::And; })
+      .Case([&](LLVM::OrOp) { return llvm::AtomicRMWInst::BinOp::Or; })
+      .Case([&](LLVM::XOrOp) { return llvm::AtomicRMWInst::BinOp::Xor; })
+      .Case([&](LLVM::UMaxOp) { return llvm::AtomicRMWInst::BinOp::UMax; })
+      .Case([&](LLVM::UMinOp) { return llvm::AtomicRMWInst::BinOp::UMin; })
+      .Case([&](LLVM::FAddOp) { return llvm::AtomicRMWInst::BinOp::FAdd; })
+      .Case([&](LLVM::FSubOp) { return llvm::AtomicRMWInst::BinOp::FSub; })
+      .Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
+}
+
+/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
+static LogicalResult
+convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
+                       llvm::IRBuilderBase &builder,
+                       LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
+
+  // Convert values and types.
+  auto &innerOpList = opInst.region().front().getOperations();
+  if (innerOpList.size() != 2)
+    return opInst.emitError("exactly two operations are allowed inside an "
+                            "atomic update region while lowering to LLVM IR");
+
+  Operation &innerUpdateOp = innerOpList.front();
+
+  if (innerUpdateOp.getNumOperands() != 2 ||
+      !llvm::is_contained(innerUpdateOp.getOperands(),
+                          opInst.getRegion().getArgument(0)))
+    return opInst.emitError(
+        "the update operation inside the region must be a binary operation and "
+        "that update operation must have the region argument as an operand");
+
+  llvm::AtomicRMWInst::BinOp binop = convertBinOpToAtomic(innerUpdateOp);
+
+  bool isXBinopExpr =
+      innerUpdateOp.getNumOperands() > 0 &&
+      innerUpdateOp.getOperand(0) == opInst.getRegion().getArgument(0);
+
+  mlir::Value mlirExpr = (isXBinopExpr ? innerUpdateOp.getOperand(1)
+                                       : innerUpdateOp.getOperand(0));
+  llvm::Value *llvmExpr = moduleTranslation.lookupValue(mlirExpr);
+  llvm::Value *llvmX = moduleTranslation.lookupValue(opInst.x());
+  LLVM::LLVMPointerType mlirXType =
+      opInst.x().getType().cast<LLVM::LLVMPointerType>();
+  llvm::Type *llvmXElementType =
+      moduleTranslation.convertType(mlirXType.getElementType());
+  llvm::OpenMPIRBuilder::AtomicOpValue llvmAtomicX = {llvmX, llvmXElementType,
+                                                      /*isSigned=*/false,
+                                                      /*isVolatile=*/false};
+
+  llvm::AtomicOrdering atomicOrdering =
+      convertAtomicOrdering(opInst.memory_order_val());
+
+  // Generate update code.
+  LogicalResult updateGenStatus = success();
+  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
+                      llvm::Value *atomicx,
+                      llvm::IRBuilder<> &builder) -> llvm::Value * {
+    Block &bb = *opInst.region().begin();
+    moduleTranslation.mapValue(*opInst.region().args_begin(), atomicx);
+    moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
+    if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
+      updateGenStatus = (opInst.emitError()
+                         << "unable to convert update operation to llvm IR");
+      return nullptr;
+    }
+    omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
+    assert(yieldop && yieldop.results().size() == 1 &&
+           "terminator must be omp.yield op and it must have exactly one "
+           "argument");
+    return moduleTranslation.lookupValue(yieldop.results()[0]);
+  };
+
+  // Handle ambiguous alloca, if any.
+  auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+  llvm::UnreachableInst *unreachableInst;
+  if (allocaIP.getPoint() == ompLoc.IP.getPoint()) {
+    // Same point => split basic block and make them unambiguous.
+    unreachableInst = builder.CreateUnreachable();
+    builder.SetInsertPoint(builder.GetInsertBlock()->splitBasicBlock(
+        unreachableInst, "alloca_split"));
+    ompLoc.IP = builder.saveIP();
+    unreachableInst->removeFromParent();
+  }
+  builder.restoreIP(ompBuilder->createAtomicUpdate(
+      ompLoc, findAllocaInsertPoint(builder, moduleTranslation), llvmAtomicX,
+      llvmExpr, atomicOrdering, binop, updateFn, isXBinopExpr));
+  return updateGenStatus;
+}
+
 /// Converts an OpenMP reduction operation using OpenMPIRBuilder. Expects the
 /// mapping between reduction variables and their private equivalents to have
 /// been stored on the ModuleTranslation stack. Currently only supports
@@ -1069,6 +1166,9 @@
       .Case([&](omp::AtomicWriteOp) {
         return convertOmpAtomicWrite(*op, builder, moduleTranslation);
       })
+      .Case([&](omp::AtomicUpdateOp op) {
+        return convertOmpAtomicUpdate(op, builder, moduleTranslation);
+      })
       .Case([&](omp::SectionsOp) {
         return convertOmpSections(*op, builder, moduleTranslation);
       })
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -621,6 +621,17 @@

 // -----

+func @omp_atomic_update9(%x: memref<i32>, %expr: i32) {
+  // expected-error @below {{the update region must have at least two operations (binop and terminator)}}
+  omp.atomic.update %x : memref<i32> {
+  ^bb0(%xval: i32):
+    omp.yield (%xval : i32)
+  }
+  return
+}
+
+// -----
+
 func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
   // expected-error @below {{expected three operations in omp.atomic.capture region}}
   omp.atomic.capture {
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir
@@ -0,0 +1,31 @@
+// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file -verify-diagnostics
+
+// Checking translation when the update is carried out by using more than one op
+// in the region.
+llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %expr: i32) {
+  // expected-error @+2 {{exactly two operations are allowed inside an atomic update region while lowering to LLVM IR}}
+  // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.update}}
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %t1 = llvm.mul %xval, %expr : i32
+    %t2 = llvm.sdiv %t1, %expr : i32
+    %newval = llvm.add %xval, %t2 : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking translation when the captured variable is not used in the inner
+// update operation.
+llvm.func @omp_atomic_update_multiple_step_update(%x: !llvm.ptr<i32>, %expr: i32) {
+  // expected-error @+2 {{the update operation inside the region must be a binary operation and that update operation must have the region argument as an operand}}
+  // expected-error @+1 {{LLVM Translation failed for operation: omp.atomic.update}}
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.mul %expr, %expr : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -933,6 +933,85 @@

 // -----

+// Checking simple atomicrmw and cmpxchg based translation. This also checks for
+// an ambiguous alloca insert point by putting llvm.mul as the first update operation.
+// CHECK-LABEL: @omp_atomic_update
+// CHECK-SAME: (i32* %[[x:.*]], i32 %[[expr:.*]], i1* %[[xbool:.*]], i1 %[[exprbool:.*]])
+llvm.func @omp_atomic_update(%x: !llvm.ptr<i32>, %expr: i32, %xbool: !llvm.ptr<i1>, %exprbool: i1) {
+  // CHECK: %[[t1:.*]] = mul i32 %[[x_old:.*]], %[[expr]]
+  // CHECK: store i32 %[[t1]], i32* %[[x_new:.*]]
+  // CHECK: %[[t2:.*]] = load i32, i32* %[[x_new]]
+  // CHECK: cmpxchg i32* %[[x]], i32 %[[x_old]], i32 %[[t2]]
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.mul %xval, %expr : i32
+    omp.yield(%newval : i32)
+  }
+  // CHECK: atomicrmw add i32* %[[x]], i32 %[[expr]] monotonic
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.add %xval, %expr : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking an order-dependent operation when the order is `expr binop x`
+// CHECK-LABEL: @omp_atomic_update_ordering
+// CHECK-SAME: (i32* %[[x:.*]], i32 %[[expr:.*]])
+llvm.func @omp_atomic_update_ordering(%x: !llvm.ptr<i32>, %expr: i32) {
+  // CHECK: %[[t1:.*]] = shl i32 %[[expr]], %[[x_old:[^ ,]*]]
+  // CHECK: store i32 %[[t1]], i32* %[[x_new:.*]]
+  // CHECK: %[[t2:.*]] = load i32, i32* %[[x_new]]
+  // CHECK: cmpxchg i32* %[[x]], i32 %[[x_old]], i32 %[[t2]]
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.shl %expr, %xval : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking an order-dependent operation when the order is `x binop expr`
+// CHECK-LABEL: @omp_atomic_update_ordering
+// CHECK-SAME: (i32* %[[x:.*]], i32 %[[expr:.*]])
+llvm.func @omp_atomic_update_ordering(%x: !llvm.ptr<i32>, %expr: i32) {
+  // CHECK: %[[t1:.*]] = shl i32 %[[x_old:.*]], %[[expr]]
+  // CHECK: store i32 %[[t1]], i32* %[[x_new:.*]]
+  // CHECK: %[[t2:.*]] = load i32, i32* %[[x_new]]
+  // CHECK: cmpxchg i32* %[[x]], i32 %[[x_old]], i32 %[[t2]] monotonic
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = llvm.shl %xval, %expr : i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
+// Checking intrinsic translation.
+// CHECK-LABEL: @omp_atomic_update_intrinsic
+// CHECK-SAME: (i32* %[[x:.*]], i32 %[[expr:.*]])
+llvm.func @omp_atomic_update_intrinsic(%x: !llvm.ptr<i32>, %expr: i32) {
+  // CHECK: %[[t1:.*]] = call i32 @llvm.smax.i32(i32 %[[x_old:.*]], i32 %[[expr]])
+  // CHECK: store i32 %[[t1]], i32* %[[x_new:.*]]
+  // CHECK: %[[t2:.*]] = load i32, i32* %[[x_new]]
+  // CHECK: cmpxchg i32* %[[x]], i32 %[[x_old]], i32 %[[t2]]
+  omp.atomic.update %x : !llvm.ptr<i32> {
+  ^bb0(%xval: i32):
+    %newval = "llvm.intr.smax"(%xval, %expr) : (i32, i32) -> i32
+    omp.yield(%newval : i32)
+  }
+  llvm.return
+}
+
+// -----
+
 // CHECK-LABEL: @omp_sections_empty
 llvm.func @omp_sections_empty() -> () {
   omp.sections {
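
Note (not part of the patch): as a rough end-to-end sketch, using the same i32 operand types as the tests above, a front-end lowering the C construct `#pragma omp atomic update` applied to `x = x + expr;` would be expected to emit an `omp.atomic.update` op of the shape below. Because `llvm.add` has a matching `atomicrmw` binary operation in convertBinOpToAtomic, the translation emits a single `atomicrmw add`; an update without such a mapping (e.g. `llvm.shl` or `llvm.intr.smax`) instead goes through the compare-exchange loop checked by the CHECK lines above.

    // Hypothetical front-end output for:
    //   #pragma omp atomic update
    //   x = x + expr;
    omp.atomic.update %x : !llvm.ptr<i32> {
    ^bb0(%xval: i32):
      %newval = llvm.add %xval, %expr : i32
      omp.yield(%newval : i32)
    }
    // Translates (per the test above) to roughly:
    //   %{{.*}} = atomicrmw add i32* %x, i32 %expr monotonic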