Index: mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
===================================================================
--- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -402,6 +402,73 @@
   let assemblyFormat = "attr-dict";
 }
 
+//===----------------------------------------------------------------------===//
+// [5.1] 2.19.9 ordered Construct
+//===----------------------------------------------------------------------===//
+
+def ClauseDependSource : StrEnumAttrCase<"dependsource">;
+def ClauseDependSink : StrEnumAttrCase<"dependsink">;
+
+def ClauseDepend : StrEnumAttr<
+    "ClauseDepend",
+    "depend clause",
+    [ClauseDependSource, ClauseDependSink]> {
+  let cppNamespace = "::mlir::omp";
+}
+
+def OrderedOp : OpenMP_Op<"ordered"> {
+  let summary = "ordered construct without region";
+  let description = [{
+    The ordered construct without region is a stand-alone directive that
+    specifies cross-iteration dependences in a doacross loop nest.
+
+    The `depend_type_val` attribute refers to either the DEPEND(SOURCE) clause
+    or the DEPEND(SINK: vec) clause.
+
+    The `num_loops_val` attribute specifies the number of loops in the doacross
+    nest.
+
+    The `depend_vec_vars` is a variadic list of operands that specifies the index
+    of the loop iterator in the doacross nest for the DEPEND(SOURCE) clause or
+    the index of the element of "vec" for the DEPEND(SINK: vec) clause. It
+    contains the operands in multiple "vec" when multiple DEPEND(SINK: vec)
+    clauses exist in one ORDERED directive.
+  }];
+
+  let arguments = (ins OptionalAttr<ClauseDepend>:$depend_type_val,
+             Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$num_loops_val,
+             Variadic<AnyType>:$depend_vec_vars);
+
+  let assemblyFormat = [{
+    ( `depend_type` `(` $depend_type_val^ `)` )?
+    ( `depend_vec` `(` $depend_vec_vars^ `:` type($depend_vec_vars) `)` )?
+    attr-dict
+  }];
+
+  let verifier = "return ::verifyOrderedOp(*this);";
+}
+
+def OrderedRegionOp : OpenMP_Op<"ordered_region"> {
+  let summary = "ordered construct with region";
+  let description = [{
+    The ordered construct with region specifies a structured block in a
+    worksharing-loop, SIMD, or worksharing-loop SIMD region that is executed in
+    the order of the loop iterations.
+
+    The `simd` attribute corresponds to the SIMD clause specified. If it is not
+    present, it behaves as if the THREADS clause is specified or no clause is
+    specified.
+  }];
+
+  let arguments = (ins UnitAttr:$simd);
+
+  let regions = (region AnyRegion:$region);
+
+  let assemblyFormat = [{ ( `simd` $simd^ )? $region attr-dict}];
+
+  let verifier = "return ::verifyOrderedRegionOp(*this);";
+}
+
 //===----------------------------------------------------------------------===//
 // 2.17.5 taskwait Construct
 //===----------------------------------------------------------------------===//
Index: mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
===================================================================
--- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1076,5 +1076,55 @@
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// Verifier for ordered construct
+//===----------------------------------------------------------------------===//
+
+/// Verifies the stand-alone `omp.ordered` operation: it must be nested inside
+/// a worksharing-loop whose ordered clause carries a non-zero parameter, and
+/// `num_loops_val` must be present and equal to that parameter.
+static LogicalResult verifyOrderedOp(OrderedOp op) {
+  auto container = op->getParentOfType<WsLoopOp>();
+  if (!container || !container.ordered_valAttr() ||
+      container.ordered_valAttr().getInt() == 0)
+    return op.emitOpError() << "ordered depend directive must be closely "
+                            << "nested inside a worksharing-loop with ordered "
+                            << "clause with parameter present";
+
+  // `num_loops_val` is an optional attribute, so diagnose its absence instead
+  // of unwrapping an empty value in the comparison below.
+  if (!op.num_loops_val())
+    return op.emitOpError() << "ordered depend directive must specify the "
+                            << "number of loops in the doacross nest";
+
+  if (container.ordered_valAttr().getInt() !=
+      static_cast<int64_t>(op.num_loops_val().getValue()))
+    return op.emitOpError() << "number of variables in depend clause does not "
+                            << "match number of iteration variables in the "
+                            << "doacross loop";
+
+  return success();
+}
+
+/// Verifies the `omp.ordered_region` operation: the simd clause is rejected
+/// for now, and when the operation is nested inside a worksharing-loop, that
+/// loop must carry an ordered clause without a parameter (a value of 0).
+static LogicalResult verifyOrderedRegionOp(OrderedRegionOp op) {
+  // TODO: The code generation for ordered simd directive is not supported yet.
+  if (op.simd())
+    return op.emitOpError() << "code generation for the simd clause is not "
+                            << "supported yet";
+
+  if (auto container = op->getParentOfType<WsLoopOp>()) {
+    if (!container.ordered_valAttr() ||
+        container.ordered_valAttr().getInt() != 0)
+      return op.emitOpError() << "ordered region must be closely nested inside "
+                              << "a worksharing-loop region with an ordered "
+                              << "clause without parameter present";
+  }
+
+  return success();
+}
+
 #define GET_OP_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOps.cpp.inc"
Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
===================================================================
--- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -468,6 +468,89 @@
   return atomicGen;
 }
 
+/// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder.
+/// The `depend_vec_vars` operands are consumed in groups of `num_loops_val`
+/// values and one `createOrderedDepend` call is issued per group.
+static LogicalResult
+convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
+                  LLVM::ModuleTranslation &moduleTranslation) {
+  auto orderedOp = cast<omp::OrderedOp>(opInst);
+
+  // Both attributes are optional in the op definition, so diagnose their
+  // absence instead of dereferencing an empty value below.
+  if (!orderedOp.depend_type_valAttr() || !orderedOp.num_loops_val())
+    return opInst.emitError("ordered depend directive requires both the "
+                            "depend type and the number of loops");
+
+  omp::ClauseDepend dependType =
+      *omp::symbolizeClauseDepend(orderedOp.depend_type_valAttr().getValue());
+  bool isDependSource = dependType == omp::ClauseDepend::dependsource;
+  unsigned numLoops = orderedOp.num_loops_val().getValue();
+  SmallVector<llvm::Value *> vecValues =
+      moduleTranslation.lookupValues(orderedOp.depend_vec_vars());
+
+  // The loop below slices the operands into vectors of exactly numLoops
+  // values. A zero numLoops would never advance through them and a partial
+  // trailing vector would be read out of bounds, so reject both up front.
+  if (!vecValues.empty() &&
+      (numLoops == 0 || vecValues.size() % numLoops != 0))
+    return opInst.emitError("expected a non-zero number of loops that evenly "
+                            "divides the number of depend vector operands");
+
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(
+      builder.saveIP(), builder.getCurrentDebugLocation());
+  size_t indexVecValues = 0;
+  while (indexVecValues < vecValues.size()) {
+    SmallVector<llvm::Value *> storeValues;
+    storeValues.reserve(numLoops);
+    for (unsigned i = 0; i < numLoops; i++) {
+      storeValues.push_back(vecValues[indexVecValues]);
+      indexVecValues++;
+    }
+    builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
+        ompLoc, findAllocaInsertPoint(builder, moduleTranslation), numLoops,
+        storeValues, ".cnt.addr", isDependSource));
+  }
+  return success();
+}
+
+/// Converts an OpenMP 'ordered_region' operation into LLVM IR using
+/// OpenMPIRBuilder.
+static LogicalResult
+convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
+                        LLVM::ModuleTranslation &moduleTranslation) {
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto orderedRegionOp = cast<omp::OrderedRegionOp>(opInst);
+
+  // TODO: The code generation for ordered simd directive is not supported yet.
+  if (orderedRegionOp.simd())
+    return opInst.emitError("ordered simd directive is not supported yet");
+
+  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
+  // relying on captured variables.
+  LogicalResult bodyGenStatus = success();
+
+  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP,
+                       llvm::BasicBlock &continuationBlock) {
+    // OrderedRegionOp has only one region associated with it.
+    auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
+    convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(),
+                        continuationBlock, builder, moduleTranslation,
+                        bodyGenStatus);
+  };
+
+  // TODO: Perform finalization actions for variables. This has to be
+  // called for variables which have destructors/finalizers.
+  auto finiCB = [&](InsertPointTy codeGenIP) {};
+
+  llvm::OpenMPIRBuilder::LocationDescription ompLoc(
+      builder.saveIP(), builder.getCurrentDebugLocation());
+  builder.restoreIP(
+      moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
+          ompLoc, bodyGenCB, finiCB, !orderedRegionOp.simd()));
+  return bodyGenStatus;
+}
+
 /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
@@ -796,6 +879,12 @@
       .Case([&](omp::CriticalOp) {
         return convertOmpCritical(*op, builder, moduleTranslation);
       })
+      .Case([&](omp::OrderedRegionOp) {
+        return convertOmpOrderedRegion(*op, builder, moduleTranslation);
+      })
+      .Case([&](omp::OrderedOp) {
+        return convertOmpOrdered(*op, builder, moduleTranslation);
+      })
       .Case([&](omp::WsLoopOp) {
         return convertOmpWsLoop(*op, builder, moduleTranslation);
       })
Index: mlir/test/Dialect/OpenMP/invalid.mlir
===================================================================
--- mlir/test/Dialect/OpenMP/invalid.mlir
+++ mlir/test/Dialect/OpenMP/invalid.mlir
@@ -345,3 +345,83 @@
     omp.terminator
   }
 }
+
+// -----
+
+func @omp_ordered1(%arg1 : i64, %arg2 : i64, %arg3 : i64) -> () {
+  omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(1) inclusive {
+    // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
+    omp.ordered_region {
+      omp.terminator
+    }
+    omp.yield
+  }
+  return
+}
+
+// -----
+
+func @omp_ordered2(%arg1 : i64, %arg2 : i64, %arg3 : i64) -> () {
+  omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) inclusive {
+    // expected-error @below {{ordered region must be closely nested inside a worksharing-loop region with an ordered clause without parameter present}}
+    omp.ordered_region {
+      omp.terminator
+    }
+    omp.yield
+  }
+  return
+}
+
+// -----
+
+func @omp_ordered3(%vec0 : i64) -> () {
+  // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
+  omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+  return
+}
+
+// -----
+
+func @omp_ordered4(%arg1 : i64, %arg2 : i64, %arg3 : i64, %vec0 : i64) -> () {
+  omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(0) inclusive {
+    // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}}
+    omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+
+    omp.yield
+  }
+  return
+}
+
+// -----
+
+func @omp_ordered5(%arg1 : i64, %arg2 : i64, %arg3 : i64, %vec0 : i64, %vec1 : i64) -> () {
+  omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(1) inclusive {
+    // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}}
+    omp.ordered depend_type("dependsource") depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
+
+    omp.yield
+  }
+  return
+}
+
+// -----
+
+func @omp_ordered6() -> () {
+  // expected-error @below {{code generation for the simd clause is not supported yet}}
+  omp.ordered_region simd {
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_ordered7(%arg1 : i64, %arg2 : i64, %arg3 : i64, %vec0 : i64) -> () {
+  omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(1) inclusive {
+    // expected-error @below {{ordered depend directive must specify the number of loops in the doacross nest}}
+    omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64)
+
+    omp.yield
+  }
+  return
+}
Index: mlir/test/Dialect/OpenMP/ops.mlir
===================================================================
--- mlir/test/Dialect/OpenMP/ops.mlir
+++ mlir/test/Dialect/OpenMP/ops.mlir
@@ -414,3 +414,43 @@
   }
   return
 }
+
+func @omp_ordered(%arg1 : i64, %arg2 : i64, %arg3 : i64,
+    %vec0 : i64, %vec1 : i64, %vec2 : i64, %vec3 : i64) -> () {
+  // CHECK: omp.ordered_region
+  omp.ordered_region {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(0) inclusive {
+    omp.ordered_region {
+      omp.terminator
+    }
+    omp.yield
+  }
+
+  omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(1) inclusive {
+    // Only one DEPEND(SINK: vec) clause
+    // CHECK: omp.ordered depend_type("dependsink") depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
+    omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+
+    // CHECK: omp.ordered depend_type("dependsource") depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64}
+    omp.ordered depend_type("dependsource") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64}
+
+    omp.yield
+  }
+
+  omp.wsloop (%0) : i64 = (%arg1) to (%arg2) step (%arg3) ordered(2) inclusive {
+    // Multiple DEPEND(SINK: vec) clauses
+    // CHECK: omp.ordered depend_type("dependsink") depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+    omp.ordered depend_type("dependsink") depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+
+    // CHECK: omp.ordered depend_type("dependsource") depend_vec(%{{.*}}, %{{.*}} : i64, i64) {num_loops_val = 2 : i64}
+    omp.ordered depend_type("dependsource") depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64}
+
+    omp.yield
+  }
+
+  return
+}
Index: mlir/test/Target/LLVMIR/openmp-llvm.mlir
===================================================================
--- mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -554,3 +554,78 @@
   }
   llvm.return
 }
+
+llvm.func @omp_ordered(%arg0 : i64, %arg1 : i64, %arg2 : i64, %arg3 : i64,
+    %arg4: i64, %arg5: i64, %arg6: i64) -> () {
+  // CHECK: [[ADDR9:%.*]] = alloca [2 x i64], align 8
+  // CHECK: [[ADDR7:%.*]] = alloca [2 x i64], align 8
+  // CHECK: [[ADDR5:%.*]] = alloca [2 x i64], align 8
+  // CHECK: [[ADDR3:%.*]] = alloca [1 x i64], align 8
+  // CHECK: [[ADDR:%.*]] = alloca [1 x i64], align 8
+
+  // CHECK: [[OMP_THREAD:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+  // CHECK-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
+  omp.ordered_region {
+    omp.terminator
+    // CHECK: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
+  }
+
+  omp.wsloop (%arg7) : i64 = (%arg0) to (%arg1) step (%arg2) ordered(0) inclusive {
+    // CHECK: [[OMP_THREAD:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+    // CHECK-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
+    omp.ordered_region {
+      omp.terminator
+      // CHECK: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]])
+    }
+    omp.yield
+  }
+
+  omp.wsloop (%arg7) : i64 = (%arg0) to (%arg1) step (%arg2) ordered(1) inclusive {
+    // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0
+    // CHECK: store i64 [[ARG0:%.*]], i64* [[TMP]], align 4
+    // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0
+    // CHECK: [[OMP_THREAD2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+    // CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB3]], i32 [[OMP_THREAD2]], i64* [[TMP2]])
+    omp.ordered depend_type("dependsink") depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
+
+    // CHECK: [[TMP3:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR3]], i64 0, i64 0
+    // CHECK: store i64 [[ARG0]], i64* [[TMP3]], align 4
+    // CHECK: [[TMP4:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR3]], i64 0, i64 0
+    // CHECK: [[OMP_THREAD4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
+    // CHECK: call void @__kmpc_doacross_post(%struct.ident_t* @[[GLOB5]], i32 [[OMP_THREAD4]], i64* [[TMP4]])
+    omp.ordered depend_type("dependsource") depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
+
+    omp.yield
+  }
+
+  omp.wsloop (%arg7) : i64 = (%arg0) to (%arg1) step (%arg2) ordered(2) inclusive {
+    // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 0
+    // CHECK: store i64 [[ARG0]], i64* [[TMP5]], align 4
+    // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 1
+    // CHECK: store i64 [[ARG1:%.*]], i64* [[TMP6]], align 4
+    // CHECK: [[TMP7:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 0
+    // CHECK: [[OMP_THREAD6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7:[0-9]+]])
+    // CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB7]], i32 [[OMP_THREAD6]], i64* [[TMP7]])
+    // CHECK: [[TMP8:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 0
+    // CHECK: store i64 [[ARG2:%.*]], i64* [[TMP8]], align 4
+    // CHECK: [[TMP9:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 1
+    // CHECK: store i64 [[ARG3:%.*]], i64* [[TMP9]], align 4
+    // CHECK: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 0
+    // CHECK: [[OMP_THREAD8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]])
+    // CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB7]], i32 [[OMP_THREAD8]], i64* [[TMP10]])
+    omp.ordered depend_type("dependsink") depend_vec(%arg3, %arg4, %arg5, %arg6 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+
+    // CHECK: [[TMP11:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 0
+    // CHECK: store i64 [[ARG0]], i64* [[TMP11]], align 4
+    // CHECK: [[TMP12:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 1
+    // CHECK: store i64 [[ARG1]], i64* [[TMP12]], align 4
+    // CHECK: [[TMP13:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 0
+    // CHECK: [[OMP_THREAD10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9:[0-9]+]])
+    // CHECK: call void @__kmpc_doacross_post(%struct.ident_t* @[[GLOB9]], i32 [[OMP_THREAD10]], i64* [[TMP13]])
+    omp.ordered depend_type("dependsource") depend_vec(%arg3, %arg4 : i64, i64) {num_loops_val = 2 : i64}
+
+    omp.yield
+  }
+
+  llvm.return
+}