Index: mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td =================================================================== --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -415,6 +415,59 @@ let assemblyFormat = "attr-dict"; } +//===----------------------------------------------------------------------===// +// [5.1] 2.19.9 ordered Construct +//===----------------------------------------------------------------------===// + +def ClauseDependSource : StrEnumAttrCase<"dependsource">; +def ClauseDependSink : StrEnumAttrCase<"dependsink">; + +def ClauseDepend : StrEnumAttr< + "ClauseDepend", + "depend clause", + [ClauseDependSource, ClauseDependSink]> { + let cppNamespace = "::mlir::omp"; +} + +def OrderedOp : OpenMP_Op<"ordered"> { + let summary = "ordered construct"; + let description = [{ + The ordered construct either specifies a structured block in a + worksharing-loop, SIMD, or worksharing-loop SIMD region that is executed in + the order of the loop iterations, or it is a stand-alone directive that + specifies cross-iteration dependences in a doacross loop nest. + + The `depend` attribute corresponds to the DEPEND clause specified. The + `simd` attribute corresponds to the SIMD clause specified. If neither of + them is present, it behaves as if the THREADS clause were specified. + + The `depend_type_val` attribute refers to either the DEPEND(SOURCE) clause + or the DEPEND(SINK: vec) clause when the `depend` attribute is present. + + The `num_loops_val` attribute specifies the number of loops in the doacross + nest when the `depend` attribute is present. + + The `depend_vec_vars` are a variadic list of operands that specify the index + of the loop iterator in the doacross nest for the DEPEND(SOURCE) clause or + the index of the element of "vec" for the DEPEND(SINK: vec) clause. It + contains the operands in multiple "vec" when multiple DEPEND(SINK: vec) + clauses exist in one ORDERED directive. 
+ }]; + + let arguments = (ins UnitAttr:$depend, + OptionalAttr<ClauseDepend>:$depend_type_val, + Confined<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$num_loops_val, + Variadic<AnyType>:$depend_vec_vars, + UnitAttr:$simd); + + let regions = (region AnyRegion:$region); + + let assemblyFormat = [{ + ( `depend_vec` `(` $depend_vec_vars^ `:` type($depend_vec_vars) `)` )? + $region attr-dict + }]; +} + //===----------------------------------------------------------------------===// // 2.17.5 taskwait Construct //===----------------------------------------------------------------------===// Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp =================================================================== --- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -474,6 +474,66 @@ return atomicGen; } +/// Converts an OpenMP 'ordered' operation into LLVM IR using OpenMPIRBuilder. +static LogicalResult +convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + auto orderedOp = cast<omp::OrderedOp>(opInst); + + if (orderedOp.depend()) { + omp::ClauseDepend dependType = + *omp::symbolizeClauseDepend(orderedOp.depend_type_valAttr().getValue()); + bool isDependSource = + dependType == omp::ClauseDepend::dependsource;
+ unsigned numLoops = orderedOp.num_loops_val().getValue(); + SmallVector<llvm::Value *> vecValues = + moduleTranslation.lookupValues(orderedOp.depend_vec_vars()); + + llvm::OpenMPIRBuilder::LocationDescription ompLoc( + builder.saveIP(), builder.getCurrentDebugLocation()); + size_t indexVecValues = 0; + while (indexVecValues < vecValues.size()) { + SmallVector<llvm::Value *> storeValues; + for (unsigned i = 0; i < numLoops; i++) { + storeValues.push_back(vecValues[indexVecValues]); + indexVecValues++; + } + builder.restoreIP( + moduleTranslation.getOpenMPBuilder()->createOrderedDepend( + ompLoc, findAllocaInsertPoint(builder, moduleTranslation), + numLoops, storeValues, ".cnt.addr", isDependSource)); + } + return success(); + } + + // TODO: The code generation for ordered simd directive is not supported yet. + + // TODO: support error propagation in OpenMPIRBuilder and use it instead of + // relying on captured variables. + LogicalResult bodyGenStatus = success(); + + auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, + llvm::BasicBlock &continuationBlock) { + // OrderedOp has only one region associated with it. + auto &region = cast<omp::OrderedOp>(opInst).getRegion(); + convertOmpOpRegions(region, "omp.ordered.region", *codeGenIP.getBlock(), + continuationBlock, builder, moduleTranslation, + bodyGenStatus); + }; + + // TODO: Perform finalization actions for variables. This has to be + // called for variables which have destructors/finalizers. + auto finiCB = [&](InsertPointTy codeGenIP) {}; + + llvm::OpenMPIRBuilder::LocationDescription ompLoc( + builder.saveIP(), builder.getCurrentDebugLocation()); + builder.restoreIP( + moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( + ompLoc, bodyGenCB, finiCB, !orderedOp.simd())); + return bodyGenStatus; +} + /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. 
static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, @@ -802,6 +862,9 @@ .Case([&](omp::CriticalOp) { return convertOmpCritical(*op, builder, moduleTranslation); }) + .Case([&](omp::OrderedOp) { + return convertOmpOrdered(*op, builder, moduleTranslation); + }) .Case([&](omp::WsLoopOp) { return convertOmpWsLoop(*op, builder, moduleTranslation); }) Index: mlir/test/Dialect/OpenMP/ops.mlir =================================================================== --- mlir/test/Dialect/OpenMP/ops.mlir +++ mlir/test/Dialect/OpenMP/ops.mlir @@ -384,3 +384,43 @@ } return } + +func @omp_ordered(%vec0 : i64, %vec1 : i64, %vec2 : i64, %vec3 : i64) -> () { + // CHECK: omp.ordered + omp.ordered { + // CHECK: omp.terminator + omp.terminator + } + + // Only one DEPEND(SINK: vec) clause + // CHECK: omp.ordered depend_vec(%{{.*}} : i64) { + omp.ordered depend_vec(%vec0 : i64) { + // CHECK: omp.terminator + omp.terminator + // CHECK: } {depend, depend_type_val = "dependsink", num_loops_val = 1 : i64} + } {depend, depend_type_val = "dependsink", num_loops_val = 1 : i64} + + // CHECK: omp.ordered depend_vec(%{{.*}} : i64) { + omp.ordered depend_vec(%vec0 : i64) { + // CHECK: omp.terminator + omp.terminator + // CHECK: } {depend, depend_type_val = "dependsource", num_loops_val = 1 : i64} + } {depend, depend_type_val = "dependsource", num_loops_val = 1 : i64} + + // Multiple DEPEND(SINK: vec) clauses + // CHECK: omp.ordered depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) { + omp.ordered depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) { + // CHECK: omp.terminator + omp.terminator + // CHECK: } {depend, depend_type_val = "dependsink", num_loops_val = 2 : i64} + } {depend, depend_type_val = "dependsink", num_loops_val = 2 : i64} + + // CHECK: omp.ordered depend_vec(%{{.*}}, %{{.*}} : i64, i64) { + omp.ordered depend_vec(%vec0, %vec1 : i64, i64) { + // CHECK: omp.terminator + omp.terminator + // CHECK: } {depend, depend_type_val = 
"dependsource", num_loops_val = 2 : i64} + } {depend, depend_type_val = "dependsource", num_loops_val = 2 : i64} + + return +} Index: mlir/test/Target/LLVMIR/openmp-llvm.mlir =================================================================== --- mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -554,3 +554,67 @@ } llvm.return } + +llvm.func @omp_ordered(%arg0 : i64, %arg1 : i64, %arg2 : i64, %arg3 : i64) -> () { + // CHECK: [[ADDR9:%.*]] = alloca [2 x i64], align 8 + // CHECK: [[ADDR7:%.*]] = alloca [2 x i64], align 8 + // CHECK: [[ADDR5:%.*]] = alloca [2 x i64], align 8 + // CHECK: [[ADDR3:%.*]] = alloca [1 x i64], align 8 + // CHECK: [[ADDR:%.*]] = alloca [1 x i64], align 8 + + // CHECK: [[OMP_THREAD:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) + // CHECK-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]]) + omp.ordered { + omp.terminator + // CHECK: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[OMP_THREAD]]) + } + + // CHECK: [[TMP:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0 + // CHECK: store i64 [[ARG0:%.*]], i64* [[TMP]], align 4 + // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0 + // CHECK: [[OMP_THREAD2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) + // CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB3]], i32 [[OMP_THREAD2]], i64* [[TMP2]]) + omp.ordered depend_vec(%arg0 : i64) { + omp.terminator + } {depend, depend_type_val = "dependsink", num_loops_val = 1 : i64} + + // CHECK: [[TMP3:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR3]], i64 0, i64 0 + // CHECK: store i64 [[ARG0]], i64* [[TMP3]], align 4 + // CHECK: [[TMP4:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR3]], i64 0, i64 0 + // CHECK: [[OMP_THREAD4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]]) + // 
CHECK: call void @__kmpc_doacross_post(%struct.ident_t* @[[GLOB5]], i32 [[OMP_THREAD4]], i64* [[TMP4]]) + omp.ordered depend_vec(%arg0 : i64) { + omp.terminator + } {depend, depend_type_val = "dependsource", num_loops_val = 1 : i64} + + // CHECK: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 0 + // CHECK: store i64 [[ARG0]], i64* [[TMP5]], align 4 + // CHECK: [[TMP6:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 1 + // CHECK: store i64 [[ARG1:%.*]], i64* [[TMP6]], align 4 + // CHECK: [[TMP7:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR5]], i64 0, i64 0 + // CHECK: [[OMP_THREAD6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7:[0-9]+]]) + // CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB7]], i32 [[OMP_THREAD6]], i64* [[TMP7]]) + // CHECK: [[TMP8:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 0 + // CHECK: store i64 [[ARG2:%.*]], i64* [[TMP8]], align 4 + // CHECK: [[TMP9:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 1 + // CHECK: store i64 [[ARG3:%.*]], i64* [[TMP9]], align 4 + // CHECK: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 0 + // CHECK: [[OMP_THREAD8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]]) + // CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB7]], i32 [[OMP_THREAD8]], i64* [[TMP10]]) + omp.ordered depend_vec(%arg0, %arg1, %arg2, %arg3 : i64, i64, i64, i64) { + omp.terminator + } {depend, depend_type_val = "dependsink", num_loops_val = 2 : i64} + + // CHECK: [[TMP11:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 0 + // CHECK: store i64 [[ARG0]], i64* [[TMP11]], align 4 + // CHECK: [[TMP12:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 1 + // CHECK: store i64 [[ARG1]], i64* [[TMP12]], align 4 + // CHECK: [[TMP13:%.*]] = getelementptr inbounds [2 x i64], 
[2 x i64]* [[ADDR9]], i64 0, i64 0 + // CHECK: [[OMP_THREAD10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9:[0-9]+]]) + // CHECK: call void @__kmpc_doacross_post(%struct.ident_t* @[[GLOB9]], i32 [[OMP_THREAD10]], i64* [[TMP13]]) + omp.ordered depend_vec(%arg0, %arg1 : i64, i64) { + omp.terminator + } {depend, depend_type_val = "dependsource", num_loops_val = 2 : i64} + + llvm.return +}