diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -667,6 +667,30 @@ return bodyGenStatus; } +/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. +static LogicalResult +convertOmpTaskOp(omp::TaskOp &taskOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + LogicalResult bodyGenStatus = success(); + if (taskOp.if_expr() || taskOp.final_expr() || taskOp.untiedAttr() || + taskOp.mergeableAttr() || taskOp.in_reductions() || taskOp.priority() || + !taskOp.allocate_vars().empty()) { + return taskOp.emitError("unhandled clauses for translation to LLVM IR"); + } + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP, + llvm::BasicBlock &continuationBB) { + convertOmpOpRegions(taskOp.region(), "omp.task.region", + *codegenIP.getBlock(), continuationBB, builder, + moduleTranslation, bodyGenStatus); + }; + auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask( + ompLoc, allocaIP, bodyCB)); + return bodyGenStatus; +} + /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, @@ -1446,6 +1470,9 @@ .Case([&](omp::SingleOp op) { return convertOmpSingle(op, builder, moduleTranslation); }) + .Case([&](omp::TaskOp op) { + return convertOmpTaskOp(op, builder, moduleTranslation); + }) .Case([](auto op) { // `yield` and `terminator` can be just omitted. The block structure diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2090,3 +2090,90 @@ } llvm.mlir.global internal @_QFsubEx() : i32 + +// ----- + +// CHECK-LABEL: define void @omp_task +// CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], i32* %[[zaddr:.+]]) +llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { + // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) + // CHECK: %[[task_data:.+]] = call i8* @__kmpc_omp_task_alloc + // CHECK-SAME: (%{{.+}}* @1, i32 %[[omp_global_thread_num]], i32 1, i64 0, + // CHECK-SAME: i64 0, i32 (i32, i8*)* bitcast (i32 (i32)* @[[wrapper_fn:.+]] to + // CHECK-SAME: i32 (i32, i8*)*)) + // CHECK: call i32 @__kmpc_omp_task(%{{.*}}, i32 %[[omp_global_thread_num]], i8* %[[task_data]]) + omp.task { + %n = llvm.mlir.constant(1 : i64) : i64 + %valaddr = llvm.alloca %n x i32 : (i64) -> !llvm.ptr + %val = llvm.load %valaddr : !llvm.ptr + %double = llvm.add %val, %val : i32 + llvm.store %double, %valaddr : !llvm.ptr + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @[[outlined_fn:.+]]() +// CHECK: task.alloca{{.*}}: +// CHECK: br label %[[task_body:[^, ]+]] +// CHECK: [[task_body]]: +// CHECK: br label %[[task_region:[^, ]+]] +// CHECK: [[task_region]]: +// CHECK: %[[val:.+]] = load i32, i32* %{{[^, ]+}} +// CHECK: %[[newval:.+]] = add i32 %[[val]], %[[val]] +// CHECK: store i32 %[[newval]], i32* %{{[^, ]+}} +// CHECK: br label %[[exit_stub:[^, ]+]] +// CHECK: [[exit_stub]]: +// CHECK: ret void + + +// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) { +// CHECK: call void @[[outlined_fn]]() +// CHECK: ret i32 0 +// CHECK: } + +// ----- + +// CHECK-LABEL: define void @omp_task +// CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], i32* %[[zaddr:.+]]) +llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { + // CHECK: %[[diff:.+]] = sub i32 %[[x]], %[[y]], + %diff = llvm.sub %x, %y : i32 + // CHECK: store i32 %[[diff]], i32* %2 + llvm.store %diff, %zaddr : !llvm.ptr + // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) + // CHECK: %[[task_data:.+]] = call i8* @__kmpc_omp_task_alloc + // CHECK-SAME: (%{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 16, i64 0, + // CHECK-SAME: i32 (i32, i8*)* bitcast (i32 (i32, { i32, i32, i32* }*)* + // CHECK-SAME: @[[wrapper_fn:.+]] to i32 (i32, i8*)*)) + // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* {{.+}} %[[task_data]], i8* {{.+}}, i64 16, i1 false) + // CHECK: call i32 @__kmpc_omp_task(%{{.*}}, i32 %[[omp_global_thread_num]], i8* %[[task_data]]) + omp.task { + %z = llvm.add %x, %y : i32 + llvm.store %z, %zaddr : !llvm.ptr + omp.terminator + } + // CHECK: %[[prod:.+]] = mul i32 %[[x]], %[[y]] + %b = llvm.mul %x, %y : i32 + // CHECK: store i32 %[[prod]], i32* %[[zaddr]] + llvm.store %b, %zaddr : !llvm.ptr + llvm.return +} + +// CHECK: define internal void @[[outlined_fn:.+]]({ i32, i32, i32* }* %[[task_data:.+]]) +// CHECK: task.alloca{{.*}}: +// CHECK: br label %[[task_body:[^, ]+]] +// CHECK: [[task_body]]: +// CHECK: br label %[[task_region:[^, ]+]] +// CHECK: [[task_region]]: +// CHECK: %[[sum:.+]] = add i32 %{{.+}}, %{{.+}} +// CHECK: store i32 %[[sum]], i32* %{{.+}} +// CHECK: br label %[[exit_stub:[^, ]+]] +// CHECK: [[exit_stub]]: +// CHECK: ret void + + +// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}, { i32, i32, i32* }* %[[task_data:.+]]) { +// CHECK: call void @[[outlined_fn]]({ i32, i32, i32* }* %[[task_data]]) +// CHECK: ret i32 0 +// CHECK: }