diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1260,6 +1260,9 @@ if (!updateToLocation(Loc)) return InsertPointTy(); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); // The current basic block is split into four basic blocks. After outlining, // they will be mapped as follows: // ``` @@ -1285,7 +1288,7 @@ OI.EntryBB = TaskAllocaBB; OI.OuterAllocaBB = AllocaIP.getBlock(); OI.ExitBB = TaskExitBB; - OI.PostOutlineCB = [this, &Loc, Tied, Final](Function &OutlinedFn) { + OI.PostOutlineCB = [this, Ident, Tied, Final](Function &OutlinedFn) { // The input IR here looks like the following- // ``` // func @current_fn() { @@ -1324,9 +1327,6 @@ // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID) // call. - uint32_t SrcLocStrSize; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); - Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadID = getOrCreateThreadID(Ident); // Argument - `flags` diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -677,6 +677,30 @@ return bodyGenStatus; } +/// Converts an OpenMP task construct into LLVM IR using OpenMPIRBuilder. 
+static LogicalResult +convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { /* Lowers the task op through OpenMPIRBuilder::createTask. Returns the emitted error when an unhandled clause is present, otherwise the status recorded while converting the region. */ + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + LogicalResult bodyGenStatus = success(); /* Handed to convertOmpOpRegions by the callback below and returned at the end. */ + if (taskOp.if_expr() || taskOp.final_expr() || taskOp.untiedAttr() || + taskOp.mergeableAttr() || taskOp.in_reductions() || taskOp.priority() || + !taskOp.allocate_vars().empty()) { /* Any of these clauses is reported as an error rather than lowered. */ + return taskOp.emitError("unhandled clauses for translation to LLVM IR"); + } + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { /* Emits the task region; the allocaIP argument is not used here. */ + builder.restoreIP(codegenIP); /* Continue emitting at the point supplied to the callback. */ + convertOmpOpRegions(taskOp.region(), "omp.task.region", builder, + moduleTranslation, bodyGenStatus); + }; + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask( /* Resume emission at the insertion point createTask returns. */ + ompLoc, allocaIP, bodyCB, !taskOp.untied())); /* NOTE(review): the last argument looks to be always true here, since ops carrying the untied attribute were rejected above; assumes untied() mirrors untiedAttr(), confirm. */ + return bodyGenStatus; +} + /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, @@ -1367,6 +1391,9 @@ .Case([&](omp::SingleOp op) { return convertOmpSingle(op, builder, moduleTranslation); }) + .Case([&](omp::TaskOp op) { + return convertOmpTaskOp(op, builder, moduleTranslation); + }) .Case([](auto op) { // `yield` and `terminator` can be just omitted. 
The block structure diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2170,3 +2170,91 @@ } llvm.mlir.global internal @_QFsubEx() : i32 + +// ----- + +// CHECK-LABEL: define void @omp_task +// CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]]) +llvm.func @omp_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { + // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) + // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc + // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 0, + // CHECK-SAME: i64 0, ptr @[[wrapper_fn:.+]]) + // CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) + omp.task { // Every value used in this region is defined inside it; the expectations further down match an outlined function with an empty parameter list. + %n = llvm.mlir.constant(1 : i64) : i64 + %valaddr = llvm.alloca %n x i32 : (i64) -> !llvm.ptr + %val = llvm.load %valaddr : !llvm.ptr + %double = llvm.add %val, %val : i32 + llvm.store %double, %valaddr : !llvm.ptr + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @[[outlined_fn:.+]]() +// CHECK: task.alloca{{.*}}: +// CHECK: br label %[[task_body:[^, ]+]] +// CHECK: [[task_body]]: +// CHECK: br label %[[task_region:[^, ]+]] +// CHECK: [[task_region]]: +// CHECK: %[[alloca:.+]] = alloca i32, i64 1 +// CHECK: %[[val:.+]] = load i32, ptr %[[alloca]] +// CHECK: %[[newval:.+]] = add i32 %[[val]], %[[val]] +// CHECK: store i32 %[[newval]], ptr %{{[^, ]+}} +// CHECK: br label %[[exit_stub:[^, ]+]] +// CHECK: [[exit_stub]]: +// CHECK: ret void + + +// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}) { +// CHECK: call void @[[outlined_fn]]() +// CHECK: ret i32 0 +// CHECK: } + +// ----- + +// CHECK-LABEL: define void @omp_task +// CHECK-SAME: (i32 %[[x:.+]], i32 %[[y:.+]], ptr %[[zaddr:.+]]) +module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu"} { // NOTE(review): the triple is presumably pinned so that the byte sizes expected below stay stable; confirm. + llvm.func @omp_task(%x: i32, %y: i32, %zaddr: 
!llvm.ptr) { + // CHECK: %[[diff:.+]] = sub i32 %[[x]], %[[y]], + %diff = llvm.sub %x, %y : i32 + // CHECK: store i32 %[[diff]], ptr %2 + llvm.store %diff, %zaddr : !llvm.ptr // NOTE(review): the expectation just above spells the pointer operand as a literal value number, while the matching store after the task uses the captured zaddr name; confirm whether that is intentional. + // CHECK: %[[omp_global_thread_num:.+]] = call i32 @__kmpc_global_thread_num({{.+}}) + // CHECK: %[[task_data:.+]] = call ptr @__kmpc_omp_task_alloc + // CHECK-SAME: (ptr @{{.+}}, i32 %[[omp_global_thread_num]], i32 1, i64 16, i64 0, + // CHECK-SAME: ptr @[[wrapper_fn:.+]]) + // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.+}} %[[task_data]], ptr {{.+}}, i64 16, i1 false) + // CHECK: call i32 @__kmpc_omp_task(ptr @{{.+}}, i32 %[[omp_global_thread_num]], ptr %[[task_data]]) + omp.task { // This region reads %x, %y and %zaddr from the enclosing function; the expectations match a 16-byte copy into the task data and an outlined function taking a single pointer. + %z = llvm.add %x, %y : i32 + llvm.store %z, %zaddr : !llvm.ptr + omp.terminator + } + // CHECK: %[[prod:.+]] = mul i32 %[[x]], %[[y]] + %b = llvm.mul %x, %y : i32 + // CHECK: store i32 %[[prod]], ptr %[[zaddr]] + llvm.store %b, %zaddr : !llvm.ptr + llvm.return + } +} + +// CHECK: define internal void @[[outlined_fn:.+]](ptr %[[task_data:.+]]) +// CHECK: task.alloca{{.*}}: +// CHECK: br label %[[task_body:[^, ]+]] +// CHECK: [[task_body]]: +// CHECK: br label %[[task_region:[^, ]+]] +// CHECK: [[task_region]]: +// CHECK: %[[sum:.+]] = add i32 %{{.+}}, %{{.+}} +// CHECK: store i32 %[[sum]], ptr %{{.+}} +// CHECK: br label %[[exit_stub:[^, ]+]] +// CHECK: [[exit_stub]]: +// CHECK: ret void + + +// CHECK: define i32 @[[wrapper_fn]](i32 %{{.+}}, ptr %[[task_data:.+]]) { +// CHECK: call void @[[outlined_fn]](ptr %[[task_data]]) +// CHECK: ret i32 0 +// CHECK: }