diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -248,6 +248,9 @@
   not satisfied in the event of an instantiation failures in a requires expression's
   parameter list. We previously handled this correctly in a constraint evaluation
   context, but not in a requires clause evaluated as a boolean.
+- Address the thread identification problems in coroutines.
+  `Issue 47177 <https://github.com/llvm/llvm-project/issues/47177>`_
+  `Issue 47179 <https://github.com/llvm/llvm-project/issues/47179>`_
 
 Improvements to Clang's diagnostics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp
--- a/openmp/libomptarget/src/api.cpp
+++ b/openmp/libomptarget/src/api.cpp
@@ -200,6 +200,87 @@
   return Rc;
 }
 
+// Hidden-helper task entry point for omp_target_memcpy_async: runs the
+// blocking omp_target_memcpy described by task->shareds, then frees the
+// TargetMemcpyArgsTy that omp_target_memcpy_async allocated for this task.
+// Returns -1 if the task or its arguments are missing, otherwise the result
+// of omp_target_memcpy.
+int __kmpc_target_memcpy_async_helper(kmp_int32 gtid, kmp_task_t *task) {
+  if (task == 0)
+    return -1;
+
+  TargetMemcpyArgsTy *args = (TargetMemcpyArgsTy *)task->shareds;
+
+  if (args == 0)
+    return -1;
+
+  // Call blocked version
+  int Rc = omp_target_memcpy(args->Dst, args->Src, args->Length,
+                             args->DstOffset, args->SrcOffset,
+                             args->DstDevice, args->SrcDevice);
+
+  // Nothing else references the argument object; release it so it is not
+  // leaked once the copy has run.
+  delete args;
+
+  return Rc;
+}
+
+// Asynchronous variant of omp_target_memcpy: wraps the copy in a hidden-helper
+// task submitted with the Depobj_count dependences from Depobj_list.
+// Returns OFFLOAD_SUCCESS once the task has been handed to the runtime, or
+// OFFLOAD_FAIL if the arguments are invalid or no task could be allocated.
+EXTERN int
+omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
+                        size_t DstOffset, size_t SrcOffset, int DstDevice,
+                        int SrcDevice, int Depobj_count,
+                        omp_depend_t *Depobj_list) {
+  TIMESCOPE();
+  DP("Call to omp_target_memcpy_async, dst device %d, src device %d, "
+     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
+     "src offset %zu, length %zu\n",
+     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
+     Length);
+
+  // Check the source and dest address
+  if (Dst == 0 || Src == 0)
+    return OFFLOAD_FAIL;
+
+  // A dependence count must be non-negative and backed by a list
+  if (Depobj_count < 0 || (Depobj_count > 0 && Depobj_list == 0))
+    return OFFLOAD_FAIL;
+
+  // Create task
+  int (*fn)(kmp_int32, kmp_task_t *) = &__kmpc_target_memcpy_async_helper;
+  int errsz = sizeof(kmp_task_t);
+  int errhr = 0;
+  int gtid = __kmpc_global_thread_num(NULL);
+
+  // Setup the hidden helper flags
+  kmp_int32 flags = 0;
+  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
+  input_flags->hidden_helper = 1;
+
+  // Alloc helper task
+  kmp_task_t *ptr =
+      __kmpc_omp_target_task_alloc(NULL, gtid, flags, errsz, errhr, fn, -1);
+  if (ptr == 0)
+    return OFFLOAD_FAIL;
+
+  // Create task object; the helper deletes it after the copy has run
+  TargetMemcpyArgsTy *args_ =
+      new TargetMemcpyArgsTy(Dst, Src, Length, DstOffset, SrcOffset, DstDevice,
+                             SrcDevice, Depobj_count, Depobj_list);
+  ptr->shareds = args_;
+
+  int Rc = OFFLOAD_SUCCESS;
+  __kmpc_omp_task_with_deps(NULL, gtid, ptr, Depobj_count, args_->Depobjs, 0,
+                            NULL);
+
+  DP("omp_target_memcpy_async returns %d\n", Rc);
+  return Rc;
+}
+
 EXTERN int
 omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
                        int NumDims, const size_t *Volume,
@@ -260,6 +341,103 @@
   return Rc;
 }
 
+// Hidden-helper task entry point for omp_target_memcpy_rect_async: runs the
+// blocking omp_target_memcpy_rect described by task->shareds, then frees the
+// TargetMemcpyRectArgsTy that omp_target_memcpy_rect_async allocated for this
+// task. Returns -1 if the task or its arguments are missing, otherwise the
+// result of omp_target_memcpy_rect.
+int __kmpc_target_memcpy_rect_async_helper(kmp_int32 gtid, kmp_task_t *task) {
+  if (task == 0)
+    return -1;
+
+  TargetMemcpyRectArgsTy *args = (TargetMemcpyRectArgsTy *)task->shareds;
+
+  if (args == 0)
+    return -1;
+
+  // Call blocked version
+  int Rc = omp_target_memcpy_rect(
+      args->Dst, args->Src, args->ElementSize, args->NumDims, args->Volume,
+      args->DstOffsets, args->SrcOffsets, args->DstDimensions,
+      args->SrcDimensions, args->DstDevice, args->SrcDevice);
+
+  // Nothing else references the argument object; release it so it is not
+  // leaked once the copy has run.
+  delete args;
+
+  return Rc;
+}
+
+// Asynchronous variant of omp_target_memcpy_rect: wraps the copy in a
+// hidden-helper task submitted with the Depobj_count dependences from
+// Depobj_list. The Volume/offset/dimension arrays are not copied, so they must
+// stay valid until the task has run. Returns OFFLOAD_SUCCESS once the task has
+// been handed to the runtime, or OFFLOAD_FAIL if the arguments are invalid or
+// no task could be allocated.
+EXTERN int
+omp_target_memcpy_rect_async(void *Dst, const void *Src, size_t ElementSize,
+                             int NumDims, const size_t *Volume,
+                             const size_t *DstOffsets, const size_t *SrcOffsets,
+                             const size_t *DstDimensions,
+                             const size_t *SrcDimensions, int DstDevice,
+                             int SrcDevice, int Depobj_count,
+                             omp_depend_t *Depobj_list) {
+  TIMESCOPE();
+  DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, "
+     "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
+     "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
+     "volume " DPxMOD ", element size %zu, num_dims %d\n",
+     DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
+     DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
+     DPxPTR(Volume), ElementSize, NumDims);
+
+  // Per the OpenMP API, NULL for both Dst and Src asks how many dimensions are
+  // supported instead of copying; defer to the blocking version for that.
+  if (Dst == 0 && Src == 0)
+    return omp_target_memcpy_rect(Dst, Src, ElementSize, NumDims, Volume,
+                                  DstOffsets, SrcOffsets, DstDimensions,
+                                  SrcDimensions, DstDevice, SrcDevice);
+
+  // Check the source and dest address
+  if (Dst == 0 || Src == 0)
+    return OFFLOAD_FAIL;
+
+  // A dependence count must be non-negative and backed by a list
+  if (Depobj_count < 0 || (Depobj_count > 0 && Depobj_list == 0))
+    return OFFLOAD_FAIL;
+
+  // Create task
+  int (*fn)(kmp_int32, kmp_task_t *) = &__kmpc_target_memcpy_rect_async_helper;
+  int errsz = sizeof(kmp_task_t);
+  int errhr = 0;
+  int gtid = __kmpc_global_thread_num(NULL);
+
+  // Setup the hidden helper flags
+  kmp_int32 flags = 0;
+  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
+  input_flags->hidden_helper = 1;
+
+  // Alloc helper task
+  kmp_task_t *ptr =
+      __kmpc_omp_target_task_alloc(NULL, gtid, flags, errsz, errhr, fn, -1);
+  if (ptr == 0)
+    return OFFLOAD_FAIL;
+
+  // Create task object; the helper deletes it after the copy has run
+  TargetMemcpyRectArgsTy *args_ = new TargetMemcpyRectArgsTy(
+      Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets,
+      DstDimensions, SrcDimensions, DstDevice, SrcDevice, Depobj_count,
+      Depobj_list);
+  ptr->shareds = args_;
+
+  int Rc = OFFLOAD_SUCCESS;
+  __kmpc_omp_task_with_deps(NULL, gtid, ptr, Depobj_count, args_->Depobjs, 0,
+                            NULL);
+
+  DP("omp_target_memcpy_rect_async returns %d\n", Rc);
+  return Rc;
+}
+
 EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
                                     size_t Size, size_t DeviceOffset,
                                     int DeviceNum) {
diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -38,6 +38,8 @@
     omp_target_is_present;
     omp_target_memcpy;
     omp_target_memcpy_rect;
+    omp_target_memcpy_async;
+    omp_target_memcpy_rect_async;
     omp_target_associate_ptr;
     omp_target_disassociate_ptr;
     llvm_omp_target_alloc_host;
diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -98,7 +98,58 @@
  * We maintain the same data structure for compatibility.
  */
 typedef int kmp_int32;
+typedef int64_t kmp_int64;
 typedef intptr_t kmp_intptr_t;
+
+typedef void *omp_depend_t;
+struct kmp_task;
+typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, struct kmp_task *);
+
+// Optional compiler-provided task data (destructor thunk or priority).
+typedef union kmp_cmplrdata {
+  kmp_int32 priority;
+  kmp_routine_entry_t destructors;
+} kmp_cmplrdata_t;
+
+// Must match the runtime's kmp_task_t: sizeof(kmp_task_t) is passed to the
+// task allocator, so every field the runtime declares is mirrored here.
+typedef struct kmp_task {
+  void *shareds;
+  kmp_routine_entry_t routine;
+  kmp_int32 part_id;
+  kmp_cmplrdata_t data1;
+  kmp_cmplrdata_t data2;
+} kmp_task_t;
+
+typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
+  /* Compiler flags */ /* Total compiler flags must be 16 bits */
+  unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
+  unsigned final : 1; /* task is final(1) so execute immediately */
+  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
+  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */
+  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */
+  unsigned priority_specified : 1; /* set if the compiler provides priority setting for the task */
+  unsigned detachable : 1; /* 1 == can detach */
+  unsigned hidden_helper : 1; /* 1 == hidden helper task */
+  unsigned reserved : 8; /* reserved for compiler use */
+
+  /* Library flags */ /* Total library flags must be 16 bits */
+  unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
+  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
+  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
+  // (1) or may be deferred (0)
+  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
+  // (0) [>= 2 threads]
+  /* If either team_serial or tasking_ser is set, task team may be NULL */
+  /* Task State Flags: */
+  unsigned started : 1; /* 1==started, 0==not started */
+  unsigned executing : 1; /* 1==executing, 0==not executing */
+  unsigned complete : 1; /* 1==complete, 0==not complete */
+  unsigned freed : 1; /* 1==freed, 0==allocated */
+  unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
+  unsigned reserved31 : 7; /* reserved for library use */
+} kmp_tasking_flags_t;
+
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
@@ -117,6 +168,114 @@
                           kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                           kmp_depend_info_t *noalias_dep_list)
     __attribute__((weak));
+
+kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
+                                  size_t sizeof_shareds,
+                                  kmp_routine_entry_t task_entry)
+    __attribute__((weak));
+
+kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+                                         kmp_int32 flags,
+                                         size_t sizeof_kmp_task_t,
+                                         size_t sizeof_shareds,
+                                         kmp_routine_entry_t task_entry,
+                                         kmp_int64 device_id)
+    __attribute__((weak));
+
+void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) __attribute__((weak));
+kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                    kmp_task_t *new_task, kmp_int32 ndeps,
+                                    kmp_depend_info_t *dep_list,
+                                    kmp_int32 ndeps_noalias,
+                                    kmp_depend_info_t *noalias_dep_list)
+    __attribute__((weak));
+
+// Arguments captured for one omp_target_memcpy_async task. The object owns
+// Depobjs, a private copy of the caller's dependence descriptors.
+class TargetMemcpyArgsTy {
+public:
+  TargetMemcpyArgsTy(void *Dst_, const void *Src_, size_t Length_,
+                     size_t DstOffset_, size_t SrcOffset_, int DstDevice_,
+                     int SrcDevice_, int Depobj_count,
+                     omp_depend_t *Depobj_list)
+      : Dst(Dst_), Src(Src_), Length(Length_), DstOffset(DstOffset_),
+        SrcOffset(SrcOffset_), DstDevice(DstDevice_), SrcDevice(SrcDevice_),
+        Depobjs(0) {
+    if (Depobj_count > 0) {
+      Depobjs = new kmp_depend_info_t[Depobj_count];
+      for (int i = 0; i < Depobj_count; i++) {
+        omp_depend_t depobj = Depobj_list[i];
+        Depobjs[i] = *((kmp_depend_info_t *)depobj);
+      }
+    }
+  }
+
+  ~TargetMemcpyArgsTy() {
+    // Depobjs comes from new[], so it must be released with delete[];
+    // deleting a null pointer is a no-op, so no guard is needed.
+    delete[] Depobjs;
+  }
+
+  void *Dst;
+  const void *Src;
+  size_t Length;
+  size_t DstOffset;
+  size_t SrcOffset;
+  int DstDevice;
+  int SrcDevice;
+
+  // The buffer for depend objects
+  kmp_depend_info_t *Depobjs;
+};
+
+// Arguments captured for one omp_target_memcpy_rect_async task. The array
+// pointers are stored as given (not copied); the object owns only Depobjs, a
+// private copy of the caller's dependence descriptors.
+class TargetMemcpyRectArgsTy {
+public:
+  TargetMemcpyRectArgsTy(void *Dst_, const void *Src_, size_t ElementSize_,
+                         int NumDims_, const size_t *Volume_,
+                         const size_t *DstOffsets_, const size_t *SrcOffsets_,
+                         const size_t *DstDimensions_,
+                         const size_t *SrcDimensions_, int DstDevice_,
+                         int SrcDevice_, int Depobj_count,
+                         omp_depend_t *Depobj_list)
+      : Dst(Dst_), Src(Src_), ElementSize(ElementSize_), NumDims(NumDims_),
+        Volume(Volume_), DstOffsets(DstOffsets_), SrcOffsets(SrcOffsets_),
+        DstDimensions(DstDimensions_), SrcDimensions(SrcDimensions_),
+        DstDevice(DstDevice_), SrcDevice(SrcDevice_), Depobjs(0) {
+    if (Depobj_count > 0) {
+      Depobjs = new kmp_depend_info_t[Depobj_count];
+      for (int i = 0; i < Depobj_count; i++) {
+        omp_depend_t depobj = Depobj_list[i];
+        Depobjs[i] = *((kmp_depend_info_t *)depobj);
+      }
+    }
+  }
+
+  ~TargetMemcpyRectArgsTy() {
+    // Depobjs comes from new[], so it must be released with delete[];
+    // deleting a null pointer is a no-op, so no guard is needed.
+    delete[] Depobjs;
+  }
+
+  void *Dst;
+  const void *Src;
+  size_t ElementSize;
+  int NumDims;
+  const size_t *Volume;
+  const size_t *DstOffsets;
+  const size_t *SrcOffsets;
+  const size_t *DstDimensions;
+  const size_t *SrcDimensions;
+  int DstDevice;
+  int SrcDevice;
+
+  // The buffer for depend objects
+  kmp_depend_info_t *Depobjs;
+};
+
 #ifdef __cplusplus
 }
 #endif