Delegate target-related OMPT callbacks from libomptarget to libomp.

* libomptarget/src/CMakeLists.txt: stop adding ompt-target.cpp to the source
  list and add LIBOMP_SRC_DIR to the include directories.
* omp-tools.h.var: add the transfer data-op kinds and the always / present /
  close / shared map flags.
* kmp_tasking.cpp: record whether a task is a target task and add
  ompt_task_target to the flags passed to the task-create callback.
* ompt-general.cpp: implement the non-EMI wrappers and add the
  libomp_ompt_callback_target_*_emi entry points.
* ompt-internal.h, ompt-target-api.h: move ompt_target_callbacks_active_t into
  the new ompt-target-api.h and declare the entry points there.

NOTE(review): line breaks and indentation in this file were rebuilt from a
copy whose whitespace had been collapsed. If a hunk is rejected, retry with
whitespace-insensitive matching (e.g. git apply --ignore-whitespace).

diff --git a/openmp/libomptarget/src/CMakeLists.txt b/openmp/libomptarget/src/CMakeLists.txt
--- a/openmp/libomptarget/src/CMakeLists.txt
+++ b/openmp/libomptarget/src/CMakeLists.txt
@@ -21,8 +21,7 @@
 )
 
 if(LIBOMP_OMPT_SUPPORT_IN_LIBOMPTARGET)
-  list(APPEND LIBOMPTARGET_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ompt-target.cpp)
-  list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LIBOMP_INCLUDE_DIR})
+  list(APPEND LIBOMPTARGET_LLVM_INCLUDE_DIRS ${LIBOMP_INCLUDE_DIR} ${LIBOMP_SRC_DIR})
 endif()
 
 set(LIBOMPTARGET_SRC_FILES ${LIBOMPTARGET_SRC_FILES} PARENT_SCOPE)
diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var
--- a/openmp/runtime/src/include/omp-tools.h.var
+++ b/openmp/runtime/src/include/omp-tools.h.var
@@ -317,10 +317,12 @@
   ompt_target_data_delete                     = 4,
   ompt_target_data_associate                  = 5,
   ompt_target_data_disassociate               = 6,
+  ompt_target_data_transfer                   = 7,
   ompt_target_data_alloc_async                = 17,
   ompt_target_data_transfer_to_device_async   = 18,
   ompt_target_data_transfer_from_device_async = 19,
-  ompt_target_data_delete_async               = 20
+  ompt_target_data_delete_async               = 20,
+  ompt_target_data_transfer_async             = 23
 } ompt_target_data_op_t;
 
 typedef enum ompt_work_t {
@@ -403,7 +405,11 @@
   ompt_target_map_flag_alloc          = 0x04,
   ompt_target_map_flag_release        = 0x08,
   ompt_target_map_flag_delete         = 0x10,
-  ompt_target_map_flag_implicit       = 0x20
+  ompt_target_map_flag_implicit       = 0x20,
+  ompt_target_map_flag_always         = 0x40,
+  ompt_target_map_flag_present        = 0x80,
+  ompt_target_map_flag_close          = 0x100,
+  ompt_target_map_flag_shared         = 0x200
 } ompt_target_map_flag_t;
 
 typedef enum ompt_dependence_type_t {
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -569,6 +569,8 @@
       ompt_frame_runtime | ompt_frame_framepointer;
   task->ompt_task_info.frame.enter_frame_flags =
       ompt_frame_runtime | ompt_frame_framepointer;
+  task->ompt_task_info.target_data.value = 0;
+  task->ompt_task_info.is_target_task = false;
 }
 
 // __ompt_task_start:
@@ -1443,8 +1445,17 @@
     input_flags.hidden_helper = TRUE;
   }
 
-  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
-                               sizeof_shareds, task_entry);
+  kmp_task_t *retval = __kmpc_omp_task_alloc(
+      loc_ref, gtid, flags, sizeof_kmp_task_t, sizeof_shareds, task_entry);
+
+#if OMPT_SUPPORT
+  if (UNLIKELY(ompt_enabled.enabled)) {
+    kmp_taskdata_t *task_data = KMP_TASK_TO_TASKDATA(retval);
+    task_data->ompt_task_info.is_target_task = true;
+  }
+#endif
+
+  return retval;
 }
 
 /*!
@@ -1775,12 +1786,15 @@
         parent->ompt_task_info.frame.enter_frame.ptr =
             OMPT_GET_FRAME_ADDRESS(0);
       }
+      int flag = ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata);
+      if (new_taskdata->ompt_task_info.is_target_task) {
+        flag |= ompt_task_target;
+      }
       if (ompt_enabled.ompt_callback_task_create) {
         ompt_callbacks.ompt_callback(ompt_callback_task_create)(
             &(parent->ompt_task_info.task_data),
             &(parent->ompt_task_info.frame),
-            &(new_taskdata->ompt_task_info.task_data),
-            ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
+            &(new_taskdata->ompt_task_info.task_data), flag, 0,
             OMPT_LOAD_RETURN_ADDRESS(gtid));
       }
     } else {
diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp
--- a/openmp/runtime/src/ompt-general.cpp
+++ b/openmp/runtime/src/ompt-general.cpp
@@ -23,6 +23,7 @@
 #if KMP_OS_UNIX
 #include <dlfcn.h>
 #endif
+#include "kmp.h"
 
 /*****************************************************************************
  * ompt include files
@@ -154,26 +155,49 @@
     ompt_data_t *target_data, ompt_id_t *host_op_id,
     ompt_target_data_op_t optype, void *src_addr, int src_device_num,
     void *dest_addr, int dest_device_num, size_t bytes,
-    const void *codeptr_ra) {}
+    const void *codeptr_ra) {
+  if (endpoint == ompt_scope_begin || endpoint == ompt_scope_beginend) {
+    *host_op_id = __ompt_get_unique_id_internal();
+  }
+  if (endpoint == ompt_scope_end || endpoint == ompt_scope_beginend) {
+    ompt_callbacks_noemi.ompt_callback(ompt_callback_target_data_op)(
+        target_data->value, *host_op_id, optype, src_addr, src_device_num,
+        dest_addr, dest_device_num, bytes, codeptr_ra);
+  }
+}
 
 void ompt_callback_target_emi_wrapper(ompt_target_t kind,
                                       ompt_scope_endpoint_t endpoint,
                                       int device_num, ompt_data_t *task_data,
                                       ompt_data_t *target_task_data,
                                       ompt_data_t *target_data,
-                                      const void *codeptr_ra) {}
+                                      const void *codeptr_ra) {
+  if (endpoint == ompt_scope_begin || endpoint == ompt_scope_beginend) {
+    target_data->value = __ompt_get_unique_id_internal();
+  }
+  ompt_callbacks_noemi.ompt_callback(ompt_callback_target)(
+      kind, endpoint, device_num, task_data, target_data->value, codeptr_ra);
+}
 
 void ompt_callback_target_map_emi_wrapper(ompt_data_t *target_data,
                                           unsigned int nitems, void **host_addr,
                                           void **device_addr, size_t *bytes,
                                           unsigned int *mapping_flags,
-                                          const void *codeptr_ra) {}
+                                          const void *codeptr_ra) {
+  ompt_callbacks_noemi.ompt_callback(ompt_callback_target_map)(
+      target_data->value, nitems, host_addr, device_addr, bytes, mapping_flags,
+      codeptr_ra);
+}
 
 void ompt_callback_target_submit_emi_wrapper(ompt_scope_endpoint_t endpoint,
                                              ompt_data_t *target_data,
                                              ompt_id_t *host_op_id,
                                              unsigned int requested_num_teams) {
-
+  if (endpoint == ompt_scope_begin || endpoint == ompt_scope_beginend) {
+    *host_op_id = __ompt_get_unique_id_internal(); // shared with the caller
+    ompt_callbacks_noemi.ompt_callback(ompt_callback_target_submit)(
+        target_data->value, *host_op_id, requested_num_teams);
+  }
 }
 
 #if KMP_OS_DARWIN
@@ -993,3 +1017,125 @@
 
   return NULL;
 }
+
+// Collect the OMPT data handles of the task the calling thread is executing.
+// Every out-parameter is optional and may be null. When the current task is
+// marked as a target task, its parent supplies task_data and the task itself
+// supplies target_task_data; otherwise target_task_data is null. is_nowait
+// receives the current task's is_target_task flag.
+void __ompt_get_target_data_info(ompt_data_t **task_data,
+                                 ompt_data_t **target_task_data,
+                                 ompt_data_t **target_data, bool *is_nowait) {
+  ompt_data_t *task_data_val, *target_task_data_val, *target_data_val;
+  kmp_info_t *thr = ompt_get_thread();
+  kmp_taskdata *current_task = thr->th.th_current_task;
+  ompt_task_info_t *current_task_info = OMPT_CUR_TASK_INFO(thr);
+  if (current_task_info->is_target_task) {
+    target_task_data_val = &current_task_info->task_data;
+    task_data_val = &current_task->td_parent->ompt_task_info.task_data;
+    target_data_val = &current_task_info->target_data;
+  } else {
+    target_task_data_val = NULL;
+    task_data_val = &current_task_info->task_data;
+    target_data_val = &current_task_info->target_data;
+  }
+  if (task_data) {
+    *task_data = task_data_val;
+  }
+  if (target_task_data) {
+    *target_task_data = target_task_data_val;
+  }
+  if (target_data) {
+    *target_data = target_data_val;
+  }
+  if (is_nowait) {
+    *is_nowait = current_task_info->is_target_task;
+  }
+}
+/*****************************************************************************
+ * Delegation of target-related OMPT callbacks
+ ****************************************************************************/
+// Report a target region endpoint to the tool. target_data is reset to
+// ompt_data_none before the callback on begin and after it on end, so that
+// ompt_scope_beginend is handled as well; codeptr is cleared for target tasks.
+_OMP_EXTERN void libomp_ompt_callback_target_emi(ompt_target_t kind,
+                                                 ompt_scope_endpoint_t endpoint,
+                                                 int device_num,
+                                                 void *codeptr) {
+  ompt_data_t *task_data, *target_task_data, *target_data;
+  bool is_nowait;
+  __ompt_get_target_data_info(&task_data, &target_task_data, &target_data,
+                              &is_nowait);
+  if (is_nowait) {
+    codeptr = nullptr;
+  }
+  if (endpoint == ompt_scope_begin || endpoint == ompt_scope_beginend) {
+    *target_data = ompt_data_none;
+  }
+  ompt_target_callbacks.ompt_callback(ompt_callback_target_emi)(
+      kind, endpoint, device_num, task_data, target_task_data, target_data,
+      codeptr);
+  if (endpoint == ompt_scope_end || endpoint == ompt_scope_beginend) {
+    *target_data = ompt_data_none;
+  }
+}
+
+// Report a target data operation to the tool, passing the calling thread's
+// host_op_id slot. With ompRoutine set, target_data is reset before the begin
+// endpoint and after the end endpoint; codeptr is cleared for target tasks.
+_OMP_EXTERN void libomp_ompt_callback_target_data_op_emi(
+    ompt_scope_endpoint_t endpoint, ompt_target_data_op_t optype,
+    void *src_addr, int src_device_num, void *dest_addr, int dest_device_num,
+    size_t bytes, bool ompRoutine, void *codeptr) {
+  ompt_data_t *target_task_data, *target_data;
+  bool is_nowait;
+  __ompt_get_target_data_info(nullptr, &target_task_data, &target_data,
+                              &is_nowait);
+  if (is_nowait) {
+    codeptr = nullptr;
+  }
+  ompt_id_t *host_op_id;
+  kmp_info_t *thr = ompt_get_thread();
+  host_op_id = &thr->th.ompt_thread_info.host_op_id;
+  if (ompRoutine &&
+      (endpoint == ompt_scope_begin || endpoint == ompt_scope_beginend)) {
+    *target_data = ompt_data_none;
+  }
+  ompt_target_callbacks.ompt_callback(ompt_callback_target_data_op_emi)(
+      endpoint, target_task_data, target_data, host_op_id, optype, src_addr,
+      src_device_num, dest_addr, dest_device_num, bytes, codeptr);
+  if (ompRoutine &&
+      (endpoint == ompt_scope_end || endpoint == ompt_scope_beginend)) {
+    *target_data = ompt_data_none;
+  }
+}
+
+// Report a target map event to the tool for the current task's target_data;
+// codeptr is cleared for target tasks.
+_OMP_EXTERN void libomp_ompt_callback_target_map_emi(
+    unsigned int nitems, void **host_addr, void **device_addr, size_t *bytes,
+    unsigned int *mapping_flags, void *codeptr) {
+  ompt_data_t *target_data;
+  bool is_nowait;
+  __ompt_get_target_data_info(nullptr, nullptr, &target_data, &is_nowait);
+  if (is_nowait) {
+    codeptr = nullptr;
+  }
+  ompt_target_callbacks.ompt_callback(ompt_callback_target_map_emi)(
+      target_data, nitems, host_addr, device_addr, bytes, mapping_flags,
+      codeptr);
+}
+
+// Report a target submit endpoint to the tool, passing the current task's
+// target_data and the calling thread's host_op_id slot.
+_OMP_EXTERN void
+libomp_ompt_callback_target_submit_emi(ompt_scope_endpoint_t endpoint,
+                                       unsigned int requested_num_teams) {
+  ompt_data_t *target_data;
+  __ompt_get_target_data_info(nullptr, nullptr, &target_data, nullptr);
+  ompt_id_t *host_op_id;
+  kmp_info_t *thr = ompt_get_thread();
+  host_op_id = &thr->th.ompt_thread_info.host_op_id;
+  ompt_target_callbacks.ompt_callback(ompt_callback_target_submit_emi)(
+      endpoint, target_data, host_op_id, requested_num_teams);
+}
diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h
--- a/openmp/runtime/src/ompt-internal.h
+++ b/openmp/runtime/src/ompt-internal.h
@@ -14,6 +14,7 @@
 #define __OMPT_INTERNAL_H__
 
 #include "ompt-event-specific.h"
+#include "ompt-target-api.h"
 #include "omp-tools.h"
 
 #define OMPT_VERSION 1
@@ -74,16 +75,6 @@
 #undef ompt_event_macro
 } ompt_callbacks_active_t;
 
-/* Bitmap to mark OpenMP 5.1 target events as registered*/
-typedef struct ompt_target_callbacks_active_s {
-  unsigned int enabled : 1;
-#define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
-
-  FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro)
-
-#undef ompt_event_macro
-} ompt_target_callbacks_active_t;
-
 #define TASK_TYPE_DETAILS_FORMAT(info)                                         \
   ((info->td_flags.task_serial || info->td_flags.tasking_ser)                  \
        ? ompt_task_undeferred                                                  \
@@ -96,7 +87,9 @@
   ompt_frame_t frame;
   ompt_data_t task_data;
   struct kmp_taskdata *scheduling_parent;
+  ompt_data_t target_data; // ompt_data for the enclosed target region
   int thread_num;
+  bool is_target_task;
 } ompt_task_info_t;
 
 typedef struct {
@@ -121,6 +114,7 @@
   int ompt_task_yielded;
   int parallel_flags; // information for the last parallel region invoked
   void *idle_frame;
+  ompt_id_t host_op_id;
 } ompt_thread_info_t;
 
 extern ompt_callbacks_internal_t ompt_callbacks;
diff --git a/openmp/runtime/src/ompt-target-api.h b/openmp/runtime/src/ompt-target-api.h
new file mode 100644
--- /dev/null
+++ b/openmp/runtime/src/ompt-target-api.h
@@ -0,0 +1,54 @@
+// Entry points defined in libomp for delegating target-related OMPT
+// callbacks. When FROM_LIBOMPTARGET is defined, the declarations carry
+// KMP_WEAK_ATTRIBUTE_INTERNAL.
+
+#ifndef OMPT_TARGET_API_H
+#define OMPT_TARGET_API_H
+
+#include "kmp_os.h"
+#include "omp-tools.h"
+
+#define _OMP_EXTERN extern "C"
+
+#ifdef FROM_LIBOMPTARGET
+#define OMPT_INTERFACE_ATTRIBUTE KMP_WEAK_ATTRIBUTE_INTERNAL
+#else
+#define OMPT_INTERFACE_ATTRIBUTE
+#endif
+
+/* Bitmap to mark OpenMP 5.1 target events as registered*/
+typedef struct ompt_target_callbacks_active_s {
+  unsigned int enabled : 1;
+#define ompt_event_macro(event, callback, eventid) unsigned int event : 1;
+
+  FOREACH_OMPT_51_TARGET_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_target_callbacks_active_t;
+
+_OMP_EXTERN OMPT_INTERFACE_ATTRIBUTE bool
+libomp_start_tool(ompt_target_callbacks_active_t *libomptarget_ompt_enabled);
+
+_OMP_EXTERN OMPT_INTERFACE_ATTRIBUTE void
+libomp_ompt_callback_target_emi(ompt_target_t kind,
+                                ompt_scope_endpoint_t endpoint, int device_num,
+                                void *codeptr);
+
+_OMP_EXTERN OMPT_INTERFACE_ATTRIBUTE void
+libomp_ompt_callback_target_data_op_emi(ompt_scope_endpoint_t endpoint,
+                                        ompt_target_data_op_t optype,
+                                        void *src_addr, int src_device_num,
+                                        void *dest_addr, int dest_device_num,
+                                        size_t bytes, bool ompRoutine,
+                                        void *codeptr);
+
+_OMP_EXTERN OMPT_INTERFACE_ATTRIBUTE void
+libomp_ompt_callback_target_map_emi(unsigned int nitems, void **host_addr,
+                                    void **device_addr, size_t *bytes,
+                                    unsigned int *mapping_flags, void *codeptr);
+
+_OMP_EXTERN OMPT_INTERFACE_ATTRIBUTE void
+libomp_ompt_callback_target_submit_emi(ompt_scope_endpoint_t endpoint,
+                                       unsigned int requested_num_teams);
+
+#endif // OMPT_TARGET_API_H