diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h --- a/openmp/runtime/test/ompt/callback.h +++ b/openmp/runtime/test/ompt/callback.h @@ -1,7 +1,9 @@ #ifndef _BSD_SOURCE #define _BSD_SOURCE #endif +#ifndef _DEFAULT_SOURCE #define _DEFAULT_SOURCE +#endif #include #ifndef __STDC_FORMAT_MACROS #define __STDC_FORMAT_MACROS diff --git a/openmp/tools/multiplex/CMakeLists.txt b/openmp/tools/multiplex/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/CMakeLists.txt @@ -0,0 +1,12 @@ +project(OMPT-Multiplex) + +if(LIBOMP_OMPT_SUPPORT) + include_directories(${LIBOMP_INCLUDE_DIR}) + + add_library(ompt-multiplex INTERFACE) + target_include_directories(ompt-multiplex INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}) + + install(FILES ompt-multiplex.h DESTINATION include) + + add_subdirectory(tests) +endif() diff --git a/openmp/tools/multiplex/README.md b/openmp/tools/multiplex/README.md new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/README.md @@ -0,0 +1,60 @@ +# OMPT-Multiplexing +The OMPT-Multiplexing header file allows a tool to load a second tool to +overcome the restriction of the OpenMP to only load one tool at a time. +The header file can also be used to load more than two tools using a cascade +of tools that include the header file. OMPT-Multiplexing takes care of the +multiplexing of OMPT callbacks, data pointers and runtime entry functions. 
+ +Examples can be found under ./tests + +## Prerequisites +- LLVM/OpenMP runtime with OMPT (https://github.com/OpenMPToolsInterface/LLVM-openmp) +- LLVM-lit + +### Getting LLVM-lit +Either build llvm and find lit+FileCheck in the build directory of llvm, or install using `pip`: +``` + $ pip install --upgrade --user pip + $ export PATH=$HOME/.local/bin:$PATH + $ export PYTHONPATH=$HOME/.local/lib/python3.*/site-packages/ + $ pip install --user lit +``` + +## How to test +``` + $ make check-ompt-multiplex +``` + +## How to compile and use your OpenMP tools +The code of the first tool must include the following, with the convention that the environment variable containing the path to the client tool is the tool name with the suffix "_TOOL_LIBRARIES": +``` +#define CLIENT_TOOL_LIBRARIES_VAR "EXAMPLE_TOOL_LIBRARIES" +#include <ompt-multiplex.h> +``` +Note that functions and variables with the prefix "ompt_multiplex" are reserved by the tool. + + +To use both tools execute the following: +``` + $ clang -fopenmp -o program.exe + $ OMP_TOOL_LIBRARIES=/path/to/first/tool.so EXAMPLE_TOOL_LIBRARIES=/path/to/second/tool.so ./program.exe +``` +Note that EXAMPLE_TOOL_LIBRARIES may also contain a list of paths to tools, which are tried in order (similar to lists in OMP_TOOL_LIBRARIES). + +## Advanced usage +To reduce the number of memory allocations, the user can define macros before including the ompt-multiplex.h file that specify custom data access handlers: + +``` +#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA get_client_thread_data +#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA get_client_parallel_data +#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA get_client_task_data +``` + +This will reverse the calling order of the current tool and its client for the callbacks that end a thread, a parallel region or a task. 
In order to avoid this, one can specify a custom delete handler as well: + +``` +#define OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA delete_thread_data +#define OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA delete_parallel_data +#define OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA delete_task_data +``` + diff --git a/openmp/tools/multiplex/ompt-multiplex.h b/openmp/tools/multiplex/ompt-multiplex.h new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/ompt-multiplex.h @@ -0,0 +1,1094 @@ +//===--- ompt-multiplex.h - header-only multiplexing of OMPT tools -- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file enables an OMPT tool to load another OMPT tool and +// automatically forwards OMPT event-callbacks to the nested tool. 
+// +// For details see openmp/tools/multiplex/README.md +// +//===----------------------------------------------------------------------===// + +#ifndef OMPT_MULTIPLEX_H +#define OMPT_MULTIPLEX_H + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include + +static ompt_set_callback_t ompt_multiplex_set_callback; +static ompt_get_task_info_t ompt_multiplex_get_task_info; +static ompt_get_thread_data_t ompt_multiplex_get_thread_data; +static ompt_get_parallel_info_t ompt_multiplex_get_parallel_info; + +// contains name of the environment var in which the tool path is specified +#ifndef CLIENT_TOOL_LIBRARIES_VAR +#error CLIENT_TOOL_LIBRARIES_VAR should be defined before including of ompt-multiplex.h +#endif + +#if defined(CUSTOM_DELETE_DATA) && !defined(CUSTOM_GET_CLIENT_DATA) +#error CUSTOM_GET_CLIENT_DATA must be set if CUSTOM_DELETE_DATA is set +#endif + +#define OMPT_API_ROUTINE static + +#define OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(macro) \ + macro(callback_thread_begin, ompt_callback_thread_begin_t, 1); \ + macro(callback_thread_end, ompt_callback_thread_end_t, 2); \ + macro(callback_parallel_begin, ompt_callback_parallel_begin_t, 3); \ + macro(callback_parallel_end, ompt_callback_parallel_end_t, 4); \ + macro(callback_task_create, ompt_callback_task_create_t, 5); \ + macro(callback_task_schedule, ompt_callback_task_schedule_t, 6); \ + macro(callback_implicit_task, ompt_callback_implicit_task_t, 7); \ + macro(callback_target, ompt_callback_target_t, 8); \ + macro(callback_target_data_op, ompt_callback_target_data_op_t, 9); \ + macro(callback_target_submit, ompt_callback_target_submit_t, 10); \ + macro(callback_control_tool, ompt_callback_control_tool_t, 11); \ + macro(callback_device_initialize, ompt_callback_device_initialize_t, 12); \ + macro(callback_device_finalize, ompt_callback_device_finalize_t, 13); \ + macro(callback_device_load, ompt_callback_device_load_t, 14); \ + 
macro(callback_device_unload, ompt_callback_device_unload_t, 15); \ + macro(callback_sync_region_wait, ompt_callback_sync_region_t, 16); \ + macro(callback_mutex_released, ompt_callback_mutex_t, 17); \ + macro(callback_dependences, ompt_callback_dependences_t, 18); \ + macro(callback_task_dependence, ompt_callback_task_dependence_t, 19); \ + macro(callback_work, ompt_callback_work_t, 20); \ + macro(callback_master, ompt_callback_master_t, 21); \ + macro(callback_target_map, ompt_callback_target_map_t, 22); \ + macro(callback_sync_region, ompt_callback_sync_region_t, 23); \ + macro(callback_lock_init, ompt_callback_mutex_acquire_t, 24); \ + macro(callback_lock_destroy, ompt_callback_mutex_t, 25); \ + macro(callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26); \ + macro(callback_mutex_acquired, ompt_callback_mutex_t, 27); \ + macro(callback_nest_lock, ompt_callback_nest_lock_t, 28); \ + macro(callback_flush, ompt_callback_flush_t, 29); \ + macro(callback_cancel, ompt_callback_cancel_t, 30); \ + macro(callback_reduction, ompt_callback_sync_region_t, 31); \ + macro(callback_dispatch, ompt_callback_dispatch_t, 32); + +typedef struct ompt_multiplex_callbacks_s { +#define ompt_event_macro(event, callback, eventid) callback ompt_##event + + OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_multiplex_callbacks_t; + +typedef struct ompt_multiplex_callback_implementation_status_s { +#define ompt_event_macro(event, callback, eventid) int ompt_##event + + OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro) + +#undef ompt_event_macro +} ompt_multiplex_callback_implementation_status_t; + +ompt_start_tool_result_t *ompt_multiplex_own_fns; +ompt_start_tool_result_t *ompt_multiplex_client_fns; +ompt_function_lookup_t ompt_multiplex_lookup_function; +ompt_multiplex_callbacks_t ompt_multiplex_own_callbacks, + ompt_multiplex_client_callbacks; +ompt_multiplex_callback_implementation_status_t + ompt_multiplex_implementation_status; + 
+typedef struct ompt_multiplex_data_pair_s { + ompt_data_t own_data; + ompt_data_t client_data; +} ompt_multiplex_data_pair_t; + +#if !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) || \ + !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) || \ + !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) +static ompt_multiplex_data_pair_t * +ompt_multiplex_allocate_data_pair(ompt_data_t *data_pointer) { + data_pointer->ptr = malloc(sizeof(ompt_multiplex_data_pair_t)); + if (!data_pointer->ptr) { + printf("Malloc ERROR\n"); + exit(-1); + } + ompt_multiplex_data_pair_t *data_pair = + (ompt_multiplex_data_pair_t *)data_pointer->ptr; + data_pair->own_data.ptr = NULL; + data_pair->client_data.ptr = NULL; + return data_pair; +} + +static void ompt_multiplex_free_data_pair(ompt_data_t *data_pointer) { + free((*data_pointer).ptr); +} + +static ompt_data_t *ompt_multiplex_get_own_ompt_data(ompt_data_t *data) { + if (!data) + return NULL; + ompt_multiplex_data_pair_t *data_pair = + (ompt_multiplex_data_pair_t *)data->ptr; + return &(data_pair->own_data); +} + +static ompt_data_t *ompt_multiplex_get_client_ompt_data(ompt_data_t *data) { + if (!data) + return NULL; + ompt_multiplex_data_pair_t *data_pair = + (ompt_multiplex_data_pair_t *)data->ptr; + return &(data_pair->client_data); +} +#endif //! defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) || + //! !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) || + //! 
!defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) + +static ompt_data_t *ompt_multiplex_get_own_thread_data(ompt_data_t *data) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA + return ompt_multiplex_get_own_ompt_data(data); +#else + return data; +#endif +} + +static ompt_data_t *ompt_multiplex_get_own_parallel_data(ompt_data_t *data) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA + return ompt_multiplex_get_own_ompt_data(data); +#else + return data; +#endif +} + +static ompt_data_t *ompt_multiplex_get_own_task_data(ompt_data_t *data) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA + return ompt_multiplex_get_own_ompt_data(data); +#else + return data; +#endif +} + +static ompt_data_t *ompt_multiplex_get_client_thread_data(ompt_data_t *data) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA + return ompt_multiplex_get_client_ompt_data(data); +#else + return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA(data); +#endif +} + +static ompt_data_t *ompt_multiplex_get_client_parallel_data(ompt_data_t *data) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA + return ompt_multiplex_get_client_ompt_data(data); +#else + return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(data); +#endif +} + +static ompt_data_t *ompt_multiplex_get_client_task_data(ompt_data_t *data) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA + return ompt_multiplex_get_client_ompt_data(data); +#else + return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA(data); +#endif +} + +static void ompt_multiplex_callback_mutex_acquire(ompt_mutex_t kind, + unsigned int hint, + unsigned int impl, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_mutex_acquire) { + ompt_multiplex_own_callbacks.ompt_callback_mutex_acquire( + kind, hint, impl, wait_id, codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_mutex_acquire) { + ompt_multiplex_client_callbacks.ompt_callback_mutex_acquire( + kind, hint, impl, wait_id, 
codeptr_ra); + } +} + +static void ompt_multiplex_callback_mutex_acquired(ompt_mutex_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_mutex_acquired) { + ompt_multiplex_own_callbacks.ompt_callback_mutex_acquired(kind, wait_id, + codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_mutex_acquired) { + ompt_multiplex_client_callbacks.ompt_callback_mutex_acquired(kind, wait_id, + codeptr_ra); + } +} + +static void ompt_multiplex_callback_mutex_released(ompt_mutex_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_mutex_released) { + ompt_multiplex_own_callbacks.ompt_callback_mutex_released(kind, wait_id, + codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_mutex_released) { + ompt_multiplex_client_callbacks.ompt_callback_mutex_released(kind, wait_id, + codeptr_ra); + } +} + +static void ompt_multiplex_callback_nest_lock(ompt_scope_endpoint_t endpoint, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_nest_lock) { + ompt_multiplex_own_callbacks.ompt_callback_nest_lock(endpoint, wait_id, + codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_nest_lock) { + ompt_multiplex_client_callbacks.ompt_callback_nest_lock(endpoint, wait_id, + codeptr_ra); + } +} + +static void ompt_multiplex_callback_sync_region(ompt_sync_region_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_sync_region) { + ompt_multiplex_own_callbacks.ompt_callback_sync_region( + kind, endpoint, ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_sync_region) { + ompt_multiplex_client_callbacks.ompt_callback_sync_region( + kind, endpoint, 
ompt_multiplex_get_client_parallel_data(parallel_data), + ompt_multiplex_get_client_task_data(task_data), codeptr_ra); + } +} + +static void ompt_multiplex_callback_sync_region_wait( + ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, ompt_data_t *task_data, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_sync_region_wait) { + ompt_multiplex_own_callbacks.ompt_callback_sync_region_wait( + kind, endpoint, ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_sync_region_wait) { + ompt_multiplex_client_callbacks.ompt_callback_sync_region_wait( + kind, endpoint, ompt_multiplex_get_client_parallel_data(parallel_data), + ompt_multiplex_get_client_task_data(task_data), codeptr_ra); + } +} + +static void ompt_multiplex_callback_flush(ompt_data_t *thread_data, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_flush) { + ompt_multiplex_own_callbacks.ompt_callback_flush( + ompt_multiplex_get_own_thread_data(thread_data), codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_flush) { + ompt_multiplex_client_callbacks.ompt_callback_flush( + ompt_multiplex_get_client_thread_data(thread_data), codeptr_ra); + } +} + +static void ompt_multiplex_callback_cancel(ompt_data_t *task_data, int flags, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_cancel) { + ompt_multiplex_own_callbacks.ompt_callback_cancel( + ompt_multiplex_get_own_task_data(task_data), flags, codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_cancel) { + ompt_multiplex_client_callbacks.ompt_callback_cancel( + ompt_multiplex_get_client_task_data(task_data), flags, codeptr_ra); + } +} + +static void ompt_multiplex_callback_implicit_task( + ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data, + ompt_data_t *task_data, unsigned int team_size, 
unsigned int thread_num, + int flags) { + if (endpoint == ompt_scope_begin) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA + ompt_multiplex_allocate_data_pair(task_data); +#endif +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA + if (flags & ompt_task_initial) + ompt_multiplex_allocate_data_pair(parallel_data); +#endif + if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) { + ompt_multiplex_own_callbacks.ompt_callback_implicit_task( + endpoint, ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), team_size, thread_num, + flags); + } + if (ompt_multiplex_client_callbacks.ompt_callback_implicit_task) { + ompt_multiplex_client_callbacks.ompt_callback_implicit_task( + endpoint, ompt_multiplex_get_client_parallel_data(parallel_data), + ompt_multiplex_get_client_task_data(task_data), team_size, thread_num, + flags); + } + } else { +// defines to make sure, callbacks are called in correct order depending on +// defines set by the user +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) || \ + !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) + if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) { + ompt_multiplex_own_callbacks.ompt_callback_implicit_task( + endpoint, ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), team_size, thread_num, + flags); + } +#endif + + if (ompt_multiplex_client_callbacks.ompt_callback_implicit_task) { + ompt_multiplex_client_callbacks.ompt_callback_implicit_task( + endpoint, ompt_multiplex_get_client_parallel_data(parallel_data), + ompt_multiplex_get_client_task_data(task_data), team_size, thread_num, + flags); + } + +#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) && \ + !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) + if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) { + ompt_multiplex_own_callbacks.ompt_callback_implicit_task( + endpoint, 
ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), team_size, thread_num, + flags); + } +#endif + +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA + ompt_multiplex_free_data_pair(task_data); +#endif + +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) + if (flags & ompt_task_initial) + OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA(parallel_data); +#endif +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) + OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA(task_data); +#endif + } +} + +static void ompt_multiplex_callback_lock_init(ompt_mutex_t kind, + unsigned int hint, + unsigned int impl, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_lock_init) { + ompt_multiplex_own_callbacks.ompt_callback_lock_init(kind, hint, impl, + wait_id, codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_lock_init) { + ompt_multiplex_client_callbacks.ompt_callback_lock_init( + kind, hint, impl, wait_id, codeptr_ra); + } +} + +static void ompt_multiplex_callback_lock_destroy(ompt_mutex_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_lock_destroy) { + ompt_multiplex_own_callbacks.ompt_callback_lock_destroy(kind, wait_id, + codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_lock_destroy) { + ompt_multiplex_client_callbacks.ompt_callback_lock_destroy(kind, wait_id, + codeptr_ra); + } +} + +static void ompt_multiplex_callback_work(ompt_work_t wstype, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, uint64_t count, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_work) { + ompt_multiplex_own_callbacks.ompt_callback_work( + wstype, endpoint, ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), count, codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_work) { + 
ompt_multiplex_client_callbacks.ompt_callback_work( + wstype, endpoint, + ompt_multiplex_get_client_parallel_data(parallel_data), + ompt_multiplex_get_client_task_data(task_data), count, codeptr_ra); + } +} + +static void ompt_multiplex_callback_master(ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { + if (ompt_multiplex_own_callbacks.ompt_callback_master) { + ompt_multiplex_own_callbacks.ompt_callback_master( + endpoint, ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_master) { + ompt_multiplex_client_callbacks.ompt_callback_master( + endpoint, ompt_multiplex_get_client_parallel_data(parallel_data), + ompt_multiplex_get_client_task_data(task_data), codeptr_ra); + } +} + +static void ompt_multiplex_callback_parallel_begin( + ompt_data_t *parent_task_data, const ompt_frame_t *parent_task_frame, + ompt_data_t *parallel_data, uint32_t requested_team_size, int flag, + const void *codeptr_ra) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA + ompt_multiplex_allocate_data_pair(parallel_data); +#endif + if (ompt_multiplex_own_callbacks.ompt_callback_parallel_begin) { + ompt_multiplex_own_callbacks.ompt_callback_parallel_begin( + ompt_multiplex_get_own_task_data(parent_task_data), parent_task_frame, + ompt_multiplex_get_own_parallel_data(parallel_data), + requested_team_size, flag, codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_parallel_begin) { + ompt_multiplex_client_callbacks.ompt_callback_parallel_begin( + ompt_multiplex_get_client_task_data(parent_task_data), + parent_task_frame, + ompt_multiplex_get_client_parallel_data(parallel_data), + requested_team_size, flag, codeptr_ra); + } +} + +static void ompt_multiplex_callback_parallel_end(ompt_data_t *parallel_data, + ompt_data_t *task_data, + int flag, + const void *codeptr_ra) { +// defines to make sure, 
callbacks are called in correct order depending on +// defines set by the user +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) || \ + !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) + if (ompt_multiplex_own_callbacks.ompt_callback_parallel_end) { + ompt_multiplex_own_callbacks.ompt_callback_parallel_end( + ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), flag, codeptr_ra); + } +#endif + + if (ompt_multiplex_client_callbacks.ompt_callback_parallel_end) { + ompt_multiplex_client_callbacks.ompt_callback_parallel_end( + ompt_multiplex_get_client_parallel_data(parallel_data), + ompt_multiplex_get_client_task_data(task_data), flag, codeptr_ra); + } + +#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) && \ + !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) + if (ompt_multiplex_own_callbacks.ompt_callback_parallel_end) { + ompt_multiplex_own_callbacks.ompt_callback_parallel_end( + ompt_multiplex_get_own_parallel_data(parallel_data), + ompt_multiplex_get_own_task_data(task_data), flag, codeptr_ra); + } +#endif + +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA + ompt_multiplex_free_data_pair(parallel_data); +#endif + +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) + OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA(parallel_data); +#endif +} + +static void ompt_multiplex_callback_task_create( + ompt_data_t *parent_task_data, const ompt_frame_t *parent_frame, + ompt_data_t *new_task_data, int type, int has_dependences, + const void *codeptr_ra) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA + ompt_multiplex_allocate_data_pair(new_task_data); +#endif + +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA + if (type & ompt_task_initial) { + ompt_data_t *parallel_data; + ompt_multiplex_get_parallel_info(0, &parallel_data, NULL); + ompt_multiplex_allocate_data_pair(parallel_data); + } +#endif + + if (ompt_multiplex_own_callbacks.ompt_callback_task_create) { + 
ompt_multiplex_own_callbacks.ompt_callback_task_create( + ompt_multiplex_get_own_task_data(parent_task_data), parent_frame, + ompt_multiplex_get_own_task_data(new_task_data), type, has_dependences, + codeptr_ra); + } + if (ompt_multiplex_client_callbacks.ompt_callback_task_create) { + ompt_multiplex_client_callbacks.ompt_callback_task_create( + ompt_multiplex_get_client_task_data(parent_task_data), parent_frame, + ompt_multiplex_get_client_task_data(new_task_data), type, + has_dependences, codeptr_ra); + } +} + +static void +ompt_multiplex_callback_task_schedule(ompt_data_t *first_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *second_task_data) { + if (prior_task_status != ompt_task_complete) { + if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) { + ompt_multiplex_own_callbacks.ompt_callback_task_schedule( + ompt_multiplex_get_own_task_data(first_task_data), prior_task_status, + ompt_multiplex_get_own_task_data(second_task_data)); + } + if (ompt_multiplex_client_callbacks.ompt_callback_task_schedule) { + ompt_multiplex_client_callbacks.ompt_callback_task_schedule( + ompt_multiplex_get_client_task_data(first_task_data), + prior_task_status, + ompt_multiplex_get_client_task_data(second_task_data)); + } + } else { +// defines to make sure, callbacks are called in correct order depending on +// defines set by the user +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) || \ + !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) + if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) { + ompt_multiplex_own_callbacks.ompt_callback_task_schedule( + ompt_multiplex_get_own_task_data(first_task_data), prior_task_status, + ompt_multiplex_get_own_task_data(second_task_data)); + } +#endif + + if (ompt_multiplex_client_callbacks.ompt_callback_task_schedule) { + ompt_multiplex_client_callbacks.ompt_callback_task_schedule( + ompt_multiplex_get_client_task_data(first_task_data), + prior_task_status, + 
ompt_multiplex_get_client_task_data(second_task_data)); + } + +#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) && \ + !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) + if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) { + ompt_multiplex_own_callbacks.ompt_callback_task_schedule( + ompt_multiplex_get_own_task_data(first_task_data), prior_task_status, + ompt_multiplex_get_own_task_data(second_task_data)); + } +#endif + +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA + ompt_multiplex_free_data_pair(first_task_data); +#endif + +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) + OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA(first_task_data); +#endif + } +} + +static void ompt_multiplex_callback_dependences(ompt_data_t *task_data, + const ompt_dependence_t *deps, + int ndeps) { + if (ompt_multiplex_own_callbacks.ompt_callback_dependences) { + ompt_multiplex_own_callbacks.ompt_callback_dependences( + ompt_multiplex_get_own_task_data(task_data), deps, ndeps); + } + if (ompt_multiplex_client_callbacks.ompt_callback_dependences) { + ompt_multiplex_client_callbacks.ompt_callback_dependences( + ompt_multiplex_get_client_task_data(task_data), deps, ndeps); + } +} + +static void +ompt_multiplex_callback_task_dependence(ompt_data_t *first_task_data, + ompt_data_t *second_task_data) { + if (ompt_multiplex_own_callbacks.ompt_callback_task_dependence) { + ompt_multiplex_own_callbacks.ompt_callback_task_dependence( + ompt_multiplex_get_own_task_data(first_task_data), + ompt_multiplex_get_own_task_data(second_task_data)); + } + if (ompt_multiplex_client_callbacks.ompt_callback_task_dependence) { + ompt_multiplex_client_callbacks.ompt_callback_task_dependence( + ompt_multiplex_get_client_task_data(first_task_data), + ompt_multiplex_get_client_task_data(second_task_data)); + } +} + +static void ompt_multiplex_callback_thread_begin(ompt_thread_t thread_type, + ompt_data_t *thread_data) { +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA + 
ompt_multiplex_allocate_data_pair(thread_data); +#endif + if (ompt_multiplex_own_callbacks.ompt_callback_thread_begin) { + ompt_multiplex_own_callbacks.ompt_callback_thread_begin( + thread_type, ompt_multiplex_get_own_thread_data(thread_data)); + } + if (ompt_multiplex_client_callbacks.ompt_callback_thread_begin) { + ompt_multiplex_client_callbacks.ompt_callback_thread_begin( + thread_type, ompt_multiplex_get_client_thread_data(thread_data)); + } +} + +static void ompt_multiplex_callback_thread_end(ompt_data_t *thread_data) { +// defines to make sure, callbacks are called in correct order depending on +// defines set by the user +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA) || \ + !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) + if (ompt_multiplex_own_callbacks.ompt_callback_thread_end) { + ompt_multiplex_own_callbacks.ompt_callback_thread_end( + ompt_multiplex_get_own_thread_data(thread_data)); + } +#endif + + if (ompt_multiplex_client_callbacks.ompt_callback_thread_end) { + ompt_multiplex_client_callbacks.ompt_callback_thread_end( + ompt_multiplex_get_client_thread_data(thread_data)); + } + +#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) && \ + !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA) + if (ompt_multiplex_own_callbacks.ompt_callback_thread_end) { + ompt_multiplex_own_callbacks.ompt_callback_thread_end( + ompt_multiplex_get_own_thread_data(thread_data)); + } +#endif + +#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA + ompt_multiplex_free_data_pair(thread_data); +#endif + +#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA) + OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA(thread_data); +#endif +} + +static int ompt_multiplex_callback_control_tool(uint64_t command, + uint64_t modifier, void *arg, + const void *codeptr_ra) { + int ownRet = 0, clientRet = 0; + if (ompt_multiplex_own_callbacks.ompt_callback_control_tool) { + ownRet = ompt_multiplex_own_callbacks.ompt_callback_control_tool( + command, modifier, arg, codeptr_ra); + 
}
+  if (ompt_multiplex_client_callbacks.ompt_callback_control_tool) {
+    clientRet = ompt_multiplex_client_callbacks.ompt_callback_control_tool(
+        command, modifier, arg, codeptr_ra);
+  }
+  return ownRet < clientRet ? ownRet : clientRet;
+}
+
+// Forwards a target event to the own tool's callback and then to the client
+// tool's callback (each only if set), handing each tool its own view of
+// task_data.
+static void ompt_multiplex_callback_target(
+    ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num,
+    ompt_data_t *task_data, ompt_id_t target_id, const void *codeptr_ra) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_target) {
+    ompt_multiplex_own_callbacks.ompt_callback_target(
+        kind, endpoint, device_num, ompt_multiplex_get_own_task_data(task_data),
+        target_id, codeptr_ra);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_target) {
+    ompt_multiplex_client_callbacks.ompt_callback_target(
+        kind, endpoint, device_num,
+        ompt_multiplex_get_client_task_data(task_data), target_id, codeptr_ra);
+  }
+}
+
+// Forwards a target-data-op event unchanged to the own and then the client
+// callback, each only if set.
+static void ompt_multiplex_callback_target_data_op(
+    ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype,
+    void *src_addr, int src_device_num, void *dest_addr, int dest_device_num,
+    size_t bytes, const void *codeptr_ra) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_target_data_op) {
+    ompt_multiplex_own_callbacks.ompt_callback_target_data_op(
+        target_id, host_op_id, optype, src_addr, src_device_num, dest_addr,
+        dest_device_num, bytes, codeptr_ra);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_target_data_op) {
+    ompt_multiplex_client_callbacks.ompt_callback_target_data_op(
+        target_id, host_op_id, optype, src_addr, src_device_num, dest_addr,
+        dest_device_num, bytes, codeptr_ra);
+  }
+}
+
+// Forwards a target-submit event unchanged to the own and then the client
+// callback, each only if set.
+static void
+ompt_multiplex_callback_target_submit(ompt_id_t target_id, ompt_id_t host_op_id,
+                                      unsigned int requested_num_teams) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_target_submit) {
+    ompt_multiplex_own_callbacks.ompt_callback_target_submit(
+        target_id, host_op_id, requested_num_teams);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_target_submit) {
+    ompt_multiplex_client_callbacks.ompt_callback_target_submit(
+        target_id, host_op_id, requested_num_teams);
+  }
+}
+
+// Forwards a device-initialize event unchanged to the own and then the client
+// callback, each only if set.
+static void ompt_multiplex_callback_device_initialize(
+    int device_num, const char *type, ompt_device_t *device,
+    ompt_function_lookup_t lookup, const char *documentation) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_device_initialize) {
+    ompt_multiplex_own_callbacks.ompt_callback_device_initialize(
+        device_num, type, device, lookup, documentation);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_device_initialize) {
+    ompt_multiplex_client_callbacks.ompt_callback_device_initialize(
+        device_num, type, device, lookup, documentation);
+  }
+}
+
+// Forwards a device-finalize event unchanged to the own and then the client
+// callback, each only if set.
+static void ompt_multiplex_callback_device_finalize(int device_num) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_device_finalize) {
+    ompt_multiplex_own_callbacks.ompt_callback_device_finalize(device_num);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_device_finalize) {
+    ompt_multiplex_client_callbacks.ompt_callback_device_finalize(device_num);
+  }
+}
+
+// Forwards a device-load event unchanged to the own and then the client
+// callback, each only if set.
+static void
+ompt_multiplex_callback_device_load(int device_num, const char *filename,
+                                    int64_t offset_in_file, void *vma_in_file,
+                                    size_t bytes, void *host_addr,
+                                    void *device_addr, uint64_t module_id) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_device_load) {
+    ompt_multiplex_own_callbacks.ompt_callback_device_load(
+        device_num, filename, offset_in_file, vma_in_file, bytes, host_addr,
+        device_addr, module_id);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_device_load) {
+    ompt_multiplex_client_callbacks.ompt_callback_device_load(
+        device_num, filename, offset_in_file, vma_in_file, bytes, host_addr,
+        device_addr, module_id);
+  }
+}
+
+// Forwards a device-unload event unchanged to the own and then the client
+// callback, each only if set.
+static void ompt_multiplex_callback_device_unload(int device_num,
+                                                  uint64_t module_id) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_device_unload) {
+    ompt_multiplex_own_callbacks.ompt_callback_device_unload(device_num,
+                                                             module_id);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_device_unload) {
+    ompt_multiplex_client_callbacks.ompt_callback_device_unload(device_num,
+                                                                module_id);
+  }
+}
+
+// Forwards a target-map event unchanged to the own and then the client
+// callback, each only if set.
+static void
+ompt_multiplex_callback_target_map(ompt_id_t target_id, unsigned int nitems,
+                                   void **host_addr, void **device_addr,
+                                   size_t *bytes, unsigned int *mapping_flags,
+                                   const void *codeptr_ra) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_target_map) {
+    ompt_multiplex_own_callbacks.ompt_callback_target_map(
+        target_id, nitems, host_addr, device_addr, bytes, mapping_flags,
+        codeptr_ra);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_target_map) {
+    ompt_multiplex_client_callbacks.ompt_callback_target_map(
+        target_id, nitems, host_addr, device_addr, bytes, mapping_flags,
+        codeptr_ra);
+  }
+}
+
+// Forwards a reduction event to the own and then the client callback (each
+// only if set), translating parallel_data and task_data to the respective
+// tool's view.
+static void ompt_multiplex_callback_reduction(ompt_sync_region_t kind,
+                                              ompt_scope_endpoint_t endpoint,
+                                              ompt_data_t *parallel_data,
+                                              ompt_data_t *task_data,
+                                              const void *codeptr_ra) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_reduction) {
+    ompt_multiplex_own_callbacks.ompt_callback_reduction(
+        kind, endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
+        ompt_multiplex_get_own_task_data(task_data), codeptr_ra);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_reduction) {
+    ompt_multiplex_client_callbacks.ompt_callback_reduction(
+        kind, endpoint, ompt_multiplex_get_client_parallel_data(parallel_data),
+        ompt_multiplex_get_client_task_data(task_data), codeptr_ra);
+  }
+}
+
+// Forwards a dispatch event to the own and then the client callback (each
+// only if set), translating parallel_data and task_data to the respective
+// tool's view.
+static void ompt_multiplex_callback_dispatch(ompt_data_t *parallel_data,
+                                             ompt_data_t *task_data,
+                                             ompt_dispatch_t kind,
+                                             ompt_data_t instance) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_dispatch) {
+    ompt_multiplex_own_callbacks.ompt_callback_dispatch(
+        ompt_multiplex_get_own_parallel_data(parallel_data),
+        ompt_multiplex_get_own_task_data(task_data), kind, instance);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_dispatch) {
+    ompt_multiplex_client_callbacks.ompt_callback_dispatch(
+        ompt_multiplex_get_client_parallel_data(parallel_data),
+        ompt_multiplex_get_client_task_data(task_data), kind, instance);
+  }
+}
+
+// runtime entry functions
+
+// ompt_get_task_info as seen by the own tool: calls the runtime's function and
+// then, unless the corresponding custom client accessor macro is defined,
+// replaces *task_data / *parallel_data with the own tool's data pointer.
+int ompt_multiplex_own_get_task_info(int ancestor_level, int *type,
+                                     ompt_data_t **task_data,
+                                     ompt_frame_t **task_frame,
+                                     ompt_data_t **parallel_data,
+                                     int *thread_num) {
+  int ret = ompt_multiplex_get_task_info(ancestor_level, type, task_data,
+                                         task_frame, parallel_data, thread_num);
+
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
+  if (task_data)
+    *task_data = ompt_multiplex_get_own_ompt_data(*task_data);
+#endif
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
+  if (parallel_data)
+    *parallel_data = ompt_multiplex_get_own_ompt_data(*parallel_data);
+#endif
+  return ret;
+}
+
+// ompt_get_task_info as seen by the client tool: calls the runtime's function
+// and replaces *task_data / *parallel_data with the client's data pointer,
+// using the custom accessor macro when one is defined.
+int ompt_multiplex_client_get_task_info(int ancestor_level, int *type,
+                                        ompt_data_t **task_data,
+                                        ompt_frame_t **task_frame,
+                                        ompt_data_t **parallel_data,
+                                        int *thread_num) {
+  int ret = ompt_multiplex_get_task_info(ancestor_level, type, task_data,
+                                         task_frame, parallel_data, thread_num);
+
+  if (task_data)
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
+    *task_data = ompt_multiplex_get_client_ompt_data(*task_data);
+#else
+    *task_data = OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA(*task_data);
+#endif
+
+  if (parallel_data)
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
+    *parallel_data = ompt_multiplex_get_client_ompt_data(*parallel_data);
+#else
+    *parallel_data =
+        OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(*parallel_data);
+#endif
+  return ret;
+}
+
+// ompt_get_thread_data as seen by the own tool. With a custom client accessor
+// defined, the runtime's pointer is returned untranslated.
+ompt_data_t *ompt_multiplex_own_get_thread_data() {
+  ompt_data_t *ret;
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
+  ret = ompt_multiplex_get_own_ompt_data(ompt_multiplex_get_thread_data());
+#else
+  ret = ompt_multiplex_get_thread_data();
+#endif
+  return ret;
+}
+
+// ompt_get_thread_data as seen by the client tool; uses the custom accessor
+// macro when one is defined.
+ompt_data_t *ompt_multiplex_client_get_thread_data() {
+  ompt_data_t *ret;
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
+  ret = ompt_multiplex_get_client_ompt_data(ompt_multiplex_get_thread_data());
+#else
+  ret = OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA(
+      ompt_multiplex_get_thread_data());
+#endif
+  return ret;
+}
+
+// ompt_get_parallel_info as seen by the own tool: *parallel_data is replaced
+// by the result of ompt_multiplex_get_own_parallel_data.
+int ompt_multiplex_own_get_parallel_info(int ancestor_level,
+                                         ompt_data_t **parallel_data,
+                                         int *team_size) {
+  int ret = ompt_multiplex_get_parallel_info(ancestor_level, parallel_data,
+                                             team_size);
+  if (parallel_data)
+    *parallel_data = ompt_multiplex_get_own_parallel_data(*parallel_data);
+  return ret;
+}
+
+// ompt_get_parallel_info as seen by the client tool; uses the custom accessor
+// macro when one is defined.
+int ompt_multiplex_client_get_parallel_info(int ancestor_level,
+                                            ompt_data_t **parallel_data,
+                                            int *team_size) {
+  int ret = ompt_multiplex_get_parallel_info(ancestor_level, parallel_data,
+                                             team_size);
+  if (parallel_data)
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
+    *parallel_data = ompt_multiplex_get_client_ompt_data(*parallel_data);
+#else
+    *parallel_data =
+        OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(*parallel_data);
+#endif
+  return ret;
+}
+
+// ompt_set_callback handed to the own tool: stores the tool's callback in
+// ompt_multiplex_own_callbacks and, the first time an event is requested
+// (status still -1), registers the multiplexing callback with the runtime and
+// caches the runtime's answer. Events not covered by the macro list yield
+// ompt_set_error.
+OMPT_API_ROUTINE int ompt_multiplex_own_set_callback(ompt_callbacks_t which,
+                                                     ompt_callback_t callback) {
+  switch (which) {
+
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  case ompt_##event_name: \
+    ompt_multiplex_own_callbacks.ompt_##event_name = (callback_type)callback; \
+    if (ompt_multiplex_implementation_status.ompt_##event_name == -1) \
+      return ompt_multiplex_implementation_status.ompt_##event_name = \
+                 ompt_multiplex_set_callback( \
+                     ompt_##event_name, \
+                     (ompt_callback_t)&ompt_multiplex_##event_name); \
+    else \
+      return ompt_multiplex_implementation_status.ompt_##event_name
+
+    OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  default:
+    return ompt_set_error;
+  }
+}
+
+// ompt_set_callback handed to the client tool; same scheme as the own variant
+// but stores into ompt_multiplex_client_callbacks.
+OMPT_API_ROUTINE int
+ompt_multiplex_client_set_callback(ompt_callbacks_t which,
+                                   ompt_callback_t callback) {
+  switch (which) {
+
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  case ompt_##event_name: \
+    ompt_multiplex_client_callbacks.ompt_##event_name = \
+        (callback_type)callback; \
+    if (ompt_multiplex_implementation_status.ompt_##event_name == -1) \
+      return ompt_multiplex_implementation_status.ompt_##event_name = \
+                 ompt_multiplex_set_callback( \
+                     ompt_##event_name, \
+                     (ompt_callback_t)&ompt_multiplex_##event_name); \
+    else \
+      return ompt_multiplex_implementation_status.ompt_##event_name
+
+    OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  default:
+    return ompt_set_error;
+  }
+}
+
+// Lookup function passed to the own tool: the four wrapped entry points are
+// answered with the "own" wrappers, every other name goes to the runtime's
+// lookup.
+ompt_interface_fn_t ompt_multiplex_own_lookup(const char *name) {
+  if (!strcmp(name, "ompt_set_callback"))
+    return (ompt_interface_fn_t)&ompt_multiplex_own_set_callback;
+  else if (!strcmp(name, "ompt_get_task_info"))
+    return (ompt_interface_fn_t)&ompt_multiplex_own_get_task_info;
+  else if (!strcmp(name, "ompt_get_thread_data"))
+    return (ompt_interface_fn_t)&ompt_multiplex_own_get_thread_data;
+  else if (!strcmp(name, "ompt_get_parallel_info"))
+    return (ompt_interface_fn_t)&ompt_multiplex_own_get_parallel_info;
+  else
+    return ompt_multiplex_lookup_function(name);
+}
+
+// Lookup function passed to the client tool: the four wrapped entry points are
+// answered with the "client" wrappers, every other name goes to the runtime's
+// lookup.
+ompt_interface_fn_t ompt_multiplex_client_lookup(const char *name) {
+  if (!strcmp(name, "ompt_set_callback"))
+    return (ompt_interface_fn_t)&ompt_multiplex_client_set_callback;
+  else if (!strcmp(name, "ompt_get_task_info"))
+    return (ompt_interface_fn_t)&ompt_multiplex_client_get_task_info;
+  else if (!strcmp(name, "ompt_get_thread_data"))
+    return (ompt_interface_fn_t)&ompt_multiplex_client_get_thread_data;
+  else if (!strcmp(name, "ompt_get_parallel_info"))
+    return (ompt_interface_fn_t)&ompt_multiplex_client_get_parallel_info;
+  else
+    return ompt_multiplex_lookup_function(name);
+}
+
+// Initializer returned to the runtime by ompt_start_tool. Saves the runtime's
+// lookup and the four wrapped entry points, marks every event status as "not
+// yet registered" (-1), then initializes the own tool and, if one was loaded,
+// the client tool. Returns the larger of the two initialize results.
+int ompt_multiplex_initialize(ompt_function_lookup_t lookup,
+                              int initial_device_num, ompt_data_t *data) {
+  ompt_multiplex_lookup_function = lookup;
+  ompt_multiplex_set_callback =
+      (ompt_set_callback_t)lookup("ompt_set_callback");
+  ompt_multiplex_get_task_info =
+      (ompt_get_task_info_t)lookup("ompt_get_task_info");
+  ompt_multiplex_get_thread_data =
+      (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
+  ompt_multiplex_get_parallel_info =
+      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
+
+  // initialize ompt_multiplex_implementation_status
+#define ompt_event_macro(event_name, callback_type, event_id) \
+  ompt_multiplex_implementation_status.ompt_##event_name = -1
+
+  OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  int ownRet = ompt_multiplex_own_fns->initialize(
+      ompt_multiplex_own_lookup, initial_device_num,
+      &(ompt_multiplex_own_fns->tool_data));
+  int clientRet = 0;
+  if (ompt_multiplex_client_fns)
+    clientRet = ompt_multiplex_client_fns->initialize(
+        ompt_multiplex_client_lookup, initial_device_num,
+        &(ompt_multiplex_client_fns->tool_data));
+
+  return ownRet > clientRet ? ownRet : clientRet;
+}
+
+// Finalizer returned to the runtime by ompt_start_tool: finalizes the client
+// tool first (if one was loaded), then the own tool. The argument is unused.
+void ompt_multiplex_finalize(ompt_data_t *fns) {
+  if (ompt_multiplex_client_fns)
+    ompt_multiplex_client_fns->finalize(
+        &(ompt_multiplex_client_fns->tool_data));
+  ompt_multiplex_own_fns->finalize(&(ompt_multiplex_own_fns->tool_data));
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ompt_start_tool_result_t *
+ompt_multiplex_own_start_tool(unsigned int omp_version,
+                              const char *runtime_version);
+
+// Multiplexing ompt_start_tool. Walks the ':'-separated list of library paths
+// in the environment variable named by CLIENT_TOOL_LIBRARIES_VAR and keeps the
+// first library whose ompt_start_tool returns non-NULL as the client tool.
+// Then starts the own tool via ompt_multiplex_own_start_tool and returns a
+// result that points at the multiplexing initialize/finalize functions.
+ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
+                                          const char *runtime_version) {
+  // try loading client tool
+  ompt_multiplex_client_fns = NULL;
+  ompt_start_tool_result_t *(*client_start_tool)(unsigned int, const char *) =
+      NULL;
+
+  const char *tool_libs = getenv(CLIENT_TOOL_LIBRARIES_VAR);
+  if (tool_libs) {
+    const size_t tool_libs_len = strlen(tool_libs);
+    // copy environment variable; the +1 makes room for the terminating NUL
+    // that strcpy writes
+    char *tool_libs_buffer =
+        (char *)malloc(sizeof(char) * (tool_libs_len + 1));
+    if (!tool_libs_buffer) {
+      printf("malloc Error\n");
+      exit(-1);
+    }
+    strcpy(tool_libs_buffer, tool_libs);
+
+    size_t progress = 0;
+    while (progress < tool_libs_len) {
+      // find the end of the current path and terminate it in the copy
+      size_t tmp_progress = progress;
+      while (tmp_progress < tool_libs_len &&
+             tool_libs_buffer[tmp_progress] != ':')
+        tmp_progress++;
+      if (tmp_progress < tool_libs_len)
+        tool_libs_buffer[tmp_progress] = 0;
+      void *h = dlopen(tool_libs_buffer + progress, RTLD_LAZY);
+      if (h) {
+        client_start_tool =
+            (ompt_start_tool_result_t * (*)(unsigned int, const char *))
+                dlsym(h, "ompt_start_tool");
+        if (client_start_tool &&
+            (ompt_multiplex_client_fns =
+                 (*client_start_tool)(omp_version, runtime_version))) {
+          break;
+        }
+      } else {
+        printf("Loading %s from %s failed with: %s\n",
+               tool_libs_buffer + progress, CLIENT_TOOL_LIBRARIES_VAR,
+               dlerror());
+      }
+      progress = tmp_progress + 1;
+    }
+    free(tool_libs_buffer);
+  }
+  // load own tool
+  ompt_multiplex_own_fns =
+      ompt_multiplex_own_start_tool(omp_version, runtime_version);
+
+  // return multiplexed versions
+  static ompt_start_tool_result_t ompt_start_tool_result = {
+      &ompt_multiplex_initialize, &ompt_multiplex_finalize, {0}};
+  return &ompt_start_tool_result;
+}
+#ifdef __cplusplus
+}
+#endif
+
+// We rename the ompt_start_tool function of the OMPT tool and call the
+// renamed function from the ompt_start_tool function defined above.
+#define ompt_start_tool ompt_multiplex_own_start_tool
+
+#endif /* OMPT_MULTIPLEX_H */
diff --git a/openmp/tools/multiplex/tests/CMakeLists.txt b/openmp/tools/multiplex/tests/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/openmp/tools/multiplex/tests/CMakeLists.txt
@@ -0,0 +1,21 @@
+# CMakeLists.txt file for unit testing OMPT multiplex header.
+include(CheckFunctionExists)
+include(CheckLibraryExists)
+
+# Rewrites the variable named by `var` to the Python literal True or False.
+macro(pythonize_bool var)
+  if (${var})
+    set(${var} True)
+  else()
+    set(${var} False)
+  endif()
+endmacro()
+
+set(OMPT_LOAD_CLIENT_TEST_CFLAGS "" CACHE STRING
+  "Extra compiler flags to send to the test compiler")
+
+get_target_property(OMPT_PRINT_CALLBACKS_DIR ompt-print-callback INTERFACE_INCLUDE_DIRECTORIES)
+add_openmp_testsuite(check-ompt-multiplex "Running OMPT multiplex tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omp)
+
+# Configure the lit.site.cfg.in file
+set(AUTO_GEN_COMMENT "## Autogenerated by OMPT_LOAD_CLIENT configuration.\n# Do not edit!")
+configure_file(lit.site.cfg.in lit.site.cfg @ONLY)
diff --git a/openmp/tools/multiplex/tests/custom_data_storage/custom_data_storage.c b/openmp/tools/multiplex/tests/custom_data_storage/custom_data_storage.c
new file mode 100644
--- /dev/null
+++ b/openmp/tools/multiplex/tests/custom_data_storage/custom_data_storage.c
@@ -0,0 +1,313 @@
+// RUN: %libomp-tool -DFIRST_TOOL -o %t.first.tool.so %s && \
+// RUN: %libomp-tool -DSECOND_TOOL -o %t.second.tool.so %s && \
+// RUN: %libomp-compile && \
+// RUN: env OMP_TOOL_LIBRARIES=%t.first.tool.so \
+// RUN: CUSTOM_DATA_STORAGE_TOOL_LIBRARIES=%t.second.tool.so \
+// RUN: %libomp-run | %sort-threads | FileCheck %s
+
+#if defined(FIRST_TOOL)
+#include "first-tool.h"
+#elif defined(SECOND_TOOL)
+#include "second-tool.h"
+#else /* APP */
+
+#include "../ompt-signal.h"
+#include "omp.h"
+// NOTE(review): the header name of this include was lost in extraction;
+// <stdio.h> is assumed (it provides NULL, used below) -- confirm.
+#include <stdio.h>
+
+// Application under test: a two-thread parallel region in which the master
+// thread creates one explicit task. The task calls omp_control_tool(5, 1,
+// NULL) and then signals s; thread 1 waits for that signal.
+int main() {
+  int s = 0;
+#pragma omp parallel num_threads(2) shared(s)
+  {
+#pragma omp master
+    {
+#pragma omp task shared(s)
+      {
+        omp_control_tool(5, 1, NULL);
+        OMPT_SIGNAL(s);
+      }
+    }
+    if (omp_get_thread_num() == 1)
+      OMPT_WAIT(s, 1);
+  }
+  return 0;
+}
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback + +// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] +// CHECK: {{^}}0: NULL_POINTER=[[NULL]] +// CHECK: {{^}}0: ompt_event_runtime_shutdown +// CHECK: {{^}}0: ompt_event_runtime_shutdown + +// CHECK: {{^}}[[_1ST_MSTR_TID:[0-9]+]]: _first_tool: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_initial=1, +// CHECK-SAME: thread_id=[[_1ST_MSTR_TID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_initial_task_begin: +// CHECK-SAME: parallel_id=[[_FIRST_INIT_PARALLEL_ID:[0-9]+]], +// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID:[0-9]+]], actual_parallelism=1, + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[_FIRST_INITIAL_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit=(nil), +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID:[0-9]+]], requested_team_size=2, +// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, invoker=2 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// CHECK-SAME: thread_num=0 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_master_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_create: +// CHECK-SAME: parent_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}, +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[_FIRST_EXPLICIT_TASK_ID:[0-9]+]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, +// CHECK-SAME: has_dependences=no + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_master_end: +// CHECK-SAME: 
parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_barrier_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_wait_barrier_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[_FIRST_EXPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_switch=7 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_control_tool: +// CHECK-SAME: command=5, modifier=1, arg=(nil), +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 0: +// CHECK-SAME: task_id=[[_FIRST_EXPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 1: +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 2: +// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: +// CHECK-SAME: _first_tool: parallel level 0: parallel_id=[[_FIRST_PARALLEL_ID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: parallel level 1: +// CHECK-SAME: parallel_id={{[0-9]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: +// CHECK-SAME: _first_tool: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[_FIRST_EXPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_complete=1 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_end: +// CHECK-SAME: task_id=[[_FIRST_EXPLICIT_TASK_ID]] + +// 
CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_wait_barrier_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_barrier_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_implicit_task_end: +// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: team_size=2, thread_num=0 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_parallel_end: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID]], invoker=2, +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[_1ST_MSTR_TID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID:[0-9]+]]: second_tool: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_initial=1, +// CHECK-SAME: thread_id=[[_2ND_MSTR_TID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_initial_task_begin: +// CHECK-SAME: parallel_id=[[SECOND_INIT_PARALLEL_ID:[0-9]+]], +// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID:[0-9]+]], actual_parallelism=1, +// CHECK-SAME: index=1, flags=1 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[SECOND_INITIAL_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit=(nil), +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID:[0-9]+]], requested_team_size=2, +// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, invoker=2 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// CHECK-SAME: thread_num=0 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: 
second_tool: ompt_event_master_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_create: +// CHECK-SAME: parent_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}, +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[SECOND_EXPLICIT_TASK_ID:[0-9]+]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, +// CHECK-SAME: has_dependences=no + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_master_end: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_barrier_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_wait_barrier_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[SECOND_EXPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_switch=7 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_control_tool: +// CHECK-SAME: command=5, modifier=1, arg=(nil), +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 0: +// CHECK-SAME: task_id=[[SECOND_EXPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 1: +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: 
second_tool: task level 2: +// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: +// CHECK-SAME: second_tool: parallel level 0: parallel_id=[[SECOND_PARALLEL_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: parallel level 1: +// CHECK-SAME: parallel_id={{[0-9]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: +// CHECK-SAME: second_tool: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[SECOND_EXPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_complete=1 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_end: +// CHECK-SAME: task_id=[[SECOND_EXPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_wait_barrier_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_barrier_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_implicit_task_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], team_size=2, +// CHECK-SAME: thread_num=0 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_parallel_end: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID]], invoker=2, +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[_2ND_MSTR_TID]] + +// CHECK: {{^}}[[_1ST_WRKR_TID:[0-9]+]]: _first_tool: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_worker=2, +// CHECK-SAME: thread_id=[[_1ST_WRKR_TID]] + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: 
task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// CHECK-SAME: thread_num=1 + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_barrier_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_wait_barrier_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_wait_barrier_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_barrier_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_implicit_task_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], team_size=0, +// CHECK-SAME: thread_num=1 + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[_1ST_WRKR_TID]] + +// CHECK: {{^}}[[_2ND_WRKR_TID:[0-9]+]]: second_tool: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_worker=2, +// CHECK-SAME: thread_id=[[_2ND_WRKR_TID]] + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// CHECK-SAME: thread_num=1 + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_barrier_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_wait_barrier_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: 
task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_wait_barrier_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_barrier_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_implicit_task_end: +// CHECK-SAME: parallel_id=0, +// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], team_size=0, +// CHECK-SAME: thread_num=1 + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[_2ND_WRKR_TID]] + +#endif /* APP */ diff --git a/openmp/tools/multiplex/tests/custom_data_storage/first-tool.h b/openmp/tools/multiplex/tests/custom_data_storage/first-tool.h new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/tests/custom_data_storage/first-tool.h @@ -0,0 +1,293 @@ +#include "omp-tools.h" + +#define ompt_start_tool disable_ompt_start_tool +#define _TOOL_PREFIX " _first_tool:" +#include "callback.h" +#undef _TOOL_PREFIX +#undef ompt_start_tool + +#define CLIENT_TOOL_LIBRARIES_VAR "CUSTOM_DATA_STORAGE_TOOL_LIBRARIES" +static ompt_data_t *custom_get_client_ompt_data(ompt_data_t *); +static void free_data_pair(ompt_data_t *); +#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA custom_get_client_ompt_data +#define OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA free_data_pair +#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA \ + custom_get_client_ompt_data +#define OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA free_data_pair +#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA custom_get_client_ompt_data +#define OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA free_data_pair +#include "ompt-multiplex.h" + +typedef struct custom_data_pair_s { + ompt_data_t own_data; + ompt_data_t client_data; +} custom_data_pair_t; + 
+static ompt_data_t *custom_get_client_ompt_data(ompt_data_t *data) { + if (data) + return &(((custom_data_pair_t *)(data->ptr))->client_data); + else + return NULL; +} + +static ompt_data_t *get_own_ompt_data(ompt_data_t *data) { + if (data) + return &(((custom_data_pair_t *)(data->ptr))->own_data); + else + return NULL; +} + +static ompt_multiplex_data_pair_t * +allocate_data_pair(ompt_data_t *data_pointer) { + data_pointer->ptr = malloc(sizeof(ompt_multiplex_data_pair_t)); + if (!data_pointer->ptr) { + printf("Malloc ERROR\n"); + exit(-1); + } + ompt_multiplex_data_pair_t *data_pair = + (ompt_multiplex_data_pair_t *)data_pointer->ptr; + data_pair->own_data.ptr = NULL; + data_pair->client_data.ptr = NULL; + return data_pair; +} + +static void free_data_pair(ompt_data_t *data_pointer) { + free((*data_pointer).ptr); +} + +static void on_cds_ompt_callback_sync_region(ompt_sync_region_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { + parallel_data = get_own_ompt_data(parallel_data); + task_data = get_own_ompt_data(task_data); + on_ompt_callback_sync_region(kind, endpoint, parallel_data, task_data, + codeptr_ra); +} + +static void on_cds_ompt_callback_sync_region_wait( + ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, ompt_data_t *task_data, + const void *codeptr_ra) { + parallel_data = get_own_ompt_data(parallel_data); + task_data = get_own_ompt_data(task_data); + on_ompt_callback_sync_region_wait(kind, endpoint, parallel_data, task_data, + codeptr_ra); +} + +static void on_cds_ompt_callback_flush(ompt_data_t *thread_data, + const void *codeptr_ra) { + thread_data = get_own_ompt_data(thread_data); + on_cds_ompt_callback_flush(thread_data, codeptr_ra); +} + +static void on_cds_ompt_callback_cancel(ompt_data_t *task_data, int flags, + const void *codeptr_ra) { + task_data = get_own_ompt_data(task_data); + on_ompt_callback_cancel(task_data, flags, 
codeptr_ra); +} + +static void on_cds_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int team_size, + unsigned int thread_num, + int type) { + if (endpoint == ompt_scope_begin && (type & ompt_task_initial)) { + allocate_data_pair(parallel_data); + } + if (endpoint == ompt_scope_begin) { + allocate_data_pair(task_data); + } + parallel_data = get_own_ompt_data(parallel_data); + task_data = get_own_ompt_data(task_data); + on_ompt_callback_implicit_task(endpoint, parallel_data, task_data, team_size, + thread_num, type); +} + +static void on_cds_ompt_callback_work(ompt_work_t wstype, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, uint64_t count, + const void *codeptr_ra) { + parallel_data = get_own_ompt_data(parallel_data); + task_data = get_own_ompt_data(task_data); + on_ompt_callback_work(wstype, endpoint, parallel_data, task_data, count, + codeptr_ra); +} + +static void on_cds_ompt_callback_master(ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra) { + parallel_data = get_own_ompt_data(parallel_data); + task_data = get_own_ompt_data(task_data); + on_ompt_callback_master(endpoint, parallel_data, task_data, codeptr_ra); +} + +static void on_cds_ompt_callback_parallel_begin( + ompt_data_t *parent_task_data, const ompt_frame_t *parent_task_frame, + ompt_data_t *parallel_data, uint32_t requested_team_size, int invoker, + const void *codeptr_ra) { + parent_task_data = get_own_ompt_data(parent_task_data); + if (parallel_data->ptr) + printf("%s\n", "0: parallel_data initially not null"); + allocate_data_pair(parallel_data); + parallel_data = get_own_ompt_data(parallel_data); + on_ompt_callback_parallel_begin(parent_task_data, parent_task_frame, + parallel_data, requested_team_size, invoker, + codeptr_ra); +} + +static void on_cds_ompt_callback_parallel_end(ompt_data_t *parallel_data, + 
ompt_data_t *task_data, + int invoker, + const void *codeptr_ra) { + task_data = get_own_ompt_data(task_data); + parallel_data = get_own_ompt_data(parallel_data); + on_ompt_callback_parallel_end(parallel_data, task_data, invoker, codeptr_ra); +} + +static void on_cds_ompt_callback_task_create(ompt_data_t *parent_task_data, + const ompt_frame_t *parent_frame, + ompt_data_t *new_task_data, + int type, int has_dependences, + const void *codeptr_ra) { + parent_task_data = get_own_ompt_data(parent_task_data); + if (new_task_data->ptr) + printf("%s\n", "0: new_task_data initially not null"); + allocate_data_pair(new_task_data); + new_task_data = get_own_ompt_data(new_task_data); + on_ompt_callback_task_create(parent_task_data, parent_frame, new_task_data, + type, has_dependences, codeptr_ra); +} + +static void +on_cds_ompt_callback_task_schedule(ompt_data_t *first_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *second_task_data) { + ompt_data_t *original_first_task_data = first_task_data; + first_task_data = get_own_ompt_data(first_task_data); + second_task_data = get_own_ompt_data(second_task_data); + on_ompt_callback_task_schedule(first_task_data, prior_task_status, + second_task_data); +} + +static void on_cds_ompt_callback_dependences(ompt_data_t *task_data, + const ompt_dependence_t *deps, + int ndeps) { + task_data = get_own_ompt_data(task_data); + on_ompt_callback_dependences(task_data, deps, ndeps); +} + +static void +on_cds_ompt_callback_task_dependence(ompt_data_t *first_task_data, + ompt_data_t *second_task_data) { + first_task_data = get_own_ompt_data(first_task_data); + second_task_data = get_own_ompt_data(second_task_data); + on_ompt_callback_task_dependence(first_task_data, second_task_data); +} + +static void on_cds_ompt_callback_thread_begin(ompt_thread_t thread_type, + ompt_data_t *thread_data) { + if (thread_data->ptr) + printf("%s\n", "0: thread_data initially not null"); + allocate_data_pair(thread_data); + thread_data = 
get_own_ompt_data(thread_data); + on_ompt_callback_thread_begin(thread_type, thread_data); +} + +/* thread_end wrapper: passes thread_data through get_own_ompt_data() and forwards to on_ompt_callback_thread_end(). */ +static void on_cds_ompt_callback_thread_end(ompt_data_t *thread_data) { + thread_data = get_own_ompt_data(thread_data); + on_ompt_callback_thread_end(thread_data); +} + +/* control_tool callback: prints the event, then queries ompt_get_task_info() and ompt_get_parallel_info() level by level until each returns 0, printing the value obtained via get_own_ompt_data() for every level. Always returns 0. */ +static int on_cds_ompt_callback_control_tool(uint64_t command, + uint64_t modifier, void *arg, + const void *codeptr_ra) { + printf("%" PRIu64 ": _first_tool: ompt_event_control_tool: command=%" PRIu64 + ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p \n", + ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra); + + // print task data + int task_level = 0; + ompt_data_t *task_data; + while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL, + NULL, NULL)) { + task_data = get_own_ompt_data(task_data); + printf("%" PRIu64 ": _first_tool: task level %d: task_id=%" PRIu64 "\n", + ompt_get_thread_data()->value, task_level, task_data->value); + task_level++; + } + + // print parallel data + int parallel_level = 0; + ompt_data_t *parallel_data; + while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)&parallel_data, + NULL)) { + parallel_data = get_own_ompt_data(parallel_data); + printf("%" PRIu64 ": _first_tool: parallel level %d: parallel_id=%" PRIu64 + "\n", + ompt_get_thread_data()->value, parallel_level, parallel_data->value); + parallel_level++; + } + return 0; // success +} + +/* Holds the previous ompt_get_thread_data entry point; assigned in ompt_cds_initialize(). */ +static ompt_get_thread_data_t ompt_cds_get_thread_data; +/* Returns the saved entry point's result passed through get_own_ompt_data(). */ +ompt_data_t *ompt_get_own_thread_data() { + return get_own_ompt_data(ompt_cds_get_thread_data()); +} + +/* Registers on_cds_<name> for event <name> through a local of the given callback type; prints a "0:" diagnostic when ompt_set_callback() returns ompt_set_never. */ +#define register_callback2_t(name, type) \ + do { \ + type f_##name = &on_cds_##name; \ + if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \ + printf("0: Could not register callback '" #name "'\n"); \ + } while (0) + +#define register_callback2(name) register_callback2_t(name, name##_t) + +/* Tool initializer: runs ompt_initialize(), swaps ompt_get_thread_data for ompt_get_own_thread_data, registers the callbacks and returns 1. */ +int ompt_cds_initialize(ompt_function_lookup_t lookup, int initial_device_num, + ompt_data_t *tool_data) { +
ompt_initialize(lookup, initial_device_num, tool_data); + /* Save the current thread-data accessor, then route ompt_get_thread_data through ompt_get_own_thread_data. */ + ompt_cds_get_thread_data = ompt_get_thread_data; + ompt_get_thread_data = ompt_get_own_thread_data; + + register_callback(ompt_callback_mutex_acquire); + register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t); + register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t); + register_callback(ompt_callback_nest_lock); + register_callback2(ompt_callback_sync_region); + register_callback2_t(ompt_callback_sync_region_wait, + ompt_callback_sync_region_t); + register_callback2(ompt_callback_control_tool); + register_callback2(ompt_callback_flush); + register_callback2(ompt_callback_cancel); + register_callback2(ompt_callback_implicit_task); + register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t); + register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t); + register_callback2(ompt_callback_work); + register_callback2(ompt_callback_master); + register_callback2(ompt_callback_parallel_begin); + register_callback2(ompt_callback_parallel_end); + register_callback2(ompt_callback_task_create); + register_callback2(ompt_callback_task_schedule); + register_callback2(ompt_callback_dependences); + register_callback2(ompt_callback_task_dependence); + register_callback2(ompt_callback_thread_begin); + register_callback2(ompt_callback_thread_end); + return 1; // success +} + +/* Finalizer placed in the ompt_start_tool result: prints the shutdown line; tool_data is not used. */ +void ompt_cds_finalize(ompt_data_t *tool_data) { + printf("0: ompt_event_runtime_shutdown\n"); +} + +/* Returns the address of a function-local static result initialized with ompt_cds_initialize, ompt_cds_finalize and 0; neither argument is read. */ +ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version, + const char *runtime_version) { + static ompt_start_tool_result_t ompt_start_tool_result = { + &ompt_cds_initialize, &ompt_cds_finalize, 0}; + return &ompt_start_tool_result; +} diff --git a/openmp/tools/multiplex/tests/custom_data_storage/second-tool.h b/openmp/tools/multiplex/tests/custom_data_storage/second-tool.h new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/tests/custom_data_storage/second-tool.h @@
-0,0 +1,5 @@ +#define CLIENT_TOOL_LIBRARIES_VAR "PRINT_EMBEDDED_TOOL_LIBRARIES" +#include "ompt-multiplex.h" +#define _TOOL_PREFIX " second_tool:" +#include "callback.h" +#undef _TOOL_PREFIX diff --git a/openmp/tools/multiplex/tests/lit.cfg b/openmp/tools/multiplex/tests/lit.cfg new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/tests/lit.cfg @@ -0,0 +1,92 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +# Configuration file for the 'lit' test runner. + +import os +import re +import subprocess +import lit.formats + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +# Prepend `path` to the dynamic-library search variable in config.environment: PATH on Windows, DYLD_LIBRARY_PATH on Darwin, LD_LIBRARY_PATH otherwise. +def append_dynamic_library_path(path): + if config.operating_system == 'Windows': + name = 'PATH' + sep = ';' + elif config.operating_system == 'Darwin': + name = 'DYLD_LIBRARY_PATH' + sep = ':' + else: + name = 'LD_LIBRARY_PATH' + sep = ':' + if name in config.environment: + config.environment[name] = path + sep + config.environment[name] + else: + config.environment[name] = path + +# name: The name of this test suite. +config.name = 'OMPT multiplex' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.c'] + +# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root object directory where output is placed +config.test_exec_root = config.test_obj_root + +# test format +config.test_format = lit.formats.ShTest() + +# compiler flags +config.test_flags = " -I " + config.test_source_root + "/.."\ + " -I " + config.omp_header_dir + \ + " -L " + config.omp_library_dir + \ + " -I " + config.ompt_print_callback_dir + \ + " -Wl,-rpath," + config.omp_library_dir + \ + " " + config.test_openmp_flags + +# Allow XFAIL to work +config.target_triple = [ ] +for feature in config.test_compiler_features: + config.available_features.add(feature) + +# Setup environment to find dynamic library at runtime +append_dynamic_library_path(config.omp_library_dir) +append_dynamic_library_path(config.test_obj_root+"/..") + +# Rpath modifications for Darwin +if config.operating_system == 'Darwin': + config.test_flags += " -Wl,-rpath," + config.omp_library_dir + +# Find the SDK on Darwin +if config.operating_system == 'Darwin': + cmd = subprocess.Popen(['xcrun', '--show-sdk-path'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = cmd.communicate() + out = out.strip() + res = cmd.wait() + if res == 0 and out: + config.test_flags += " -isysroot " + out + +if 'Linux' in config.operating_system: + config.available_features.add("linux") + +# substitutions +config.substitutions.append(("FileCheck", "tee %%t.out | %s" % config.test_filecheck)) +config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable")) + +config.substitutions.append(("%libomp-compile-and-run", \ + "%libomp-compile && %libomp-run")) +config.substitutions.append(("%libomp-compile", \ + "%clang %cflags %s -o %t")) +config.substitutions.append(("%libomp-tool", \ + "%clang %cflags -shared -fPIC -g")) +config.substitutions.append(("%libomp-run", "%t")) +config.substitutions.append(("%clang", config.test_c_compiler)) +config.substitutions.append(("%openmp_flag", config.test_openmp_flags)) 
+config.substitutions.append(("%cflags", config.test_flags)) + diff --git a/openmp/tools/multiplex/tests/lit.site.cfg.in b/openmp/tools/multiplex/tests/lit.site.cfg.in new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/tests/lit.site.cfg.in @@ -0,0 +1,16 @@ +@AUTO_GEN_COMMENT@ + +config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@" +config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@" +config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@ +config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@" +config.test_openmp_flags = "@OPENMP_TEST_OPENMP_FLAGS@" +config.test_extra_flags = "@OPENMP_TEST_FLAGS@" +config.test_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" +config.omp_library_dir = "@LIBOMP_LIBRARY_DIR@" +config.omp_header_dir = "@LIBOMP_INCLUDE_DIR@" +config.ompt_print_callback_dir = "@OMPT_PRINT_CALLBACKS_DIR@" +config.operating_system = "@CMAKE_SYSTEM_NAME@" + +# Let the main config do the real work. +lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/openmp/tools/multiplex/tests/ompt-signal.h b/openmp/tools/multiplex/tests/ompt-signal.h new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/tests/ompt-signal.h @@ -0,0 +1,23 @@ +// These functions are used to provide a signal-wait mechanism to enforce +// expected scheduling for the test cases. Conditional variable (s) needs to be +// shared! 
Initialize to 0 +#include <unistd.h> + +#define OMPT_SIGNAL(s) ompt_signal(&s) +// atomically increment *s +// inline +void ompt_signal(int *s) { +#pragma omp atomic + (*s)++; +} + +#define OMPT_WAIT(s, v) ompt_wait(&s, v) +// wait for s >= v +// polls *s with an atomic read after each usleep(10) +// inline +void ompt_wait(int *s, int v) { + int wait = 0; + do { + usleep(10); +#pragma omp atomic read + wait = (*s); + } while (wait < v); +} diff --git a/openmp/tools/multiplex/tests/print/first-tool.h b/openmp/tools/multiplex/tests/print/first-tool.h new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/tests/print/first-tool.h @@ -0,0 +1,5 @@ +#define CLIENT_TOOL_LIBRARIES_VAR "PRINT_TOOL_LIBRARIES" +#include "ompt-multiplex.h" +#define _TOOL_PREFIX " _first_tool:" +#include "callback.h" +#undef _TOOL_PREFIX diff --git a/openmp/tools/multiplex/tests/print/print.c b/openmp/tools/multiplex/tests/print/print.c new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/tests/print/print.c @@ -0,0 +1,304 @@ +// RUN: %libomp-tool -DFIRST_TOOL -o %t.first.tool.so %s && \ +// RUN: %libomp-tool -DSECOND_TOOL -o %t.second.tool.so %s && \ +// RUN: %libomp-compile && \ +// RUN: env OMP_TOOL_LIBRARIES=%t.first.tool.so \ +// RUN: PRINT_TOOL_LIBRARIES=%t.second.tool.so \ +// RUN: %libomp-run | %sort-threads | FileCheck %s + +#if defined(FIRST_TOOL) +#include "first-tool.h" +#elif defined(SECOND_TOOL) +#include "second-tool.h" +#else /* APP */ + +#include "../ompt-signal.h" +#include "omp.h" +#include <stdio.h> + +int main() { + int x, s = 0; +#pragma omp parallel num_threads(2) shared(s) + { +#pragma omp master + { +#pragma omp task shared(s) + { + omp_control_tool(5, 1, NULL); + OMPT_SIGNAL(s); + } + } + if (omp_get_thread_num() == 1) + OMPT_WAIT(s, 1); + } + return 0; +} + +// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback + +// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]] +// CHECK: {{^}}0: NULL_POINTER=[[NULL]] +// CHECK: {{^}}0: ompt_event_runtime_shutdown +// CHECK: {{^}}0: ompt_event_runtime_shutdown + +// CHECK: {{^}}[[_1ST_MSTR_TID:[0-9]+]]: _first_tool: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[_1ST_MSTR_TID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_initial_task_begin: +// CHECK-SAME: parallel_id=[[_FIRST_INIT_PARALLEL_ID:[0-9]+]], +// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID:[0-9]+]], +// CHECK-SAME: actual_parallelism=1, index=1, flags=1 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[_FIRST_INITIAL_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit=(nil), +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID:[0-9]+]], +// CHECK-SAME: requested_team_size=2, codeptr_ra={{0x[0-f]+}}, invoker=2 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// CHECK-SAME: thread_num=0 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_master_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_create: +// CHECK-SAME: parent_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}, +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[_FIRST_EXPLICIT_TASK_ID:[0-9]+]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, +// CHECK-SAME: has_dependences=no + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_master_end: +// 
CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_barrier_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_wait_barrier_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[_FIRST_EXPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_switch=7 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_control_tool: +// CHECK-SAME: command=5, modifier=1, arg=(nil), codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 0: +// CHECK-SAME: task_id=[[_FIRST_EXPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 1: +// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 2: +// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: parallel level 0: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]] + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: parallel level 1: +// CHECK-SAME: parallel_id={{[0-9]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[_FIRST_EXPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_complete=1 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_end: +// CHECK-SAME: task_id=[[_FIRST_EXPLICIT_TASK_ID]] + +// CHECK: 
{{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_wait_barrier_end: +// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_barrier_end: +// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_implicit_task_end: +// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: team_size=2, thread_num=0 + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_parallel_end: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID]], invoker=2, +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[_1ST_MSTR_TID]] +// CHECK: {{^}}[[_2ND_MSTR_TID:[0-9]+]]: second_tool: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[_2ND_MSTR_TID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_initial_task_begin: +// CHECK-SAME: parallel_id=[[SECOND_INIT_PARALLEL_ID:[0-9]+]], +// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID:[0-9]+]], +// CHECK-SAME: actual_parallelism=1, index=1, flags=1 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_parallel_begin: +// CHECK-SAME: parent_task_id=[[SECOND_INITIAL_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit=(nil), +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID:[0-9]+]], +// CHECK-SAME: requested_team_size=2, codeptr_ra={{0x[0-f]+}}, invoker=2 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// CHECK-SAME: thread_num=0 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: 
ompt_event_master_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_create: +// CHECK-SAME: parent_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}}, +// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}}, +// CHECK-SAME: new_task_id=[[SECOND_EXPLICIT_TASK_ID:[0-9]+]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4, +// CHECK-SAME: has_dependences=no + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_master_end: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_barrier_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_wait_barrier_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[SECOND_EXPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_switch=7 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_control_tool: +// CHECK-SAME: command=5, modifier=1, arg=(nil), codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 0: +// CHECK-SAME: task_id=[[SECOND_EXPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 1: +// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 2: +// 
CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: parallel level 0: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: parallel level 1: +// CHECK-SAME: parallel_id={{[0-9]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_schedule: +// CHECK-SAME: first_task_id=[[SECOND_EXPLICIT_TASK_ID]], +// CHECK-SAME: second_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: prior_task_status=ompt_task_complete=1 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_end: +// CHECK-SAME: task_id=[[SECOND_EXPLICIT_TASK_ID]] + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_wait_barrier_end: +// CHECK-SAME: parallel_id=0, task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_barrier_end: +// CHECK-SAME: parallel_id=0, task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_implicit_task_end: +// CHECK-SAME: parallel_id=0, task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], +// CHECK-SAME: team_size=2, thread_num=0 + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_parallel_end: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID]], invoker=2, +// CHECK-SAME: codeptr_ra={{0x[0-f]+}} + +// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[_2ND_MSTR_TID]] + +// CHECK: {{^}}[[_1ST_WRKR_TID:[0-9]+]]: _first_tool: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[_1ST_WRKR_TID]] + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2, +// CHECK-SAME: thread_num=1 + +// CHECK: 
{{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_barrier_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_wait_barrier_begin: +// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]], +// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_wait_barrier_end: +// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_barrier_end: +// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra=(nil) + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_implicit_task_end: +// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], +// CHECK-SAME: team_size=0, thread_num=1 + +// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[_1ST_WRKR_TID]] + +// CHECK: {{^}}[[_2ND_WRKR_TID:[0-9]+]]: second_tool: ompt_event_thread_begin: +// CHECK-SAME: thread_type=ompt_thread_worker=2, +// CHECK-SAME: thread_id=[[_2ND_WRKR_TID]] + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_implicit_task_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID:[0-9]+]], +// CHECK-SAME: team_size=2, thread_num=1 + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_barrier_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_wait_barrier_begin: +// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]], +// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_wait_barrier_end: 
+// CHECK-SAME: parallel_id=0, task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_barrier_end: +// CHECK-SAME: parallel_id=0, task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], +// CHECK-SAME: codeptr_ra=(nil) + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_implicit_task_end: +// CHECK-SAME: parallel_id=0, task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], +// CHECK-SAME: team_size=0, thread_num=1 + +// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_thread_end: +// CHECK-SAME: thread_id=[[_2ND_WRKR_TID]] + +#endif /* APP */ diff --git a/openmp/tools/multiplex/tests/print/second-tool.h b/openmp/tools/multiplex/tests/print/second-tool.h new file mode 100644 --- /dev/null +++ b/openmp/tools/multiplex/tests/print/second-tool.h @@ -0,0 +1,5 @@ +#define CLIENT_TOOL_LIBRARIES_VAR "PRINT_EMBEDDED_TOOL_LIBRARIES" +#include "ompt-multiplex.h" +#define _TOOL_PREFIX " second_tool:" +#include "callback.h" +#undef _TOOL_PREFIX