Index: openmp/trunk/runtime/cmake/LibompExports.cmake =================================================================== --- openmp/trunk/runtime/cmake/LibompExports.cmake +++ openmp/trunk/runtime/cmake/LibompExports.cmake @@ -57,7 +57,7 @@ ) if(${LIBOMP_OMPT_SUPPORT}) add_custom_command(TARGET omp POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ompt.h ${LIBOMP_EXPORTS_CMN_DIR} + COMMAND ${CMAKE_COMMAND} -E copy omp-tools.h ${LIBOMP_EXPORTS_CMN_DIR} ) endif() if(${LIBOMP_FORTRAN_MODULES}) Index: openmp/trunk/runtime/src/CMakeLists.txt =================================================================== --- openmp/trunk/runtime/src/CMakeLists.txt +++ openmp/trunk/runtime/src/CMakeLists.txt @@ -9,11 +9,11 @@ #//===----------------------------------------------------------------------===// # -# Configure omp.h, kmp_config.h and ompt.h if necessary +# Configure omp.h, kmp_config.h and omp-tools.h if necessary configure_file(${LIBOMP_INC_DIR}/omp.h.var omp.h @ONLY) configure_file(kmp_config.h.cmake kmp_config.h @ONLY) if(${LIBOMP_OMPT_SUPPORT}) - configure_file(${LIBOMP_INC_DIR}/ompt.h.var ompt.h @ONLY) + configure_file(${LIBOMP_INC_DIR}/omp-tools.h.var omp-tools.h @ONLY) endif() # Generate message catalog files: kmp_i18n_id.inc and kmp_i18n_default.inc @@ -319,7 +319,9 @@ DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} ) if(${LIBOMP_OMPT_SUPPORT}) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ompt.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH}) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH}) + # install under legacy name ompt.h + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) endif() if(${LIBOMP_FORTRAN_MODULES}) install(FILES Index: openmp/trunk/runtime/src/include/50/omp-tools.h.var =================================================================== --- openmp/trunk/runtime/src/include/50/omp-tools.h.var +++ openmp/trunk/runtime/src/include/50/omp-tools.h.var @@ 
-0,0 +1,1083 @@ +/* + * include/50/omp-tools.h.var + */ + +//===----------------------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// + +#ifndef __OMPT__ +#define __OMPT__ + +/***************************************************************************** + * system include files + *****************************************************************************/ + +#include <stdint.h> +#include <stddef.h> + +/***************************************************************************** + * iteration macros + *****************************************************************************/ + +#define FOREACH_OMPT_INQUIRY_FN(macro) \ + macro (ompt_enumerate_states) \ + macro (ompt_enumerate_mutex_impls) \ + \ + macro (ompt_set_callback) \ + macro (ompt_get_callback) \ + \ + macro (ompt_get_state) \ + \ + macro (ompt_get_parallel_info) \ + macro (ompt_get_task_info) \ + macro (ompt_get_task_memory) \ + macro (ompt_get_thread_data) \ + macro (ompt_get_unique_id) \ + macro (ompt_finalize_tool) \ + \ + macro(ompt_get_num_procs) \ + macro(ompt_get_num_places) \ + macro(ompt_get_place_proc_ids) \ + macro(ompt_get_place_num) \ + macro(ompt_get_partition_place_nums) \ + macro(ompt_get_proc_id) \ + \ + macro(ompt_get_target_info) \ + macro(ompt_get_num_devices) + +#define FOREACH_OMPT_STATE(macro) \ + \ + /* first available state */ \ + macro (ompt_state_undefined, 0x102) /* undefined thread state */ \ + \ + /* work states (0..15) */ \ + macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \ + macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \ + macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \ + \ + /* barrier wait states (16..31) */ \ + macro (ompt_state_wait_barrier, 0x010) /* 
waiting at a barrier */ \ + macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \ + /* implicit barrier at the end of parallel region */\ + macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \ + /* implicit barrier at the end of worksharing */ \ + macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \ + macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \ + \ + /* task wait states (32..63) */ \ + macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \ + macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \ + \ + /* mutex wait states (64..127) */ \ + macro (ompt_state_wait_mutex, 0x040) \ + macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \ + macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \ + macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \ + macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \ + \ + /* target wait states (128..255) */ \ + macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \ + macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \ + macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \ + \ + /* misc (256..511) */ \ + macro (ompt_state_idle, 0x100) /* waiting for work */ \ + macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \ + \ + /* implementation-specific states (512..) 
*/ + + +#define FOREACH_KMP_MUTEX_IMPL(macro) \ + macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \ + macro (kmp_mutex_impl_spin, 1) /* based on spin */ \ + macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ + macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ + +#define FOREACH_OMPT_EVENT(macro) \ + \ + /*--- Mandatory Events ---*/ \ + macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ + macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \ + \ + macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \ + macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \ + \ + macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \ + macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ + macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ + \ + macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ + macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ + macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ + \ + macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ + \ + macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ + macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ + \ + macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ + macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ + \ + /* Optional Events */ \ + macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* sync region wait begin or end */ \ + \ + macro (ompt_callback_mutex_released, 
ompt_callback_mutex_t, 17) /* mutex released */ \ + \ + macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \ + macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \ + \ + macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \ + \ + macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \ + \ + macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ + \ + macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \ + \ + macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \ + macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \ + \ + macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \ + macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \ + \ + macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \ + \ + macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \ + \ + macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \ + \ + macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \ + \ + macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ + +/***************************************************************************** + * implementation specific types + *****************************************************************************/ + +typedef enum kmp_mutex_impl_t { +#define kmp_mutex_impl_macro(impl, code) impl = code, + FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro) +#undef kmp_mutex_impl_macro +} kmp_mutex_impl_t; + +/***************************************************************************** + * definitions generated from spec + 
*****************************************************************************/ + +typedef enum ompt_callbacks_t { + ompt_callback_thread_begin = 1, + ompt_callback_thread_end = 2, + ompt_callback_parallel_begin = 3, + ompt_callback_parallel_end = 4, + ompt_callback_task_create = 5, + ompt_callback_task_schedule = 6, + ompt_callback_implicit_task = 7, + ompt_callback_target = 8, + ompt_callback_target_data_op = 9, + ompt_callback_target_submit = 10, + ompt_callback_control_tool = 11, + ompt_callback_device_initialize = 12, + ompt_callback_device_finalize = 13, + ompt_callback_device_load = 14, + ompt_callback_device_unload = 15, + ompt_callback_sync_region_wait = 16, + ompt_callback_mutex_released = 17, + ompt_callback_dependences = 18, + ompt_callback_task_dependence = 19, + ompt_callback_work = 20, + ompt_callback_master = 21, + ompt_callback_target_map = 22, + ompt_callback_sync_region = 23, + ompt_callback_lock_init = 24, + ompt_callback_lock_destroy = 25, + ompt_callback_mutex_acquire = 26, + ompt_callback_mutex_acquired = 27, + ompt_callback_nest_lock = 28, + ompt_callback_flush = 29, + ompt_callback_cancel = 30, + ompt_callback_reduction = 31, + ompt_callback_dispatch = 32 +} ompt_callbacks_t; + +typedef enum ompt_record_t { + ompt_record_ompt = 1, + ompt_record_native = 2, + ompt_record_invalid = 3 +} ompt_record_t; + +typedef enum ompt_record_native_t { + ompt_record_native_info = 1, + ompt_record_native_event = 2 +} ompt_record_native_t; + +typedef enum ompt_set_result_t { + ompt_set_error = 0, + ompt_set_never = 1, + ompt_set_impossible = 2, + ompt_set_sometimes = 3, + ompt_set_sometimes_paired = 4, + ompt_set_always = 5 +} ompt_set_result_t; + +typedef uint64_t ompt_id_t; + +typedef uint64_t ompt_device_time_t; + +typedef uint64_t ompt_buffer_cursor_t; + +typedef enum ompt_thread_t { + ompt_thread_initial = 1, + ompt_thread_worker = 2, + ompt_thread_other = 3, + ompt_thread_unknown = 4 +} ompt_thread_t; + +typedef enum ompt_scope_endpoint_t { + 
ompt_scope_begin = 1, + ompt_scope_end = 2 +} ompt_scope_endpoint_t; + +typedef enum ompt_dispatch_t { + ompt_dispatch_iteration = 1, + ompt_dispatch_section = 2 +} ompt_dispatch_t; + +typedef enum ompt_sync_region_t { + ompt_sync_region_barrier = 1, + ompt_sync_region_barrier_implicit = 2, + ompt_sync_region_barrier_explicit = 3, + ompt_sync_region_barrier_implementation = 4, + ompt_sync_region_taskwait = 5, + ompt_sync_region_taskgroup = 6, + ompt_sync_region_reduction = 7 +} ompt_sync_region_t; + +typedef enum ompt_target_data_op_t { + ompt_target_data_alloc = 1, + ompt_target_data_transfer_to_device = 2, + ompt_target_data_transfer_from_device = 3, + ompt_target_data_delete = 4, + ompt_target_data_associate = 5, + ompt_target_data_disassociate = 6 +} ompt_target_data_op_t; + +typedef enum ompt_work_t { + ompt_work_loop = 1, + ompt_work_sections = 2, + ompt_work_single_executor = 3, + ompt_work_single_other = 4, + ompt_work_workshare = 5, + ompt_work_distribute = 6, + ompt_work_taskloop = 7 +} ompt_work_t; + +typedef enum ompt_mutex_t { + ompt_mutex_lock = 1, + ompt_mutex_test_lock = 2, + ompt_mutex_nest_lock = 3, + ompt_mutex_test_nest_lock = 4, + ompt_mutex_critical = 5, + ompt_mutex_atomic = 6, + ompt_mutex_ordered = 7 +} ompt_mutex_t; + +typedef enum ompt_native_mon_flag_t { + ompt_native_data_motion_explicit = 0x01, + ompt_native_data_motion_implicit = 0x02, + ompt_native_kernel_invocation = 0x04, + ompt_native_kernel_execution = 0x08, + ompt_native_driver = 0x10, + ompt_native_runtime = 0x20, + ompt_native_overhead = 0x40, + ompt_native_idleness = 0x80 +} ompt_native_mon_flag_t; + +typedef enum ompt_task_flag_t { + ompt_task_initial = 0x00000001, + ompt_task_implicit = 0x00000002, + ompt_task_explicit = 0x00000004, + ompt_task_target = 0x00000008, + ompt_task_undeferred = 0x08000000, + ompt_task_untied = 0x10000000, + ompt_task_final = 0x20000000, + ompt_task_mergeable = 0x40000000, + ompt_task_merged = 0x80000000 +} ompt_task_flag_t; + +typedef enum 
ompt_task_status_t { + ompt_task_complete = 1, + ompt_task_yield = 2, + ompt_task_cancel = 3, + ompt_task_detach = 4, + ompt_task_early_fulfill = 5, + ompt_task_late_fulfill = 6, + ompt_task_switch = 7 +} ompt_task_status_t; + +typedef enum ompt_target_t { + ompt_target = 1, + ompt_target_enter_data = 2, + ompt_target_exit_data = 3, + ompt_target_update = 4 +} ompt_target_t; + +typedef enum ompt_parallel_flag_t { + ompt_parallel_invoker_program = 0x00000001, + ompt_parallel_invoker_runtime = 0x00000002, + ompt_parallel_league = 0x40000000, + ompt_parallel_team = 0x80000000 +} ompt_parallel_flag_t; + +typedef enum ompt_target_map_flag_t { + ompt_target_map_flag_to = 0x01, + ompt_target_map_flag_from = 0x02, + ompt_target_map_flag_alloc = 0x04, + ompt_target_map_flag_release = 0x08, + ompt_target_map_flag_delete = 0x10, + ompt_target_map_flag_implicit = 0x20 +} ompt_target_map_flag_t; + +typedef enum ompt_dependence_type_t { + ompt_dependence_type_in = 1, + ompt_dependence_type_out = 2, + ompt_dependence_type_inout = 3, + ompt_dependence_type_mutexinoutset = 4, + ompt_dependence_type_source = 5, + ompt_dependence_type_sink = 6 +} ompt_dependence_type_t; + +typedef enum ompt_cancel_flag_t { + ompt_cancel_parallel = 0x01, + ompt_cancel_sections = 0x02, + ompt_cancel_loop = 0x04, + ompt_cancel_taskgroup = 0x08, + ompt_cancel_activated = 0x10, + ompt_cancel_detected = 0x20, + ompt_cancel_discarded_task = 0x40 +} ompt_cancel_flag_t; + +typedef uint64_t ompt_hwid_t; + +typedef uint64_t ompt_wait_id_t; + +typedef enum ompt_frame_flag_t { + ompt_frame_runtime = 0x00, + ompt_frame_application = 0x01, + ompt_frame_cfa = 0x10, + ompt_frame_framepointer = 0x20, + ompt_frame_stackaddress = 0x30 +} ompt_frame_flag_t; + +typedef enum ompt_state_t { + ompt_state_work_serial = 0x000, + ompt_state_work_parallel = 0x001, + ompt_state_work_reduction = 0x002, + + ompt_state_wait_barrier = 0x010, + ompt_state_wait_barrier_implicit_parallel = 0x011, + 
ompt_state_wait_barrier_implicit_workshare = 0x012, + ompt_state_wait_barrier_implicit = 0x013, + ompt_state_wait_barrier_explicit = 0x014, + + ompt_state_wait_taskwait = 0x020, + ompt_state_wait_taskgroup = 0x021, + + ompt_state_wait_mutex = 0x040, + ompt_state_wait_lock = 0x041, + ompt_state_wait_critical = 0x042, + ompt_state_wait_atomic = 0x043, + ompt_state_wait_ordered = 0x044, + + ompt_state_wait_target = 0x080, + ompt_state_wait_target_map = 0x081, + ompt_state_wait_target_update = 0x082, + + ompt_state_idle = 0x100, + ompt_state_overhead = 0x101, + ompt_state_undefined = 0x102 +} ompt_state_t; + +typedef uint64_t (*ompt_get_unique_id_t) (void); + +typedef uint64_t ompd_size_t; + +typedef uint64_t ompd_wait_id_t; + +typedef uint64_t ompd_addr_t; +typedef int64_t ompd_word_t; +typedef uint64_t ompd_seg_t; + +typedef uint64_t ompd_device_t; + +typedef uint64_t ompd_thread_id_t; + +typedef enum ompd_scope_t { + ompd_scope_global = 1, + ompd_scope_address_space = 2, + ompd_scope_thread = 3, + ompd_scope_parallel = 4, + ompd_scope_implicit_task = 5, + ompd_scope_task = 6 +} ompd_scope_t; + +typedef uint64_t ompd_icv_id_t; + +typedef enum ompd_rc_t { + ompd_rc_ok = 0, + ompd_rc_unavailable = 1, + ompd_rc_stale_handle = 2, + ompd_rc_bad_input = 3, + ompd_rc_error = 4, + ompd_rc_unsupported = 5, + ompd_rc_needs_state_tracking = 6, + ompd_rc_incompatible = 7, + ompd_rc_device_read_error = 8, + ompd_rc_device_write_error = 9, + ompd_rc_nomem = 10 +} ompd_rc_t; + +typedef void (*ompt_interface_fn_t) (void); + +typedef ompt_interface_fn_t (*ompt_function_lookup_t) ( + const char *interface_function_name +); + +typedef union ompt_data_t { + uint64_t value; + void *ptr; +} ompt_data_t; + +typedef struct ompt_frame_t { + ompt_data_t exit_frame; + ompt_data_t enter_frame; + int exit_frame_flags; + int enter_frame_flags; +} ompt_frame_t; + +typedef void (*ompt_callback_t) (void); + +typedef void ompt_device_t; + +typedef void ompt_buffer_t; + +typedef void 
(*ompt_callback_buffer_request_t) ( + int device_num, + ompt_buffer_t **buffer, + size_t *bytes +); + +typedef void (*ompt_callback_buffer_complete_t) ( + int device_num, + ompt_buffer_t *buffer, + size_t bytes, + ompt_buffer_cursor_t begin, + int buffer_owned +); + +typedef void (*ompt_finalize_t) ( + ompt_data_t *tool_data +); + +typedef int (*ompt_initialize_t) ( + ompt_function_lookup_t lookup, + int initial_device_num, + ompt_data_t *tool_data +); + +typedef struct ompt_start_tool_result_t { + ompt_initialize_t initialize; + ompt_finalize_t finalize; + ompt_data_t tool_data; +} ompt_start_tool_result_t; + +typedef struct ompt_record_abstract_t { + ompt_record_native_t rclass; + const char *type; + ompt_device_time_t start_time; + ompt_device_time_t end_time; + ompt_hwid_t hwid; +} ompt_record_abstract_t; + +typedef struct ompt_dependence_t { + ompt_data_t variable; + ompt_dependence_type_t dependence_type; +} ompt_dependence_t; + +typedef int (*ompt_enumerate_states_t) ( + int current_state, + int *next_state, + const char **next_state_name +); + +typedef int (*ompt_enumerate_mutex_impls_t) ( + int current_impl, + int *next_impl, + const char **next_impl_name +); + +typedef ompt_set_result_t (*ompt_set_callback_t) ( + ompt_callbacks_t event, + ompt_callback_t callback +); + +typedef int (*ompt_get_callback_t) ( + ompt_callbacks_t event, + ompt_callback_t *callback +); + +typedef ompt_data_t *(*ompt_get_thread_data_t) (void); + +typedef int (*ompt_get_num_procs_t) (void); + +typedef int (*ompt_get_num_places_t) (void); + +typedef int (*ompt_get_place_proc_ids_t) ( + int place_num, + int ids_size, + int *ids +); + +typedef int (*ompt_get_place_num_t) (void); + +typedef int (*ompt_get_partition_place_nums_t) ( + int place_nums_size, + int *place_nums +); + +typedef int (*ompt_get_proc_id_t) (void); + +typedef int (*ompt_get_state_t) ( + ompt_wait_id_t *wait_id +); + +typedef int (*ompt_get_parallel_info_t) ( + int ancestor_level, + ompt_data_t **parallel_data, + 
int *team_size +); + +typedef int (*ompt_get_task_info_t) ( + int ancestor_level, + int *flags, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, + int *thread_num +); + +typedef int (*ompt_get_task_memory_t)( + void **addr, + size_t *size, + int block +); + +typedef int (*ompt_get_target_info_t) ( + uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id +); + +typedef int (*ompt_get_num_devices_t) (void); + +typedef void (*ompt_finalize_tool_t) (void); + +typedef int (*ompt_get_device_num_procs_t) ( + ompt_device_t *device +); + +typedef ompt_device_time_t (*ompt_get_device_time_t) ( + ompt_device_t *device +); + +typedef double (*ompt_translate_time_t) ( + ompt_device_t *device, + ompt_device_time_t time +); + +typedef ompt_set_result_t (*ompt_set_trace_ompt_t) ( + ompt_device_t *device, + unsigned int enable, + unsigned int etype +); + +typedef ompt_set_result_t (*ompt_set_trace_native_t) ( + ompt_device_t *device, + int enable, + int flags +); + +typedef int (*ompt_start_trace_t) ( + ompt_device_t *device, + ompt_callback_buffer_request_t request, + ompt_callback_buffer_complete_t complete +); + +typedef int (*ompt_pause_trace_t) ( + ompt_device_t *device, + int begin_pause +); + +typedef int (*ompt_flush_trace_t) ( + ompt_device_t *device +); + +typedef int (*ompt_stop_trace_t) ( + ompt_device_t *device +); + +typedef int (*ompt_advance_buffer_cursor_t) ( + ompt_device_t *device, + ompt_buffer_t *buffer, + size_t size, + ompt_buffer_cursor_t current, + ompt_buffer_cursor_t *next +); + +typedef ompt_record_t (*ompt_get_record_type_t) ( + ompt_buffer_t *buffer, + ompt_buffer_cursor_t current +); + +typedef void *(*ompt_get_record_native_t) ( + ompt_buffer_t *buffer, + ompt_buffer_cursor_t current, + ompt_id_t *host_op_id +); + +typedef ompt_record_abstract_t * +(*ompt_get_record_abstract_t) ( + void *native_record +); + +typedef void (*ompt_callback_thread_begin_t) ( + ompt_thread_t thread_type, + ompt_data_t 
*thread_data +); + +typedef struct ompt_record_thread_begin_t { + ompt_thread_t thread_type; +} ompt_record_thread_begin_t; + +typedef void (*ompt_callback_thread_end_t) ( + ompt_data_t *thread_data +); + +typedef void (*ompt_callback_parallel_begin_t) ( + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, + ompt_data_t *parallel_data, + unsigned int requested_parallelism, + int flags, + const void *codeptr_ra +); + +typedef struct ompt_record_parallel_begin_t { + ompt_id_t encountering_task_id; + ompt_id_t parallel_id; + unsigned int requested_parallelism; + int flags; + const void *codeptr_ra; +} ompt_record_parallel_begin_t; + +typedef void (*ompt_callback_parallel_end_t) ( + ompt_data_t *parallel_data, + ompt_data_t *encountering_task_data, + int flags, + const void *codeptr_ra +); + +typedef struct ompt_record_parallel_end_t { + ompt_id_t parallel_id; + ompt_id_t encountering_task_id; + int flags; + const void *codeptr_ra; +} ompt_record_parallel_end_t; + +typedef void (*ompt_callback_work_t) ( + ompt_work_t wstype, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + uint64_t count, + const void *codeptr_ra +); + +typedef struct ompt_record_work_t { + ompt_work_t wstype; + ompt_scope_endpoint_t endpoint; + ompt_id_t parallel_id; + ompt_id_t task_id; + uint64_t count; + const void *codeptr_ra; +} ompt_record_work_t; + +typedef void (*ompt_callback_dispatch_t) ( + ompt_data_t *parallel_data, + ompt_data_t *task_data, + ompt_dispatch_t kind, + ompt_data_t instance +); + +typedef struct ompt_record_dispatch_t { + ompt_id_t parallel_id; + ompt_id_t task_id; + ompt_dispatch_t kind; + ompt_data_t instance; +} ompt_record_dispatch_t; + +typedef void (*ompt_callback_task_create_t) ( + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, + ompt_data_t *new_task_data, + int flags, + int has_dependences, + const void *codeptr_ra +); + +typedef struct 
ompt_record_task_create_t { + ompt_id_t encountering_task_id; + ompt_id_t new_task_id; + int flags; + int has_dependences; + const void *codeptr_ra; +} ompt_record_task_create_t; + +typedef void (*ompt_callback_dependences_t) ( + ompt_data_t *task_data, + const ompt_dependence_t *deps, + int ndeps +); + +typedef struct ompt_record_dependences_t { + ompt_id_t task_id; + ompt_dependence_t dep; + int ndeps; +} ompt_record_dependences_t; + +typedef void (*ompt_callback_task_dependence_t) ( + ompt_data_t *src_task_data, + ompt_data_t *sink_task_data +); + +typedef struct ompt_record_task_dependence_t { + ompt_id_t src_task_id; + ompt_id_t sink_task_id; +} ompt_record_task_dependence_t; + +typedef void (*ompt_callback_task_schedule_t) ( + ompt_data_t *prior_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *next_task_data +); + +typedef struct ompt_record_task_schedule_t { + ompt_id_t prior_task_id; + ompt_task_status_t prior_task_status; + ompt_id_t next_task_id; +} ompt_record_task_schedule_t; + +typedef void (*ompt_callback_implicit_task_t) ( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int actual_parallelism, + unsigned int index, + int flags +); + +typedef struct ompt_record_implicit_task_t { + ompt_scope_endpoint_t endpoint; + ompt_id_t parallel_id; + ompt_id_t task_id; + unsigned int actual_parallelism; + unsigned int index; + int flags; +} ompt_record_implicit_task_t; + +typedef void (*ompt_callback_master_t) ( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra +); + +typedef struct ompt_record_master_t { + ompt_scope_endpoint_t endpoint; + ompt_id_t parallel_id; + ompt_id_t task_id; + const void *codeptr_ra; +} ompt_record_master_t; + +typedef void (*ompt_callback_sync_region_t) ( + ompt_sync_region_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra +); + 
+typedef struct ompt_record_sync_region_t { + ompt_sync_region_t kind; + ompt_scope_endpoint_t endpoint; + ompt_id_t parallel_id; + ompt_id_t task_id; + const void *codeptr_ra; +} ompt_record_sync_region_t; + +typedef void (*ompt_callback_mutex_acquire_t) ( + ompt_mutex_t kind, + unsigned int hint, + unsigned int impl, + ompt_wait_id_t wait_id, + const void *codeptr_ra +); + +typedef struct ompt_record_mutex_acquire_t { + ompt_mutex_t kind; + unsigned int hint; + unsigned int impl; + ompt_wait_id_t wait_id; + const void *codeptr_ra; +} ompt_record_mutex_acquire_t; + +typedef void (*ompt_callback_mutex_t) ( + ompt_mutex_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra +); + +typedef struct ompt_record_mutex_t { + ompt_mutex_t kind; + ompt_wait_id_t wait_id; + const void *codeptr_ra; +} ompt_record_mutex_t; + +typedef void (*ompt_callback_nest_lock_t) ( + ompt_scope_endpoint_t endpoint, + ompt_wait_id_t wait_id, + const void *codeptr_ra +); + +typedef struct ompt_record_nest_lock_t { + ompt_scope_endpoint_t endpoint; + ompt_wait_id_t wait_id; + const void *codeptr_ra; +} ompt_record_nest_lock_t; + +typedef void (*ompt_callback_flush_t) ( + ompt_data_t *thread_data, + const void *codeptr_ra +); + +typedef struct ompt_record_flush_t { + const void *codeptr_ra; +} ompt_record_flush_t; + +typedef void (*ompt_callback_cancel_t) ( + ompt_data_t *task_data, + int flags, + const void *codeptr_ra +); + +typedef struct ompt_record_cancel_t { + ompt_id_t task_id; + int flags; + const void *codeptr_ra; +} ompt_record_cancel_t; + +typedef void (*ompt_callback_device_initialize_t) ( + int device_num, + const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation +); + +typedef void (*ompt_callback_device_finalize_t) ( + int device_num +); + +typedef void (*ompt_callback_device_load_t) ( + int device_num, + const char *filename, + int64_t offset_in_file, + void *vma_in_file, + size_t bytes, + void *host_addr, + void *device_addr, 
+ uint64_t module_id +); + +typedef void (*ompt_callback_device_unload_t) ( + int device_num, + uint64_t module_id +); + +typedef void (*ompt_callback_target_data_op_t) ( + ompt_id_t target_id, + ompt_id_t host_op_id, + ompt_target_data_op_t optype, + void *src_addr, + int src_device_num, + void *dest_addr, + int dest_device_num, + size_t bytes, + const void *codeptr_ra +); + +typedef struct ompt_record_target_data_op_t { + ompt_id_t host_op_id; + ompt_target_data_op_t optype; + void *src_addr; + int src_device_num; + void *dest_addr; + int dest_device_num; + size_t bytes; + ompt_device_time_t end_time; + const void *codeptr_ra; +} ompt_record_target_data_op_t; + +typedef void (*ompt_callback_target_t) ( + ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, + ompt_data_t *task_data, + ompt_id_t target_id, + const void *codeptr_ra +); + +typedef struct ompt_record_target_t { + ompt_target_t kind; + ompt_scope_endpoint_t endpoint; + int device_num; + ompt_id_t task_id; + ompt_id_t target_id; + const void *codeptr_ra; +} ompt_record_target_t; + +typedef void (*ompt_callback_target_map_t) ( + ompt_id_t target_id, + unsigned int nitems, + void **host_addr, + void **device_addr, + size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra +); + +typedef struct ompt_record_target_map_t { + ompt_id_t target_id; + unsigned int nitems; + void **host_addr; + void **device_addr; + size_t *bytes; + unsigned int *mapping_flags; + const void *codeptr_ra; +} ompt_record_target_map_t; + +typedef void (*ompt_callback_target_submit_t) ( + ompt_id_t target_id, + ompt_id_t host_op_id, + unsigned int requested_num_teams +); + +typedef struct ompt_record_target_kernel_t { + ompt_id_t host_op_id; + unsigned int requested_num_teams; + unsigned int granted_num_teams; + ompt_device_time_t end_time; +} ompt_record_target_kernel_t; + +typedef int (*ompt_callback_control_tool_t) ( + uint64_t command, + uint64_t modifier, + void *arg, + const void *codeptr_ra +); 
+ +typedef struct ompt_record_control_tool_t { + uint64_t command; + uint64_t modifier; + const void *codeptr_ra; +} ompt_record_control_tool_t; + +typedef struct ompd_address_t { + ompd_seg_t segment; + ompd_addr_t address; +} ompd_address_t; + +typedef struct ompd_frame_info_t { + ompd_address_t frame_address; + ompd_word_t frame_flag; +} ompd_frame_info_t; + +typedef struct _ompd_aspace_handle ompd_address_space_handle_t; +typedef struct _ompd_thread_handle ompd_thread_handle_t; +typedef struct _ompd_parallel_handle ompd_parallel_handle_t; +typedef struct _ompd_task_handle ompd_task_handle_t; + +typedef struct _ompd_aspace_cont ompd_address_space_context_t; +typedef struct _ompd_thread_cont ompd_thread_context_t; + +typedef struct ompd_device_type_sizes_t { + uint8_t sizeof_char; + uint8_t sizeof_short; + uint8_t sizeof_int; + uint8_t sizeof_long; + uint8_t sizeof_long_long; + uint8_t sizeof_pointer; +} ompd_device_type_sizes_t; + +typedef struct ompt_record_ompt_t { + ompt_callbacks_t type; + ompt_device_time_t time; + ompt_id_t thread_id; + ompt_id_t target_id; + union { + ompt_record_thread_begin_t thread_begin; + ompt_record_parallel_begin_t parallel_begin; + ompt_record_parallel_end_t parallel_end; + ompt_record_work_t work; + ompt_record_dispatch_t dispatch; + ompt_record_task_create_t task_create; + ompt_record_dependences_t dependences; + ompt_record_task_dependence_t task_dependence; + ompt_record_task_schedule_t task_schedule; + ompt_record_implicit_task_t implicit_task; + ompt_record_master_t master; + ompt_record_sync_region_t sync_region; + ompt_record_mutex_acquire_t mutex_acquire; + ompt_record_mutex_t mutex; + ompt_record_nest_lock_t nest_lock; + ompt_record_flush_t flush; + ompt_record_cancel_t cancel; + ompt_record_target_t target; + ompt_record_target_data_op_t target_data_op; + ompt_record_target_map_t target_map; + ompt_record_target_kernel_t target_kernel; + ompt_record_control_tool_t control_tool; + } record; +} ompt_record_ompt_t; + 
+typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) ( + ompt_buffer_t *buffer, + ompt_buffer_cursor_t current +); + +#define ompt_id_none 0 +#define ompt_data_none {0} +#define ompt_time_none 0 +#define ompt_hwid_none 0 +#define ompt_addr_none ~0 +#define ompt_mutex_impl_none 0 +#define ompt_wait_id_none 0 + +#define ompd_segment_none 0 + +#endif /* __OMPT__ */ Index: openmp/trunk/runtime/src/include/50/ompt.h.var =================================================================== --- openmp/trunk/runtime/src/include/50/ompt.h.var +++ openmp/trunk/runtime/src/include/50/ompt.h.var @@ -1,735 +0,0 @@ -/* - * include/50/ompt.h.var - */ - -//===----------------------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.txt for details. -// -//===----------------------------------------------------------------------===// - -#ifndef __OMPT__ -#define __OMPT__ - -/***************************************************************************** - * system include files - *****************************************************************************/ - -#include <stdint.h> -#include <stddef.h> - - - -/***************************************************************************** - * iteration macros - *****************************************************************************/ - -#define FOREACH_OMPT_INQUIRY_FN(macro) \ - macro (ompt_enumerate_states) \ - macro (ompt_enumerate_mutex_impls) \ - \ - macro (ompt_set_callback) \ - macro (ompt_get_callback) \ - \ - macro (ompt_get_state) \ - \ - macro (ompt_get_parallel_info) \ - macro (ompt_get_task_info) \ - macro (ompt_get_task_memory) \ - macro (ompt_get_thread_data) \ - macro (ompt_get_unique_id) \ - macro (ompt_finalize_tool) \ - \ - macro(ompt_get_num_procs) \ - macro(ompt_get_num_places) \ - macro(ompt_get_place_proc_ids) \ - macro(ompt_get_place_num) \ - 
macro(ompt_get_partition_place_nums) \ - macro(ompt_get_proc_id) \ - \ - macro(ompt_get_target_info) \ - macro(ompt_get_num_devices) - -#define FOREACH_OMPT_STATE(macro) \ - \ - /* first available state */ \ - macro (ompt_state_undefined, 0x102) /* undefined thread state */ \ - \ - /* work states (0..15) */ \ - macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \ - macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \ - macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \ - \ - /* barrier wait states (16..31) */ \ - macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \ - macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \ - /* implicit barrier at the end of parallel region */\ - macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \ - /* implicit barrier at the end of worksharing */ \ - macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \ - macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \ - \ - /* task wait states (32..63) */ \ - macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \ - macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \ - \ - /* mutex wait states (64..127) */ \ - macro (ompt_state_wait_mutex, 0x040) \ - macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \ - macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \ - macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \ - macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \ - \ - /* target wait states (128..255) */ \ - macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \ - macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \ - macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \ - \ - /* misc (256..511) */ \ - macro (ompt_state_idle, 0x100) /* waiting for work */ \ - macro (ompt_state_overhead, 0x101) /* 
overhead excluding wait states */ \ - \ - /* implementation-specific states (512..) */ - - -#define FOREACH_KMP_MUTEX_IMPL(macro) \ - macro (ompt_mutex_impl_none, 0) /* unknown implementation */ \ - macro (kmp_mutex_impl_spin, 1) /* based on spin */ \ - macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ - macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ - -#define FOREACH_OMPT_EVENT(macro) \ - \ - /*--- Mandatory Events ---*/ \ - macro (ompt_callback_thread_begin, ompt_callback_thread_begin_t, 1) /* thread begin */ \ - macro (ompt_callback_thread_end, ompt_callback_thread_end_t, 2) /* thread end */ \ - \ - macro (ompt_callback_parallel_begin, ompt_callback_parallel_begin_t, 3) /* parallel begin */ \ - macro (ompt_callback_parallel_end, ompt_callback_parallel_end_t, 4) /* parallel end */ \ - \ - macro (ompt_callback_task_create, ompt_callback_task_create_t, 5) /* task begin */ \ - macro (ompt_callback_task_schedule, ompt_callback_task_schedule_t, 6) /* task schedule */ \ - macro (ompt_callback_implicit_task, ompt_callback_implicit_task_t, 7) /* implicit task */ \ - \ - macro (ompt_callback_target, ompt_callback_target_t, 8) /* target */ \ - macro (ompt_callback_target_data_op, ompt_callback_target_data_op_t, 9) /* target data op */ \ - macro (ompt_callback_target_submit, ompt_callback_target_submit_t, 10) /* target submit */ \ - \ - macro (ompt_callback_control_tool, ompt_callback_control_tool_t, 11) /* control tool */ \ - \ - macro (ompt_callback_device_initialize, ompt_callback_device_initialize_t, 12) /* device initialize */ \ - macro (ompt_callback_device_finalize, ompt_callback_device_finalize_t, 13) /* device finalize */ \ - \ - macro (ompt_callback_device_load, ompt_callback_device_load_t, 14) /* device load */ \ - macro (ompt_callback_device_unload, ompt_callback_device_unload_t, 15) /* device unload */ \ - \ - /* Optional Events */ \ - macro (ompt_callback_sync_region_wait, ompt_callback_sync_region_t, 16) /* 
sync region wait begin or end */ \ - \ - macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \ - \ - macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 18) /* report task dependences */ \ - macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \ - \ - macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \ - \ - macro (ompt_callback_master, ompt_callback_master_t, 21) /* task at master begin or end */ \ - \ - macro (ompt_callback_target_map, ompt_callback_target_map_t, 22) /* target map */ \ - \ - macro (ompt_callback_sync_region, ompt_callback_sync_region_t, 23) /* sync region begin or end */ \ - \ - macro (ompt_callback_lock_init, ompt_callback_mutex_acquire_t, 24) /* lock init */ \ - macro (ompt_callback_lock_destroy, ompt_callback_mutex_t, 25) /* lock destroy */ \ - \ - macro (ompt_callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26) /* mutex acquire */ \ - macro (ompt_callback_mutex_acquired, ompt_callback_mutex_t, 27) /* mutex acquired */ \ - \ - macro (ompt_callback_nest_lock, ompt_callback_nest_lock_t, 28) /* nest lock */ \ - \ - macro (ompt_callback_flush, ompt_callback_flush_t, 29) /* after executing flush */ \ - \ - macro (ompt_callback_cancel, ompt_callback_cancel_t, 30) /* cancel innermost binding region */ \ - \ - macro (ompt_callback_reduction, ompt_callback_sync_region_t, 31) /* reduction */ \ - \ - macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ - - - -/***************************************************************************** - * data types - *****************************************************************************/ - -/*--------------------- - * identifiers - *---------------------*/ - -typedef uint64_t ompt_id_t; -#define ompt_id_none 0 - -typedef union ompt_data_t { - uint64_t value; /* data initialized by runtime to unique id */ - void *ptr; /* pointer under 
tool control */ -} ompt_data_t; - -static const ompt_data_t ompt_data_none = {0}; - -typedef uint64_t ompt_wait_id_t; -static const ompt_wait_id_t omp_wait_id_none = 0; - -typedef void ompt_device_t; - - -/*--------------------- - * dependences types - *---------------------*/ - -typedef enum ompt_task_dependence_type_t { - // a two bit field for the dependence type - ompt_task_dependence_type_in = 1, - ompt_task_dependence_type_out = 2, - ompt_task_dependence_type_inout = 3, - ompt_task_dependence_type_mutexinoutset = 4 -} ompt_task_dependence_type_t; - -typedef struct ompt_task_dependence_t { - void *variable_addr; - ompt_task_dependence_type_t dependence_type; -} ompt_task_dependence_t; - - -/***************************************************************************** - * enumerations for thread states and runtime events - *****************************************************************************/ - -/*--------------------- - * runtime states - *---------------------*/ - -typedef enum { -#define ompt_state_macro(state, code) state = code, - FOREACH_OMPT_STATE(ompt_state_macro) -#undef ompt_state_macro -} ompt_state_t; - -typedef enum ompt_frame_flag_t { - ompt_frame_runtime = 0x00, - ompt_frame_application = 0x01, - ompt_frame_cfa = 0x10, - ompt_frame_framepointer = 0x20, - ompt_frame_stackaddress = 0x30 -} ompt_frame_flag_t; - - -/*--------------------- - * runtime events - *---------------------*/ - -typedef enum ompt_callbacks_e{ -#define ompt_event_macro(event, callback, eventid) event = eventid, - FOREACH_OMPT_EVENT(ompt_event_macro) -#undef ompt_event_macro -} ompt_callbacks_t; - - -/*--------------------- - * set callback results - *---------------------*/ -typedef enum ompt_set_result_t { - ompt_set_error = 0, - ompt_set_never = 1, - ompt_set_sometimes = 2, - ompt_set_sometimes_paired = 3, - ompt_set_always = 4 -} ompt_set_result_t; - - -/*---------------------- - * mutex implementations - *----------------------*/ -typedef enum kmp_mutex_impl_t { 
-#define kmp_mutex_impl_macro(impl, code) impl = code, - FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro) -#undef kmp_mutex_impl_macro -} kmp_mutex_impl_t; - - -/***************************************************************************** - * callback signatures - *****************************************************************************/ - -/* initialization */ -typedef void (*ompt_interface_fn_t)(void); - -typedef ompt_interface_fn_t (*ompt_function_lookup_t)( - const char * /* entry point to look up */ -); - -/* threads */ -typedef enum ompt_thread_t { - ompt_thread_initial = 1, // start the enumeration at 1 - ompt_thread_worker = 2, - ompt_thread_other = 3, - ompt_thread_unknown = 4 -} ompt_thread_t; - -typedef struct ompt_frame_t { - ompt_data_t exit_frame; - ompt_data_t enter_frame; - int exit_frame_flags; - int enter_frame_flags; -} ompt_frame_t; -typedef enum ompt_parallel_flag_t { - ompt_parallel_invoker_program = 0x00000001, /* program invokes master task */ - ompt_parallel_invoker_runtime = 0x00000002, /* runtime invokes master task */ - ompt_parallel_league = 0x40000000, - ompt_parallel_team = 0x80000000 -} ompt_parallel_flag_t; - -typedef void (*ompt_callback_thread_begin_t) ( - ompt_thread_t thread_type, /* type of thread */ - ompt_data_t *thread_data /* data of thread */ -); - -typedef void (*ompt_callback_thread_end_t) ( - ompt_data_t *thread_data /* data of thread */ -); - -typedef void (*ompt_wait_callback_t) ( - ompt_wait_id_t wait_id /* wait data */ -); - -/* parallel and workshares */ -typedef enum ompt_scope_endpoint_t { - ompt_scope_begin = 1, - ompt_scope_end = 2 -} ompt_scope_endpoint_t; - - -/* implicit task */ -typedef void (*ompt_callback_implicit_task_t) ( - ompt_scope_endpoint_t endpoint, /* endpoint of implicit task */ - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of implicit task */ - unsigned int actual_parallelism, /* team size */ - unsigned int index /* thread number of calling 
thread */ -); - -typedef void (*ompt_callback_parallel_begin_t) ( - ompt_data_t *encountering_task_data, /* data of encountering task */ - const ompt_frame_t *encountering_task_frame, /* frame data of encountering task */ - ompt_data_t *parallel_data, /* data of parallel region */ - unsigned int requested_team_size, /* requested number of threads in team */ - int flag, /* flag for additional information */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef void (*ompt_callback_parallel_end_t) ( - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *encountering_task_data, /* data of encountering task */ - int flag, /* flag for additional information */ - const void *codeptr_ra /* return address of runtime call */ -); - -/* tasks */ -typedef enum ompt_task_flag_t { - ompt_task_initial = 0x1, - ompt_task_implicit = 0x2, - ompt_task_explicit = 0x4, - ompt_task_target = 0x8, - ompt_task_undeferred = 0x8000000, - ompt_task_untied = 0x10000000, - ompt_task_final = 0x20000000, - ompt_task_mergeable = 0x40000000, - ompt_task_merged = 0x80000000 -} ompt_task_flag_t; - -typedef enum ompt_task_status_t { - ompt_task_complete = 1, - ompt_task_yield = 2, - ompt_task_cancel = 3, - ompt_task_switch = 4 -} ompt_task_status_t; - -typedef void (*ompt_callback_task_schedule_t) ( - ompt_data_t *prior_task_data, /* data of prior task */ - ompt_task_status_t prior_task_status, /* status of prior task */ - ompt_data_t *next_task_data /* data of next task */ -); - -typedef void (*ompt_callback_task_create_t) ( - ompt_data_t *encountering_task_data, /* data of parent task */ - const ompt_frame_t *encountering_task_frame, /* frame data for parent task */ - ompt_data_t *new_task_data, /* data of created task */ - int flag, /* type of created task */ - int has_dependences, /* created task has dependences */ - const void *codeptr_ra /* return address of runtime call */ -); - -/* task dependences */ -typedef void (*ompt_callback_task_dependences_t) 
( - ompt_data_t *task_data, /* data of task */ - const ompt_task_dependence_t *deps, /* dependences of task */ - int ndeps /* dependences count of task */ -); - -typedef void (*ompt_callback_task_dependence_t) ( - ompt_data_t *src_task_data, /* data of source task */ - ompt_data_t *sink_task_data /* data of sink task */ -); - -/* target and device */ -typedef enum ompt_target_t { - ompt_target = 1, - ompt_target_enter_data = 2, - ompt_target_exit_data = 3, - ompt_target_update = 4 -} ompt_target_t; - -typedef void (*ompt_callback_target_t) ( - ompt_target_t kind, - ompt_scope_endpoint_t endpoint, - uint64_t device_num, - ompt_data_t *task_data, - ompt_id_t target_id, - const void *codeptr_ra -); - -typedef enum ompt_target_data_op_t { - ompt_target_data_alloc = 1, - ompt_target_data_transfer_to_dev = 2, - ompt_target_data_transfer_from_dev = 3, - ompt_target_data_delete = 4 -} ompt_target_data_op_t; - -typedef void (*ompt_callback_target_data_op_t) ( - ompt_id_t target_id, - ompt_id_t host_op_id, - ompt_target_data_op_t optype, - void *src_addr, - int src_device_num, - void *dest_addr, - int dest_device_num, - size_t bytes, - const void *codeptr_ra -); - -typedef void (*ompt_callback_target_submit_t) ( - ompt_id_t target_id, - ompt_id_t host_op_id, - unsigned int requested_num_teams -); - -typedef void (*ompt_callback_target_map_t) ( - ompt_id_t target_id, - unsigned int nitems, - void **host_addr, - void **device_addr, - size_t *bytes, - unsigned int *mapping_flags, - const void *codeptr_ra -); - -typedef void (*ompt_callback_device_initialize_t) ( - uint64_t device_num, - const char *type, - ompt_device_t *device, - ompt_function_lookup_t lookup, - const char *documentation -); - -typedef void (*ompt_callback_device_finalize_t) ( - uint64_t device_num -); - -typedef void (*ompt_callback_device_load_t) ( - uint64_t device_num, - const char * filename, - int64_t offset_in_file, - void * vma_in_file, - size_t bytes, - void * host_addr, - void * device_addr, - 
uint64_t module_id -); - -#define ompt_addr_unknown ((void *) ~0) - -typedef void (*ompt_callback_device_unload_t) ( - uint64_t device_num, - uint64_t module_id -); - -/* control_tool */ -typedef int (*ompt_callback_control_tool_t) ( - uint64_t command, /* command of control call */ - uint64_t modifier, /* modifier of control call */ - void *arg, /* argument of control call */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef enum ompt_mutex_t { - ompt_mutex_lock = 1, - ompt_mutex_nest_lock = 2, - ompt_mutex_critical = 3, - ompt_mutex_atomic = 4, - ompt_mutex_ordered = 5 -} ompt_mutex_t; - -typedef void (*ompt_callback_mutex_acquire_t) ( - ompt_mutex_t kind, /* mutex kind */ - unsigned int hint, /* mutex hint */ - unsigned int impl, /* mutex implementation */ - ompt_wait_id_t wait_id, /* id of object being awaited */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef void (*ompt_callback_mutex_t) ( - ompt_mutex_t kind, /* mutex kind */ - ompt_wait_id_t wait_id, /* id of object being awaited */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef void (*ompt_callback_nest_lock_t) ( - ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */ - ompt_wait_id_t wait_id, /* id of object being awaited */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef void (*ompt_callback_master_t) ( - ompt_scope_endpoint_t endpoint, /* endpoint of master region */ - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of task */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef enum ompt_work_t { - ompt_work_loop = 1, - ompt_work_sections = 2, - ompt_work_single_executor = 3, - ompt_work_single_other = 4, - ompt_work_workshare = 5, - ompt_work_distribute = 6, - ompt_work_taskloop = 7 -} ompt_work_t; - -typedef void (*ompt_callback_work_t) ( - ompt_work_t wstype, /* type of work region */ - ompt_scope_endpoint_t 
endpoint, /* endpoint of work region */ - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of task */ - uint64_t count, /* quantity of work */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef enum ompt_sync_region_t { - ompt_sync_region_barrier = 1, - ompt_sync_region_barrier_implicit = 2, - ompt_sync_region_barrier_explicit = 3, - ompt_sync_region_barrier_implementation = 4, - ompt_sync_region_taskwait = 5, - ompt_sync_region_taskgroup = 6, - ompt_sync_region_reduction = 7 -} ompt_sync_region_t; - -typedef void (*ompt_callback_sync_region_t) ( - ompt_sync_region_t kind, /* kind of sync region */ - ompt_scope_endpoint_t endpoint, /* endpoint of sync region */ - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of task */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef enum ompt_cancel_flag_t { - ompt_cancel_parallel = 0x01, - ompt_cancel_sections = 0x02, - ompt_cancel_loop = 0x04, - ompt_cancel_taskgroup = 0x08, - ompt_cancel_activated = 0x10, - ompt_cancel_detected = 0x20, - ompt_cancel_discarded_task = 0x40 -} ompt_cancel_flag_t; - -typedef void (*ompt_callback_cancel_t) ( - ompt_data_t *task_data, /* data of task */ - int flags, /* cancel flags */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef void (*ompt_callback_flush_t) ( - ompt_data_t *thread_data, /* data of thread */ - const void *codeptr_ra /* return address of runtime call */ -); - -typedef enum ompt_dispatch_t { - ompt_dispatch_iteration = 1, - ompt_dispatch_section = 2 -} ompt_dispatch_t; - -typedef void (*ompt_callback_dispatch_t) ( - ompt_data_t *parallel_data, - ompt_data_t *task_data, - ompt_dispatch_t kind, - ompt_data_t instance -); - -/**************************************************************************** - * ompt API - ***************************************************************************/ - -#ifdef __cplusplus 
-extern "C" { -#endif - -#define OMPT_API_FNTYPE(fn) fn##_t - -#define OMPT_API_FUNCTION(return_type, fn, args) \ - typedef return_type (*OMPT_API_FNTYPE(fn)) args - - - -/**************************************************************************** - * INQUIRY FUNCTIONS - ***************************************************************************/ - -/* state */ -OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( - ompt_wait_id_t *wait_id -)); - -/* thread */ -OMPT_API_FUNCTION(ompt_data_t*, ompt_get_thread_data, (void)); - -/* parallel region */ -OMPT_API_FUNCTION(int, ompt_get_parallel_info, ( - int ancestor_level, - ompt_data_t **parallel_data, - int *team_size -)); - -/* task */ -OMPT_API_FUNCTION(int, ompt_get_task_info, ( - int ancestor_level, - int *type, - ompt_data_t **task_data, - ompt_frame_t **task_frame, - ompt_data_t **parallel_data, - int *thread_num -)); - -OMPT_API_FUNCTION(int, ompt_get_task_memory, ( - void **addr, - size_t *size, - int block -)); - -/* procs */ -OMPT_API_FUNCTION(int, ompt_get_num_procs, (void)); - -/* places */ -OMPT_API_FUNCTION(int, ompt_get_num_places, (void)); - -OMPT_API_FUNCTION(int, ompt_get_place_proc_ids, ( - int place_num, - int ids_size, - int *ids -)); - -OMPT_API_FUNCTION(int, ompt_get_place_num, (void)); - -OMPT_API_FUNCTION(int, ompt_get_partition_place_nums, ( - int place_nums_size, - int *place_nums -)); - -/* proc_id */ -OMPT_API_FUNCTION(int, ompt_get_proc_id, (void)); - - -/**************************************************************************** - * INITIALIZATION FUNCTIONS - ***************************************************************************/ - -OMPT_API_FUNCTION(int, ompt_initialize, ( - ompt_function_lookup_t lookup, - ompt_data_t *tool_data -)); - -OMPT_API_FUNCTION(void, ompt_finalize, ( - ompt_data_t *tool_data -)); - -typedef struct ompt_start_tool_result_t { - ompt_initialize_t initialize; - ompt_finalize_t finalize; - ompt_data_t tool_data; -} ompt_start_tool_result_t; - -/* 
initialization interface to be defined by tool */ -#ifdef _WIN32 -__declspec(dllexport) -#endif -ompt_start_tool_result_t * ompt_start_tool( - unsigned int omp_version, - const char * runtime_version -); - -typedef void (*ompt_callback_t)(void); - -OMPT_API_FUNCTION(int, ompt_set_callback, ( - ompt_callbacks_t which, - ompt_callback_t callback -)); - -OMPT_API_FUNCTION(int, ompt_get_callback, ( - ompt_callbacks_t which, - ompt_callback_t *callback -)); - - - -/**************************************************************************** - * MISCELLANEOUS FUNCTIONS - ***************************************************************************/ - -/* state enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_states, ( - int current_state, - int *next_state, - const char **next_state_name -)); - -/* mutex implementation enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_mutex_impls, ( - int current_impl, - int *next_impl, - const char **next_impl_name -)); - -/* get_unique_id */ -OMPT_API_FUNCTION(uint64_t, ompt_get_unique_id, (void)); - -/* finalize tool */ -OMPT_API_FUNCTION(void, ompt_finalize_tool, (void)); - -#ifdef __cplusplus -}; -#endif - -/**************************************************************************** - * TARGET - ***************************************************************************/ - - OMPT_API_FUNCTION(int, ompt_get_target_info, ( - uint64_t *device_num, - ompt_id_t *target_id, - ompt_id_t *host_op_id -)); - - OMPT_API_FUNCTION(int, ompt_get_num_devices, (void)); - -#endif /* __OMPT__ */ Index: openmp/trunk/runtime/src/kmp_barrier.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_barrier.cpp +++ openmp/trunk/runtime/src/kmp_barrier.cpp @@ -1910,7 +1910,7 @@ #endif if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, task_data, 0, ds_tid); + ompt_scope_end, NULL, task_data, 0, 
ds_tid, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } } #endif Index: openmp/trunk/runtime/src/kmp_csupport.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_csupport.cpp +++ openmp/trunk/runtime/src/kmp_csupport.cpp @@ -511,7 +511,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, - OMPT_CUR_TASK_INFO(this_thr)->thread_num); + OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit); } // reset clear the task id only after unlinking the task @@ -1292,7 +1292,7 @@ return kmp_mutex_impl_speculative; #endif default: - return ompt_mutex_impl_none; + return kmp_mutex_impl_none; } ilock = KMP_LOOKUP_I_LOCK(user_lock); } @@ -1316,7 +1316,7 @@ case locktag_nested_drdpa: return kmp_mutex_impl_queuing; default: - return ompt_mutex_impl_none; + return kmp_mutex_impl_none; } } #else @@ -1339,7 +1339,7 @@ return kmp_mutex_impl_speculative; #endif default: - return ompt_mutex_impl_none; + return kmp_mutex_impl_none; } } #endif // KMP_USE_DYNAMIC_LOCK Index: openmp/trunk/runtime/src/kmp_gsupport.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_gsupport.cpp +++ openmp/trunk/runtime/src/kmp_gsupport.cpp @@ -400,7 +400,7 @@ ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid)); + &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
task_info->thread_num = __kmp_tid_from_gtid(gtid); } thr->th.ompt_thread_info.state = ompt_state_work_parallel; Index: openmp/trunk/runtime/src/kmp_runtime.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_runtime.cpp +++ openmp/trunk/runtime/src/kmp_runtime.cpp @@ -1429,7 +1429,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), - OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid)); + OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(this_thr) ->thread_num = __kmp_tid_from_gtid(global_tid); } @@ -1595,7 +1595,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid)); + implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); } @@ -1625,7 +1625,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, implicit_task_data, 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num); + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } __ompt_lw_taskteam_unlink(master_th); @@ -1807,7 +1807,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid)); + &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); } @@ -1837,7 +1837,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num); + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } __ompt_lw_taskteam_unlink(master_th); @@ -1908,7 +1908,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid)); + implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); } @@ -1937,7 +1937,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num); + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); @@ -2541,7 +2541,7 @@ int ompt_team_size = team->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, - OMPT_CUR_TASK_INFO(master_th)->thread_num); + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } task_info->frame.exit_frame = ompt_data_none; @@ -7041,7 +7041,7 @@ ompt_team_size = team->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, - __kmp_tid_from_gtid(gtid)); + __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? 
OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid); } #endif @@ -7291,7 +7291,7 @@ #endif if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, task_data, 0, ds_tid); + ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } } #endif Index: openmp/trunk/runtime/src/kmp_taskdeps.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_taskdeps.cpp +++ openmp/trunk/runtime/src/kmp_taskdeps.cpp @@ -485,43 +485,43 @@ #if OMPT_OPTIONAL /* OMPT grab all dependences if requested by the tool */ if (ndeps + ndeps_noalias > 0 && - ompt_enabled.ompt_callback_task_dependences) { + ompt_enabled.ompt_callback_dependences) { kmp_int32 i; new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias; new_taskdata->ompt_task_info.deps = - (ompt_task_dependence_t *)KMP_OMPT_DEPS_ALLOC( - thread, (ndeps + ndeps_noalias) * sizeof(ompt_task_dependence_t)); + (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC( + thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t)); KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL); for (i = 0; i < ndeps; i++) { - new_taskdata->ompt_task_info.deps[i].variable_addr = + new_taskdata->ompt_task_info.deps[i].variable.ptr = (void *)dep_list[i].base_addr; if (dep_list[i].flags.in && dep_list[i].flags.out) new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_task_dependence_type_inout; + ompt_dependence_type_inout; else if (dep_list[i].flags.out) new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_task_dependence_type_out; + ompt_dependence_type_out; else if (dep_list[i].flags.in) new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_task_dependence_type_in; + ompt_dependence_type_in; } for (i = 0; i < ndeps_noalias; i++) { - new_taskdata->ompt_task_info.deps[ndeps + i].variable_addr = + 
new_taskdata->ompt_task_info.deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr; if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_task_dependence_type_inout; + ompt_dependence_type_inout; else if (noalias_dep_list[i].flags.out) new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_task_dependence_type_out; + ompt_dependence_type_out; else if (noalias_dep_list[i].flags.in) new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_task_dependence_type_in; + ompt_dependence_type_in; } - ompt_callbacks.ompt_callback(ompt_callback_task_dependences)( + ompt_callbacks.ompt_callback(ompt_callback_dependences)( &(new_taskdata->ompt_task_info.task_data), new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps); /* We can now free the allocated memory for the dependencies */ Index: openmp/trunk/runtime/src/kmp_wait_release.h =================================================================== --- openmp/trunk/runtime/src/kmp_wait_release.h +++ openmp/trunk/runtime/src/kmp_wait_release.h @@ -140,7 +140,7 @@ if (!KMP_MASTER_TID(ds_tid)) { if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, tId, 0, ds_tid); + ompt_scope_end, NULL, tId, 0, ds_tid, ompt_task_implicit); } // return to idle state this_thr->th.ompt_thread_info.state = ompt_state_idle; Index: openmp/trunk/runtime/src/ompt-event-specific.h =================================================================== --- openmp/trunk/runtime/src/ompt-event-specific.h +++ openmp/trunk/runtime/src/ompt-event-specific.h @@ -78,11 +78,11 @@ #define ompt_callback_mutex_released_implemented ompt_event_MAY_ALWAYS_OPTIONAL #if OMP_40_ENABLED -#define ompt_callback_task_dependences_implemented \ +#define ompt_callback_dependences_implemented \ ompt_event_MAY_ALWAYS_OPTIONAL #define 
ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL #else -#define ompt_callback_task_dependences_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_dependences_implemented ompt_event_UNIMPLEMENTED #define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED #endif /* OMP_40_ENABLED */ Index: openmp/trunk/runtime/src/ompt-general.cpp =================================================================== --- openmp/trunk/runtime/src/ompt-general.cpp +++ openmp/trunk/runtime/src/ompt-general.cpp @@ -327,6 +327,8 @@ #endif } +extern "C" int omp_get_initial_device(void); + void ompt_post_init() { //-------------------------------------------------- // Execute the post-initialization logic only once. @@ -343,7 +345,7 @@ //-------------------------------------------------- if (ompt_start_tool_result) { ompt_enabled.enabled = !!ompt_start_tool_result->initialize( - ompt_fn_lookup, &(ompt_start_tool_result->tool_data)); + ompt_fn_lookup, omp_get_initial_device(), &(ompt_start_tool_result->tool_data)); if (!ompt_enabled.enabled) { // tool not enabled, zero out the bitmap, and done @@ -422,7 +424,7 @@ * callbacks ****************************************************************************/ -OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which, +OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which, ompt_callback_t callback) { switch (which) { @@ -482,8 +484,8 @@ team_size); } -OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *wait_id) { - ompt_state_t thread_state = __ompt_get_state_internal(wait_id); +OMPT_API_ROUTINE int ompt_get_state(ompt_wait_id_t *wait_id) { + int thread_state = __ompt_get_state_internal(wait_id); if (thread_state == ompt_state_undefined) { thread_state = ompt_state_work_serial; Index: openmp/trunk/runtime/src/ompt-internal.h =================================================================== --- openmp/trunk/runtime/src/ompt-internal.h +++ 
openmp/trunk/runtime/src/ompt-internal.h @@ -15,7 +15,7 @@ #define __OMPT_INTERNAL_H__ #include "ompt-event-specific.h" -#include "ompt.h" +#include "omp-tools.h" #define OMPT_VERSION 1 @@ -60,7 +60,7 @@ int thread_num; #if OMP_40_ENABLED int ndeps; - ompt_task_dependence_t *deps; + ompt_dependence_t *deps; #endif /* OMP_40_ENABLED */ } ompt_task_info_t; Index: openmp/trunk/runtime/src/ompt-specific.cpp =================================================================== --- openmp/trunk/runtime/src/ompt-specific.cpp +++ openmp/trunk/runtime/src/ompt-specific.cpp @@ -214,7 +214,7 @@ ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable; } -ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) { +int __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) { kmp_info_t *ti = ompt_get_thread(); if (ti) { Index: openmp/trunk/runtime/test/lit.cfg =================================================================== --- openmp/trunk/runtime/test/lit.cfg +++ openmp/trunk/runtime/test/lit.cfg @@ -127,7 +127,7 @@ config.substitutions.append(("%python", '"%s"' % (sys.executable))) if config.has_ompt: - config.substitutions.append(("FileCheck", config.test_filecheck)) + config.substitutions.append(("FileCheck", "tee %%s.out | %s" % config.test_filecheck)) config.substitutions.append(("%sort-threads", "sort -n -s")) if config.operating_system == 'Windows': # No such environment variable on Windows. 
Index: openmp/trunk/runtime/test/ompt/callback.h =================================================================== --- openmp/trunk/runtime/test/ompt/callback.h +++ openmp/trunk/runtime/test/ompt/callback.h @@ -8,7 +8,7 @@ #endif #include <inttypes.h> #include <omp.h> -#include <ompt.h> +#include <omp-tools.h> #include "ompt-signal.h" // Used to detect architecture @@ -23,10 +23,13 @@ static const char* ompt_task_status_t_values[] = { NULL, - "ompt_task_complete", - "ompt_task_yield", - "ompt_task_cancel", - "ompt_task_others" + "ompt_task_complete", // 1 + "ompt_task_yield", // 2 + "ompt_task_cancel", // 3 + "ompt_task_detach", // 4 + "ompt_task_early_fulfill", // 5 + "ompt_task_late_fulfill", // 6 + "ompt_task_switch" // 7 }; static const char* ompt_cancel_flag_t_values[] = { "ompt_cancel_parallel", @@ -439,7 +442,8 @@ ompt_data_t *parallel_data, ompt_data_t *task_data, unsigned int team_size, - unsigned int thread_num) + unsigned int thread_num, + int flags) { switch(endpoint) { @@ -651,9 +655,9 @@ } static void -on_ompt_callback_task_dependences( +on_ompt_callback_dependences( ompt_data_t *task_data, - const ompt_task_dependence_t *deps, + const ompt_dependence_t *deps, int ndeps) { printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n", ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps); @@ -710,6 +714,7 @@ int ompt_initialize( ompt_function_lookup_t lookup, + int initial_device_num, ompt_data_t *tool_data) { ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); @@ -747,7 +752,7 @@ register_callback(ompt_callback_parallel_end); register_callback(ompt_callback_task_create); register_callback(ompt_callback_task_schedule); - register_callback(ompt_callback_task_dependences); + register_callback(ompt_callback_dependences); register_callback(ompt_callback_task_dependence); register_callback(ompt_callback_thread_begin); register_callback(ompt_callback_thread_end); @@ -760,6 +765,9 @@ printf("0: ompt_event_runtime_shutdown\n"); }
+#ifdef __cplusplus +extern "C" { +#endif ompt_start_tool_result_t* ompt_start_tool( unsigned int omp_version, const char *runtime_version) @@ -767,3 +775,6 @@ static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; return &ompt_start_tool_result; } +#ifdef __cplusplus +} +#endif Index: openmp/trunk/runtime/test/ompt/cancel/cancel_taskgroup.c =================================================================== --- openmp/trunk/runtime/test/ompt/cancel/cancel_taskgroup.c +++ openmp/trunk/runtime/test/ompt/cancel/cancel_taskgroup.c @@ -75,7 +75,7 @@ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[CANCEL_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_switch=7 // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[CANCEL_TASK_ID]], flags=ompt_cancel_taskgroup|ompt_cancel_activated=24, codeptr_ra={{0x[0-f]*}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[CANCEL_TASK_ID]], second_task_id=[[PARENT_TASK_ID]], prior_task_status=ompt_task_cancel=3 Index: openmp/trunk/runtime/test/ompt/loadtool/tool_available/tool_available.c =================================================================== --- 
openmp/trunk/runtime/test/ompt/loadtool/tool_available/tool_available.c +++ openmp/trunk/runtime/test/ompt/loadtool/tool_available/tool_available.c @@ -49,7 +49,7 @@ #ifdef TOOL #include <stdio.h> -#include <ompt.h> +#include <omp-tools.h> int ompt_initialize( ompt_function_lookup_t lookup, Index: openmp/trunk/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c =================================================================== --- openmp/trunk/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c +++ openmp/trunk/runtime/test/ompt/loadtool/tool_available_search/tool_available_search.c @@ -18,7 +18,7 @@ #ifdef CODE #include "stdio.h" #include "omp.h" -#include "ompt.h" +#include "omp-tools.h" int main() { @@ -52,7 +52,7 @@ #ifdef TOOL -#include <ompt.h> +#include <omp-tools.h> #include "stdio.h" #ifdef SECOND_TOOL Index: openmp/trunk/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c =================================================================== --- openmp/trunk/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c +++ openmp/trunk/runtime/test/ompt/loadtool/tool_not_available/tool_not_available.c @@ -28,7 +28,7 @@ #ifdef CODE #include "stdio.h" #include "omp.h" -#include "ompt.h" +#include "omp-tools.h" int main() { @@ -56,7 +56,7 @@ #ifdef TOOL -#include <ompt.h> +#include <omp-tools.h> #include "stdio.h" ompt_start_tool_result_t* ompt_start_tool( Index: openmp/trunk/runtime/test/ompt/parallel/parallel_if0.c =================================================================== --- openmp/trunk/runtime/test/ompt/parallel/parallel_if0.c +++ openmp/trunk/runtime/test/ompt/parallel/parallel_if0.c @@ -57,7 +57,7 @@ // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}},
new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_switch=7 // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} Index: openmp/trunk/runtime/test/ompt/parallel/serialized.c =================================================================== --- openmp/trunk/runtime/test/ompt/parallel/serialized.c +++ openmp/trunk/runtime/test/ompt/parallel/serialized.c @@ -57,7 +57,7 @@ // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_switch=7 // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, 
reenter_frame=[[NULL]] // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} Index: openmp/trunk/runtime/test/ompt/synchronization/barrier/implicit_task_data.c =================================================================== --- openmp/trunk/runtime/test/ompt/synchronization/barrier/implicit_task_data.c +++ openmp/trunk/runtime/test/ompt/synchronization/barrier/implicit_task_data.c @@ -12,7 +12,7 @@ #include <unistd.h> #include <inttypes.h> #include <omp.h> -#include <ompt.h> +#include <omp-tools.h> static const char* ompt_thread_t_values[] = { NULL, Index: openmp/trunk/runtime/test/ompt/tasks/explicit_task.c =================================================================== --- openmp/trunk/runtime/test/ompt/tasks/explicit_task.c +++ openmp/trunk/runtime/test/ompt/tasks/explicit_task.c @@ -1,4 +1,4 @@ -// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | tee %s.out | FileCheck %s // REQUIRES: ompt // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 #define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN Index: openmp/trunk/runtime/test/ompt/tasks/taskyield.c =================================================================== --- openmp/trunk/runtime/test/ompt/tasks/taskyield.c +++ openmp/trunk/runtime/test/ompt/tasks/taskyield.c @@ -51,7 +51,7 @@ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[MAIN_TASK]], prior_task_status=ompt_task_yield=2 // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[MAIN_TASK]], second_task_id=[[IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 - // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]],
prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_switch=7 // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[WORKER_TASK]], second_task_id={{[0-9]+}}, prior_task_status=ompt_task_complete=1