Index: runtime/src/include/50/ompt.h.var =================================================================== --- runtime/src/include/50/ompt.h.var +++ runtime/src/include/50/ompt.h.var @@ -21,8 +21,6 @@ #include #include - - /***************************************************************************** * iteration macros *****************************************************************************/ @@ -96,7 +94,7 @@ #define FOREACH_KMP_MUTEX_IMPL(macro) \ - macro (ompt_mutex_impl_none, 0) /* unknown implementation */ \ + macro (kmp_mutex_impl_none, 0) /* unknown implementation */ \ macro (kmp_mutex_impl_spin, 1) /* based on spin */ \ macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ @@ -131,7 +129,7 @@ \ macro (ompt_callback_mutex_released, ompt_callback_mutex_t, 17) /* mutex released */ \ \ - macro (ompt_callback_task_dependences, ompt_callback_task_dependences_t, 18) /* report task dependences */ \ + macro (ompt_callback_dependences, ompt_callback_dependences_t, 18) /* report task dependences */ \ macro (ompt_callback_task_dependence, ompt_callback_task_dependence_t, 19) /* report task dependence */ \ \ macro (ompt_callback_work, ompt_callback_work_t, 20) /* task at work begin or end */ \ @@ -158,63 +156,215 @@ \ macro (ompt_callback_dispatch, ompt_callback_dispatch_t, 32) /* dispatch of work */ +/***************************************************************************** + * implementation specific types + *****************************************************************************/ +typedef enum kmp_mutex_impl_t { +#define kmp_mutex_impl_macro(impl, code) impl = code, + FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro) +#undef kmp_mutex_impl_macro +} kmp_mutex_impl_t; /***************************************************************************** - * data types + * definitions generated from spec *****************************************************************************/ -/*--------------------- - * identifiers - *---------------------*/ +typedef enum ompt_callbacks_t { + ompt_callback_thread_begin = 1, + ompt_callback_thread_end = 2, + ompt_callback_parallel_begin = 3, + ompt_callback_parallel_end = 4, + ompt_callback_task_create = 5, + ompt_callback_task_schedule = 6, + ompt_callback_implicit_task = 7, + ompt_callback_target = 8, + ompt_callback_target_data_op = 9, + ompt_callback_target_submit = 10, + ompt_callback_control_tool = 11, + ompt_callback_device_initialize = 12, + ompt_callback_device_finalize = 13, + ompt_callback_device_load = 14, + ompt_callback_device_unload = 15, + ompt_callback_sync_region_wait = 16, + ompt_callback_mutex_released = 17, + ompt_callback_dependences = 18, + ompt_callback_task_dependence = 19, + ompt_callback_work = 20, + ompt_callback_master = 21, + ompt_callback_target_map = 22, + ompt_callback_sync_region = 23, + ompt_callback_lock_init = 24, + ompt_callback_lock_destroy = 25, + ompt_callback_mutex_acquire = 26, + ompt_callback_mutex_acquired = 27, + ompt_callback_nest_lock = 28, + ompt_callback_flush = 29, + ompt_callback_cancel = 30, + ompt_callback_reduction = 31, + ompt_callback_dispatch = 32 +} ompt_callbacks_t; + +typedef enum ompt_record_t { + ompt_record_ompt = 1, + ompt_record_native = 2, + ompt_record_invalid = 3 +} ompt_record_t; + +typedef enum ompt_record_native_t { + ompt_record_native_info = 1, + ompt_record_native_event = 2 +} ompt_record_native_t; + +typedef enum ompt_set_result_t { + ompt_set_error = 0, + ompt_set_never = 1, + ompt_set_impossible = 2, + ompt_set_sometimes = 3, + ompt_set_sometimes_paired = 4, + ompt_set_always = 5 +} ompt_set_result_t; typedef uint64_t ompt_id_t; -#define ompt_id_none 0 -typedef union ompt_data_t { - uint64_t value; /* data initialized by runtime to unique id */ - void *ptr; /* pointer under tool control */ -} ompt_data_t; +typedef uint64_t ompt_device_time_t; -static const ompt_data_t ompt_data_none = {0}; +typedef uint64_t ompt_buffer_cursor_t; -typedef uint64_t ompt_wait_id_t; -static const ompt_wait_id_t omp_wait_id_none = 0; +typedef enum ompt_thread_t { + ompt_thread_initial = 1, + ompt_thread_worker = 2, + ompt_thread_other = 3, + ompt_thread_unknown = 4 +} ompt_thread_t; -typedef void ompt_device_t; +typedef enum ompt_scope_endpoint_t { + ompt_scope_begin = 1, + ompt_scope_end = 2 +} ompt_scope_endpoint_t; +typedef enum ompt_dispatch_t { + ompt_dispatch_iteration = 1, + ompt_dispatch_section = 2 +} ompt_dispatch_t; -/*--------------------- - * dependences types - *---------------------*/ +typedef enum ompt_sync_region_t { + ompt_sync_region_barrier = 1, + ompt_sync_region_barrier_implicit = 2, + ompt_sync_region_barrier_explicit = 3, + ompt_sync_region_barrier_implementation = 4, + ompt_sync_region_taskwait = 5, + ompt_sync_region_taskgroup = 6, + ompt_sync_region_reduction = 7 +} ompt_sync_region_t; -typedef enum ompt_task_dependence_type_t { - // a two bit field for the dependence type - ompt_task_dependence_type_in = 1, - ompt_task_dependence_type_out = 2, - ompt_task_dependence_type_inout = 3, - ompt_task_dependence_type_mutexinoutset = 4 -} ompt_task_dependence_type_t; +typedef enum ompt_target_data_op_t { + ompt_target_data_alloc = 1, + ompt_target_data_transfer_to_device = 2, + ompt_target_data_transfer_from_device = 3, + ompt_target_data_delete = 4, + ompt_target_data_associate = 5, + ompt_target_data_disassociate = 6 +} ompt_target_data_op_t; -typedef struct ompt_task_dependence_t { - void *variable_addr; - ompt_task_dependence_type_t dependence_type; -} ompt_task_dependence_t; +typedef enum ompt_work_t { + ompt_work_loop = 1, + ompt_work_sections = 2, + ompt_work_single_executor = 3, + ompt_work_single_other = 4, + ompt_work_workshare = 5, + ompt_work_distribute = 6, + ompt_work_taskloop = 7 +} ompt_work_t; +typedef enum ompt_mutex_t { + ompt_mutex_lock = 1, + ompt_mutex_test_lock = 2, + ompt_mutex_nest_lock = 3, + ompt_mutex_test_nest_lock = 4, + ompt_mutex_critical = 5, + ompt_mutex_atomic = 6, + ompt_mutex_ordered = 7 +} ompt_mutex_t; -/***************************************************************************** - * enumerations for thread states and runtime events - *****************************************************************************/ +typedef enum ompt_native_mon_flag_t { + ompt_native_data_motion_explicit = 0x01, + ompt_native_data_motion_implicit = 0x02, + ompt_native_kernel_invocation = 0x04, + ompt_native_kernel_execution = 0x08, + ompt_native_driver = 0x10, + ompt_native_runtime = 0x20, + ompt_native_overhead = 0x40, + ompt_native_idleness = 0x80 +} ompt_native_mon_flag_t; -/*--------------------- - * runtime states - *---------------------*/ +typedef enum ompt_task_flag_t { + ompt_task_initial = 0x00000001, + ompt_task_implicit = 0x00000002, + ompt_task_explicit = 0x00000004, + ompt_task_target = 0x00000008, + ompt_task_undeferred = 0x08000000, + ompt_task_untied = 0x10000000, + ompt_task_final = 0x20000000, + ompt_task_mergeable = 0x40000000, + ompt_task_merged = 0x80000000 +} ompt_task_flag_t; -typedef enum { -#define ompt_state_macro(state, code) state = code, - FOREACH_OMP_STATE(ompt_state_macro) -#undef ompt_state_macro -} ompt_state_t; +typedef enum ompt_task_status_t { + ompt_task_complete = 1, + ompt_task_yield = 2, + ompt_task_cancel = 3, + ompt_task_detach = 4, + ompt_task_early_fulfill = 5, + ompt_task_late_fulfill = 6, + ompt_task_switch = 7 +} ompt_task_status_t; + +typedef enum ompt_target_t { + ompt_target = 1, + ompt_target_enter_data = 2, + ompt_target_exit_data = 3, + ompt_target_update = 4 +} ompt_target_t; + +typedef enum ompt_parallel_flag_t { + ompt_parallel_invoker_program = 0x00000001, + ompt_parallel_invoker_runtime = 0x00000002, + ompt_parallel_league = 0x40000000, + ompt_parallel_team = 0x80000000 +} ompt_parallel_flag_t; + +typedef enum ompt_target_map_flag_t { + ompt_target_map_flag_to = 0x01, + ompt_target_map_flag_from = 0x02, + ompt_target_map_flag_alloc = 0x04, + ompt_target_map_flag_release = 0x08, + ompt_target_map_flag_delete = 0x10, + ompt_target_map_flag_implicit = 0x20 +} ompt_target_map_flag_t; + +typedef enum ompt_dependence_type_t { + ompt_dependence_type_in = 1, + ompt_dependence_type_out = 2, + ompt_dependence_type_inout = 3, + ompt_dependence_type_mutexinoutset = 4, + ompt_dependence_type_source = 5, + ompt_dependence_type_sink = 6 +} ompt_dependence_type_t; + +typedef enum ompt_cancel_flag_t { + ompt_cancel_parallel = 0x01, + ompt_cancel_sections = 0x02, + ompt_cancel_loop = 0x04, + ompt_cancel_taskgroup = 0x08, + ompt_cancel_activated = 0x10, + ompt_cancel_detected = 0x20, + ompt_cancel_discarded_task = 0x40 +} ompt_cancel_flag_t; + +typedef uint64_t ompt_hwid_t; + +typedef uint64_t ompt_wait_id_t; typedef enum ompt_frame_flag_t { ompt_frame_runtime = 0x00, @@ -224,58 +374,84 @@ ompt_frame_stackaddress = 0x30 } ompt_frame_flag_t; +typedef enum ompt_state_t { + ompt_state_work_serial = 0x000, + ompt_state_work_parallel = 0x001, + ompt_state_work_reduction = 0x002, + + ompt_state_wait_barrier = 0x010, + ompt_state_wait_barrier_implicit_parallel = 0x011, + ompt_state_wait_barrier_implicit_workshare = 0x012, + ompt_state_wait_barrier_implicit = 0x013, + ompt_state_wait_barrier_explicit = 0x014, + + ompt_state_wait_taskwait = 0x020, + ompt_state_wait_taskgroup = 0x021, + + ompt_state_wait_mutex = 0x040, + ompt_state_wait_lock = 0x041, + ompt_state_wait_critical = 0x042, + ompt_state_wait_atomic = 0x043, + ompt_state_wait_ordered = 0x044, + + ompt_state_wait_target = 0x080, + ompt_state_wait_target_map = 0x081, + ompt_state_wait_target_update = 0x082, + + ompt_state_idle = 0x100, + ompt_state_overhead = 0x101, + ompt_state_undefined = 0x102 +} ompt_state_t; -/*--------------------- - * runtime events - *---------------------*/ +typedef uint64_t (*ompt_get_unique_id_t) (void); -typedef enum ompt_callbacks_e{ -#define ompt_event_macro(event, callback, eventid) event = eventid, - FOREACH_OMPT_EVENT(ompt_event_macro) -#undef ompt_event_macro -} ompt_callbacks_t; +typedef uint64_t ompd_size_t; +typedef uint64_t ompd_wait_id_t; -/*--------------------- - * set callback results - *---------------------*/ -typedef enum ompt_set_result_t { - ompt_set_error = 0, - ompt_set_never = 1, - ompt_set_sometimes = 2, - ompt_set_sometimes_paired = 3, - ompt_set_always = 4 -} ompt_set_result_t; +typedef uint64_t ompd_addr_t; +typedef int64_t ompd_word_t; +typedef uint64_t ompd_seg_t; +typedef uint64_t ompd_device_t; -/*---------------------- - * mutex implementations - *----------------------*/ -typedef enum kmp_mutex_impl_t { -#define kmp_mutex_impl_macro(impl, code) impl = code, - FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro) -#undef kmp_mutex_impl_macro -} kmp_mutex_impl_t; +typedef uint64_t ompd_thread_id_t; +typedef enum ompd_scope_t { + ompd_scope_global = 1, + ompd_scope_address_space = 2, + ompd_scope_thread = 3, + ompd_scope_parallel = 4, + ompd_scope_implicit_task = 5, + ompd_scope_task = 6 +} ompd_scope_t; -/***************************************************************************** - * callback signatures - *****************************************************************************/ +typedef uint64_t ompd_icv_id_t; -/* initialization */ -typedef void (*ompt_interface_fn_t)(void); +typedef enum ompd_rc_t { + ompd_rc_ok = 0, + ompd_rc_unavailable = 1, + ompd_rc_stale_handle = 2, + ompd_rc_bad_input = 3, + ompd_rc_error = 4, + ompd_rc_unsupported = 5, + ompd_rc_needs_state_tracking = 6, + ompd_rc_incompatible = 7, + ompd_rc_device_read_error = 8, + ompd_rc_device_write_error = 9, + ompd_rc_nomem = 10, +} ompd_rc_t; -typedef ompt_interface_fn_t (*ompt_function_lookup_t)( - const char * /* entry point to look up */ +typedef void (*ompt_interface_fn_t) (void); + +typedef ompt_interface_fn_t (*ompt_function_lookup_t) ( + const char *interface_function_name ); -/* threads */ -typedef enum ompt_thread_t { - ompt_thread_initial = 1, // start the enumeration at 1 - ompt_thread_worker = 2, - ompt_thread_other = 3, - ompt_thread_unknown = 4 -} ompt_thread_t; +typedef union ompt_data_t { + uint64_t value; + void *ptr; +} ompt_data_t; typedef struct ompt_frame_t { ompt_data_t exit_frame; @@ -283,453 +459,625 @@ int exit_frame_flags; int enter_frame_flags; } ompt_frame_t; -typedef enum ompt_parallel_flag_t { - ompt_parallel_invoker_program = 0x00000001, /* program invokes master task */ - ompt_parallel_invoker_runtime = 0x00000002, /* runtime invokes master task */ - ompt_parallel_league = 0x40000000, - ompt_parallel_team = 0x80000000 -} ompt_parallel_flag_t; -typedef void (*ompt_callback_thread_begin_t) ( - ompt_thread_t thread_type, /* type of thread */ - ompt_data_t *thread_data /* data of thread */ +typedef void (*ompt_callback_t) (void); + +typedef void ompt_device_t; + +typedef void ompt_buffer_t; + +typedef void (*ompt_callback_buffer_request_t) ( + int device_num, + ompt_buffer_t **buffer, + size_t *bytes ); -typedef void (*ompt_callback_thread_end_t) ( - ompt_data_t *thread_data /* data of thread */ +typedef void (*ompt_callback_buffer_complete_t) ( + int device_num, + ompt_buffer_t *buffer, + size_t bytes, + ompt_buffer_cursor_t begin, + int buffer_owned ); -typedef void (*ompt_wait_callback_t) ( - ompt_wait_id_t wait_id /* wait data */ +typedef void (*ompt_finalize_t) ( + ompt_data_t *tool_data ); -/* parallel and workshares */ -typedef enum ompt_scope_endpoint_t { - ompt_scope_begin = 1, - ompt_scope_end = 2 -} ompt_scope_endpoint_t; +typedef int (*ompt_initialize_t) ( + ompt_function_lookup_t lookup, + int initial_device_num, + ompt_data_t *tool_data +); +typedef struct ompt_start_tool_result_t { + ompt_initialize_t initialize; + ompt_finalize_t finalize; + ompt_data_t tool_data; +} ompt_start_tool_result_t; -/* implicit task */ -typedef void (*ompt_callback_implicit_task_t) ( - ompt_scope_endpoint_t endpoint, /* endpoint of implicit task */ - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of implicit task */ - unsigned int actual_parallelism, /* team size */ - unsigned int index /* thread number of calling thread */ +typedef struct ompt_record_abstract_t { + ompt_record_native_t rclass; + const char *type; + ompt_device_time_t start_time; + ompt_device_time_t end_time; + ompt_hwid_t hwid; +} ompt_record_abstract_t; + +typedef struct ompt_dependence_t { + ompt_data_t variable; + ompt_dependence_type_t dependence_type; +} ompt_dependence_t; + +typedef int (*ompt_enumerate_states_t) ( + int current_state, + int *next_state, + const char **next_state_name ); -typedef void (*ompt_callback_parallel_begin_t) ( - ompt_data_t *encountering_task_data, /* data of encountering task */ - const ompt_frame_t *encountering_task_frame, /* frame data of encountering task */ - ompt_data_t *parallel_data, /* data of parallel region */ - unsigned int requested_team_size, /* requested number of threads in team */ - int flag, /* flag for additional information */ - const void *codeptr_ra /* return address of runtime call */ +typedef int (*ompt_enumerate_mutex_impls_t) ( + int current_impl, + int *next_impl, + const char **next_impl_name ); -typedef void (*ompt_callback_parallel_end_t) ( - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *encountering_task_data, /* data of encountering task */ - int flag, /* flag for additional information */ - const void *codeptr_ra /* return address of runtime call */ +typedef ompt_set_result_t (*ompt_set_callback_t) ( + ompt_callbacks_t event, + ompt_callback_t callback ); -/* tasks */ -typedef enum ompt_task_flag_t { - ompt_task_initial = 0x1, - ompt_task_implicit = 0x2, - ompt_task_explicit = 0x4, - ompt_task_target = 0x8, - ompt_task_undeferred = 0x8000000, - ompt_task_untied = 0x10000000, - ompt_task_final = 0x20000000, - ompt_task_mergeable = 0x40000000, - ompt_task_merged = 0x80000000 -} ompt_task_flag_t; +typedef int (*ompt_get_callback_t) ( + ompt_callbacks_t event, + ompt_callback_t *callback +); -typedef enum ompt_task_status_t { - ompt_task_complete = 1, - ompt_task_yield = 2, - ompt_task_cancel = 3, - ompt_task_switch = 4 -} ompt_task_status_t; +typedef ompt_data_t *(*ompt_get_thread_data_t) (void); -typedef void (*ompt_callback_task_schedule_t) ( - ompt_data_t *prior_task_data, /* data of prior task */ - ompt_task_status_t prior_task_status, /* status of prior task */ - ompt_data_t *next_task_data /* data of next task */ +typedef int (*ompt_get_num_procs_t) (void); + +typedef int (*ompt_get_num_places_t) (void); + +typedef int (*ompt_get_place_proc_ids_t) ( + int place_num, + int ids_size, + int *ids ); -typedef void (*ompt_callback_task_create_t) ( - ompt_data_t *encountering_task_data, /* data of parent task */ - const ompt_frame_t *encountering_task_frame, /* frame data for parent task */ - ompt_data_t *new_task_data, /* data of created task */ - int flag, /* type of created task */ - int has_dependences, /* created task has dependences */ - const void *codeptr_ra /* return address of runtime call */ +typedef int (*ompt_get_place_num_t) (void); + +typedef int (*ompt_get_partition_place_nums_t) ( + int place_nums_size, + int *place_nums ); -/* task dependences */ -typedef void (*ompt_callback_task_dependences_t) ( - ompt_data_t *task_data, /* data of task */ - const ompt_task_dependence_t *deps, /* dependences of task */ - int ndeps /* dependences count of task */ +typedef int (*ompt_get_proc_id_t) (void); + +typedef int (*ompt_get_state_t) ( + ompt_wait_id_t *wait_id ); -typedef void (*ompt_callback_task_dependence_t) ( - ompt_data_t *src_task_data, /* data of source task */ - ompt_data_t *sink_task_data /* data of sink task */ +typedef int (*ompt_get_parallel_info_t) ( + int ancestor_level, + ompt_data_t **parallel_data, + int *team_size ); -/* target and device */ -typedef enum ompt_target_t { - ompt_target = 1, - ompt_target_enter_data = 2, - ompt_target_exit_data = 3, - ompt_target_update = 4 -} ompt_target_t; +typedef int (*ompt_get_task_info_t) ( + int ancestor_level, + int *flags, + ompt_data_t **task_data, + ompt_frame_t **task_frame, + ompt_data_t **parallel_data, + int *thread_num +); -typedef void (*ompt_callback_target_t) ( - ompt_target_t kind, - ompt_scope_endpoint_t endpoint, - uint64_t device_num, - ompt_data_t *task_data, - ompt_id_t target_id, - const void *codeptr_ra +typedef int (*ompt_get_task_memory_t)( + void **addr, + size_t *size, + int block ); -typedef enum ompt_target_data_op_t { - ompt_target_data_alloc = 1, - ompt_target_data_transfer_to_dev = 2, - ompt_target_data_transfer_from_dev = 3, - ompt_target_data_delete = 4 -} ompt_target_data_op_t; +typedef int (*ompt_get_target_info_t) ( + uint64_t *device_num, + ompt_id_t *target_id, + ompt_id_t *host_op_id +); -typedef void (*ompt_callback_target_data_op_t) ( - ompt_id_t target_id, - ompt_id_t host_op_id, - ompt_target_data_op_t optype, - void *src_addr, - int src_device_num, - void *dest_addr, - int dest_device_num, - size_t bytes, - const void *codeptr_ra +typedef int (*ompt_get_num_devices_t) (void); + +typedef void (*ompt_finalize_tool_t) (void); + +typedef int (*ompt_get_device_num_procs_t) ( + ompt_device_t *device ); -typedef void (*ompt_callback_target_submit_t) ( - ompt_id_t target_id, - ompt_id_t host_op_id, - unsigned int requested_num_teams +typedef ompt_device_time_t (*ompt_get_device_time_t) ( + ompt_device_t *device ); -typedef void (*ompt_callback_target_map_t) ( - ompt_id_t target_id, - unsigned int nitems, - void **host_addr, - void **device_addr, - size_t *bytes, - unsigned int *mapping_flags, - const void *codeptr_ra +typedef double (*ompt_translate_time_t) ( + ompt_device_t *device, + ompt_device_time_t time ); -typedef void (*ompt_callback_device_initialize_t) ( - uint64_t device_num, - const char *type, - ompt_device_t *device, - ompt_function_lookup_t lookup, - const char *documentation +typedef ompt_set_result_t (*ompt_set_trace_ompt_t) ( + ompt_device_t *device, + unsigned int enable, + unsigned int etype ); -typedef void (*ompt_callback_device_finalize_t) ( - uint64_t device_num +typedef ompt_set_result_t (*ompt_set_trace_native_t) ( + ompt_device_t *device, + int enable, + int flags ); -typedef void (*ompt_callback_device_load_t) ( - uint64_t device_num, - const char * filename, - int64_t offset_in_file, - void * vma_in_file, - size_t bytes, - void * host_addr, - void * device_addr, - uint64_t module_id +typedef int (*ompt_start_trace_t) ( + ompt_device_t *device, + ompt_callback_buffer_request_t request, + ompt_callback_buffer_complete_t complete ); -#define ompt_addr_unknown ((void *) ~0) +typedef int (*ompt_pause_trace_t) ( + ompt_device_t *device, + int begin_pause +); -typedef void (*ompt_callback_device_unload_t) ( - uint64_t device_num, - uint64_t module_id +typedef int (*ompt_flush_trace_t) ( + ompt_device_t *device ); -/* control_tool */ -typedef int (*ompt_callback_control_tool_t) ( - uint64_t command, /* command of control call */ - uint64_t modifier, /* modifier of control call */ - void *arg, /* argument of control call */ - const void *codeptr_ra /* return address of runtime call */ +typedef int (*ompt_stop_trace_t) ( + ompt_device_t *device ); -typedef enum ompt_mutex_t { - ompt_mutex_lock = 1, - ompt_mutex_nest_lock = 2, - ompt_mutex_critical = 3, - ompt_mutex_atomic = 4, - ompt_mutex_ordered = 5 -} ompt_mutex_t; +typedef int (*ompt_advance_buffer_cursor_t) ( + ompt_device_t *device, + ompt_buffer_t *buffer, + size_t size, + ompt_buffer_cursor_t current, + ompt_buffer_cursor_t *next +); -typedef void (*ompt_callback_mutex_acquire_t) ( - ompt_mutex_t kind, /* mutex kind */ - unsigned int hint, /* mutex hint */ - unsigned int impl, /* mutex implementation */ - ompt_wait_id_t wait_id, /* id of object being awaited */ - const void *codeptr_ra /* return address of runtime call */ +typedef ompt_record_t (*ompt_get_record_type_t) ( + ompt_buffer_t *buffer, + ompt_buffer_cursor_t current ); -typedef void (*ompt_callback_mutex_t) ( - ompt_mutex_t kind, /* mutex kind */ - ompt_wait_id_t wait_id, /* id of object being awaited */ - const void *codeptr_ra /* return address of runtime call */ +typedef void *(*ompt_get_record_native_t) ( + ompt_buffer_t *buffer, + ompt_buffer_cursor_t current, + ompt_id_t *host_op_id ); -typedef void (*ompt_callback_nest_lock_t) ( - ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */ - ompt_wait_id_t wait_id, /* id of object being awaited */ - const void *codeptr_ra /* return address of runtime call */ +typedef ompt_record_abstract_t * +(*ompt_get_record_abstract_t) ( + void *native_record ); -typedef void (*ompt_callback_master_t) ( - ompt_scope_endpoint_t endpoint, /* endpoint of master region */ - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of task */ - const void *codeptr_ra /* return address of runtime call */ +typedef void (*ompt_callback_thread_begin_t) ( + ompt_thread_t thread_type, + ompt_data_t *thread_data ); -typedef enum ompt_work_t { - ompt_work_loop = 1, - ompt_work_sections = 2, - ompt_work_single_executor = 3, - ompt_work_single_other = 4, - ompt_work_workshare = 5, - ompt_work_distribute = 6, - ompt_work_taskloop = 7 -} ompt_work_t; +typedef struct ompt_record_thread_begin_t { + ompt_thread_t thread_type; +} ompt_record_thread_begin_t; -typedef void (*ompt_callback_work_t) ( - ompt_work_t wstype, /* type of work region */ - ompt_scope_endpoint_t endpoint, /* endpoint of work region */ - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of task */ - uint64_t count, /* quantity of work */ - const void *codeptr_ra /* return address of runtime call */ +typedef void (*ompt_callback_thread_end_t) ( + ompt_data_t *thread_data ); -typedef enum ompt_sync_region_t { - ompt_sync_region_barrier = 1, - ompt_sync_region_barrier_implicit = 2, - ompt_sync_region_barrier_explicit = 3, - ompt_sync_region_barrier_implementation = 4, - ompt_sync_region_taskwait = 5, - ompt_sync_region_taskgroup = 6, - ompt_sync_region_reduction = 7 -} ompt_sync_region_t; - -typedef void (*ompt_callback_sync_region_t) ( - ompt_sync_region_t kind, /* kind of sync region */ - ompt_scope_endpoint_t endpoint, /* endpoint of sync region */ - ompt_data_t *parallel_data, /* data of parallel region */ - ompt_data_t *task_data, /* data of task */ - const void *codeptr_ra /* return address of runtime call */ +typedef void (*ompt_callback_parallel_begin_t) ( + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, + ompt_data_t *parallel_data, + unsigned int requested_parallelism, + int flags, + const void *codeptr_ra ); -typedef enum ompt_cancel_flag_t { - ompt_cancel_parallel = 0x01, - ompt_cancel_sections = 0x02, - ompt_cancel_loop = 0x04, - ompt_cancel_taskgroup = 0x08, - ompt_cancel_activated = 0x10, - ompt_cancel_detected = 0x20, - ompt_cancel_discarded_task = 0x40 -} ompt_cancel_flag_t; +typedef struct ompt_record_parallel_begin_t { + ompt_id_t encountering_task_id; + ompt_id_t parallel_id; + unsigned int requested_parallelism; + int flags; + const void *codeptr_ra; +} ompt_record_parallel_begin_t; -typedef void (*ompt_callback_cancel_t) ( - ompt_data_t *task_data, /* data of task */ - int flags, /* cancel flags */ - const void *codeptr_ra /* return address of runtime call */ +typedef void (*ompt_callback_parallel_end_t) ( + ompt_data_t *parallel_data, + ompt_data_t *encountering_task_data, + int flags, + const void *codeptr_ra ); -typedef void (*ompt_callback_flush_t) ( - ompt_data_t *thread_data, /* data of thread */ - const void *codeptr_ra /* return address of runtime call */ +typedef struct ompt_record_parallel_end_t { + ompt_id_t parallel_id; + ompt_id_t encountering_task_id; + int flags; + const void *codeptr_ra; +} ompt_record_parallel_end_t; + +typedef void (*ompt_callback_work_t) ( + ompt_work_t wstype, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + uint64_t count, + const void *codeptr_ra ); -typedef enum ompt_dispatch_t { - ompt_dispatch_iteration = 1, - ompt_dispatch_section = 2 -} ompt_dispatch_t; +typedef struct ompt_record_work_t { + ompt_work_t wstype; + ompt_scope_endpoint_t endpoint; + ompt_id_t parallel_id; + ompt_id_t task_id; + uint64_t count; + const void *codeptr_ra; +} ompt_record_work_t; typedef void (*ompt_callback_dispatch_t) ( - ompt_data_t *parallel_data, - ompt_data_t *task_data, - ompt_dispatch_t kind, - ompt_data_t instance + ompt_data_t *parallel_data, + ompt_data_t *task_data, + ompt_dispatch_t kind, + ompt_data_t instance ); -/**************************************************************************** - * ompt API - ***************************************************************************/ +typedef struct ompt_record_dispatch_t { + ompt_id_t parallel_id; + ompt_id_t task_id; + ompt_dispatch_t kind; + ompt_data_t instance; +} ompt_record_dispatch_t; -#ifdef __cplusplus -extern "C" { -#endif +typedef void (*ompt_callback_task_create_t) ( + ompt_data_t *encountering_task_data, + const ompt_frame_t *encountering_task_frame, + ompt_data_t *new_task_data, + int flags, + int has_dependences, + const void *codeptr_ra +); -#define OMPT_API_FNTYPE(fn) fn##_t +typedef struct ompt_record_task_create_t { + ompt_id_t encountering_task_id; + ompt_id_t new_task_id; + int flags; + int has_dependences; + const void *codeptr_ra; +} ompt_record_task_create_t; + +typedef void (*ompt_callback_dependences_t) ( + ompt_data_t *task_data, + const ompt_dependence_t *deps, + int ndeps +); -#define OMPT_API_FUNCTION(return_type, fn, args) \ - typedef return_type (*OMPT_API_FNTYPE(fn)) args +typedef struct ompt_record_dependences_t { + ompt_id_t task_id; + ompt_dependence_t dep; + int ndeps; +} ompt_record_dependences_t; +typedef void (*ompt_callback_task_dependence_t) ( + ompt_data_t *src_task_data, + ompt_data_t *sink_task_data +); +typedef struct ompt_record_task_dependence_t { + ompt_id_t src_task_id; + ompt_id_t sink_task_id; +} ompt_record_task_dependence_t; -/**************************************************************************** - * INQUIRY FUNCTIONS - ***************************************************************************/ +typedef void (*ompt_callback_task_schedule_t) ( + ompt_data_t *prior_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *next_task_data +); -/* state */ -OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( - ompt_wait_id_t *wait_id -)); +typedef struct ompt_record_task_schedule_t { + ompt_id_t prior_task_id; + ompt_task_status_t prior_task_status; + ompt_id_t next_task_id; +} ompt_record_task_schedule_t; -/* thread */ -OMPT_API_FUNCTION(ompt_data_t*, ompt_get_thread_data, (void)); +typedef void (*ompt_callback_implicit_task_t) ( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int actual_parallelism, + unsigned int index, + int flags +); -/* parallel region */ -OMPT_API_FUNCTION(int, ompt_get_parallel_info, ( - int ancestor_level, - ompt_data_t **parallel_data, - int *team_size -)); +typedef struct ompt_record_implicit_task_t { + ompt_scope_endpoint_t endpoint; + ompt_id_t parallel_id; + ompt_id_t task_id; + unsigned int actual_parallelism; + unsigned int index; + int flags; +} ompt_record_implicit_task_t; -/* task */ -OMPT_API_FUNCTION(int, ompt_get_task_info, ( - int ancestor_level, - int *type, - ompt_data_t **task_data, - ompt_frame_t **task_frame, - ompt_data_t **parallel_data, - int *thread_num -)); +typedef void (*ompt_callback_master_t) ( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra +); -OMPT_API_FUNCTION(int, ompt_get_task_memory, ( - void **addr, - size_t *size, - int block -)); +typedef struct ompt_record_master_t { + ompt_scope_endpoint_t endpoint; + ompt_id_t parallel_id; + ompt_id_t task_id; + const void *codeptr_ra; +} ompt_record_master_t; -/* procs */ -OMPT_API_FUNCTION(int, ompt_get_num_procs, (void)); +typedef void (*ompt_callback_sync_region_t) ( + ompt_sync_region_t kind, + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + const void *codeptr_ra +); -/* places */ -OMPT_API_FUNCTION(int, ompt_get_num_places, (void)); +typedef struct ompt_record_sync_region_t { + ompt_sync_region_t kind; + ompt_scope_endpoint_t endpoint; + ompt_id_t parallel_id; + ompt_id_t task_id; + const void *codeptr_ra; +} ompt_record_sync_region_t; -OMPT_API_FUNCTION(int, ompt_get_place_proc_ids, ( - int place_num, - int ids_size, - int *ids -)); +typedef void (*ompt_callback_mutex_acquire_t) ( + ompt_mutex_t kind, + unsigned int hint, + unsigned int impl, + ompt_wait_id_t wait_id, + const void *codeptr_ra +); -OMPT_API_FUNCTION(int, ompt_get_place_num, (void)); +typedef struct ompt_record_mutex_acquire_t { + ompt_mutex_t kind; + unsigned int hint; + unsigned int impl; + ompt_wait_id_t wait_id; + const void *codeptr_ra; +} ompt_record_mutex_acquire_t; -OMPT_API_FUNCTION(int, ompt_get_partition_place_nums, ( - int place_nums_size, - int *place_nums -)); +typedef void (*ompt_callback_mutex_t) ( + ompt_mutex_t kind, + ompt_wait_id_t wait_id, + const void *codeptr_ra +); -/* proc_id */ -OMPT_API_FUNCTION(int, ompt_get_proc_id, (void)); +typedef struct ompt_record_mutex_t { + ompt_mutex_t kind; + ompt_wait_id_t wait_id; + const void *codeptr_ra; +} ompt_record_mutex_t; +typedef void (*ompt_callback_nest_lock_t) ( + ompt_scope_endpoint_t endpoint, + ompt_wait_id_t wait_id, + const void *codeptr_ra +); -/**************************************************************************** - * INITIALIZATION FUNCTIONS - ***************************************************************************/ +typedef struct ompt_record_nest_lock_t { + ompt_scope_endpoint_t endpoint; + ompt_wait_id_t wait_id; + const void *codeptr_ra; +} ompt_record_nest_lock_t; -OMPT_API_FUNCTION(int, ompt_initialize, ( - ompt_function_lookup_t lookup, - ompt_data_t *tool_data -)); +typedef void (*ompt_callback_flush_t) ( + ompt_data_t *thread_data, + const void *codeptr_ra +); -OMPT_API_FUNCTION(void, ompt_finalize, ( - ompt_data_t *tool_data -)); +typedef struct ompt_record_flush_t { + const void *codeptr_ra; +} ompt_record_flush_t; -typedef struct ompt_start_tool_result_t { - ompt_initialize_t initialize; - ompt_finalize_t finalize; - ompt_data_t tool_data; -} ompt_start_tool_result_t; +typedef void (*ompt_callback_cancel_t) ( + ompt_data_t *task_data, + int flags, + const void *codeptr_ra +); -/* initialization interface to be defined by tool */ -#ifdef _WIN32 -__declspec(dllexport) -#endif -ompt_start_tool_result_t * ompt_start_tool( - unsigned int omp_version, - const char * runtime_version +typedef struct ompt_record_cancel_t { + ompt_id_t task_id; + int flags; + const void *codeptr_ra; +} ompt_record_cancel_t; + +typedef void (*ompt_callback_device_initialize_t) ( + int device_num, + const char *type, + ompt_device_t *device, + ompt_function_lookup_t lookup, + const char *documentation ); -typedef void (*ompt_callback_t)(void); +typedef void (*ompt_callback_device_finalize_t) ( + int device_num +); -OMPT_API_FUNCTION(int, ompt_set_callback, ( - ompt_callbacks_t which, - ompt_callback_t callback -)); +typedef void (*ompt_callback_device_load_t) ( + int device_num, + const char *filename, + int64_t offset_in_file, + void *vma_in_file, + size_t bytes, + void *host_addr, + void *device_addr, + uint64_t module_id +); -OMPT_API_FUNCTION(int, ompt_get_callback, ( - ompt_callbacks_t which, - ompt_callback_t *callback -)); +typedef void (*ompt_callback_device_unload_t) ( + int device_num, + uint64_t module_id +); +typedef void (*ompt_callback_target_data_op_t) ( + ompt_id_t target_id, + ompt_id_t host_op_id, + ompt_target_data_op_t optype, + void *src_addr, + int src_device_num, + void *dest_addr, + int dest_device_num, + size_t bytes, + const void *codeptr_ra +); +typedef struct ompt_record_target_data_op_t { + ompt_id_t host_op_id; + ompt_target_data_op_t optype; + void *src_addr; + int src_device_num; + void *dest_addr; + int dest_device_num; + size_t bytes; + ompt_device_time_t end_time; + const void *codeptr_ra; +} ompt_record_target_data_op_t; -/**************************************************************************** - * MISCELLANEOUS FUNCTIONS - ***************************************************************************/ +typedef void (*ompt_callback_target_t) ( + ompt_target_t kind, + ompt_scope_endpoint_t endpoint, + int device_num, + ompt_data_t *task_data, + ompt_id_t target_id, + const void *codeptr_ra +); -/* state enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_states, ( - int current_state, - int *next_state, - const char **next_state_name -)); +typedef struct ompt_record_target_t { + ompt_target_t kind; + ompt_scope_endpoint_t endpoint; + int device_num; + ompt_id_t task_id; + ompt_id_t target_id; + const void *codeptr_ra; +} ompt_record_target_t; -/* mutex implementation enumeration */ -OMPT_API_FUNCTION(int, ompt_enumerate_mutex_impls, ( - int current_impl, - int *next_impl, - const char **next_impl_name -)); +typedef void (*ompt_callback_target_map_t) ( + ompt_id_t target_id, + unsigned int nitems, + void **host_addr, + void **device_addr, + size_t *bytes, + unsigned int *mapping_flags, + const void *codeptr_ra +); -/* get_unique_id */ -OMPT_API_FUNCTION(uint64_t, ompt_get_unique_id, (void)); +typedef struct ompt_record_target_map_t { + ompt_id_t target_id; + unsigned int nitems; + void **host_addr; + void **device_addr; + size_t *bytes; + unsigned int *mapping_flags; + const void *codeptr_ra; +} ompt_record_target_map_t; -/* finalize tool */ -OMPT_API_FUNCTION(void, ompt_finalize_tool, (void)); +typedef void (*ompt_callback_target_submit_t) ( + ompt_id_t target_id, + ompt_id_t host_op_id, + unsigned int requested_num_teams +); -#ifdef __cplusplus -}; -#endif +typedef struct ompt_record_target_kernel_t { + ompt_id_t host_op_id; + unsigned int requested_num_teams; + unsigned int granted_num_teams; + ompt_device_time_t end_time; +} ompt_record_target_kernel_t; -/**************************************************************************** - * TARGET - ***************************************************************************/ +typedef int (*ompt_callback_control_tool_t) ( + uint64_t command, + uint64_t modifier, + void *arg, + const void *codeptr_ra +); - OMPT_API_FUNCTION(int, ompt_get_target_info, ( - uint64_t *device_num, - ompt_id_t *target_id, - ompt_id_t *host_op_id -)); +typedef struct ompt_record_control_tool_t { + uint64_t command; + uint64_t modifier; + const void *codeptr_ra; +} ompt_record_control_tool_t; + +typedef struct ompd_address_t { + ompd_seg_t segment; + ompd_addr_t address; +} ompd_address_t; + +typedef struct ompd_frame_info_t { + ompd_address_t frame_address; + ompd_word_t frame_flag; +} ompd_frame_info_t; + +typedef struct _ompd_aspace_handle ompd_address_space_handle_t; +typedef struct _ompd_thread_handle ompd_thread_handle_t; +typedef struct _ompd_parallel_handle ompd_parallel_handle_t; +typedef struct _ompd_task_handle ompd_task_handle_t; + +typedef struct _ompd_aspace_cont ompd_address_space_context_t; +typedef struct _ompd_thread_cont ompd_thread_context_t; + +typedef struct ompd_device_type_sizes_t { + uint8_t sizeof_char; + uint8_t sizeof_short; + uint8_t sizeof_int; + uint8_t sizeof_long; + uint8_t sizeof_long_long; + uint8_t sizeof_pointer; +} ompd_device_type_sizes_t; + +typedef struct ompt_record_ompt_t { + ompt_callbacks_t type; + ompt_device_time_t time; + ompt_id_t thread_id; + ompt_id_t target_id; + union { + ompt_record_thread_begin_t thread_begin; + ompt_record_parallel_begin_t parallel_begin; + ompt_record_parallel_end_t parallel_end; + ompt_record_work_t work; + ompt_record_dispatch_t dispatch; + ompt_record_task_create_t task_create; + ompt_record_dependences_t dependences; + ompt_record_task_dependence_t task_dependence; + ompt_record_task_schedule_t task_schedule; + ompt_record_implicit_task_t implicit_task; + ompt_record_master_t master; + ompt_record_sync_region_t sync_region; + ompt_record_mutex_acquire_t mutex_acquire; + ompt_record_mutex_t mutex; + ompt_record_nest_lock_t nest_lock; + ompt_record_flush_t flush; + ompt_record_cancel_t cancel; + ompt_record_target_t target; + ompt_record_target_data_op_t target_data_op; + ompt_record_target_map_t target_map; + ompt_record_target_kernel_t target_kernel; + ompt_record_control_tool_t control_tool; + } record; +} ompt_record_ompt_t; + +typedef ompt_record_ompt_t *(*ompt_get_record_ompt_t) ( + ompt_buffer_t *buffer, + ompt_buffer_cursor_t current +); - OMPT_API_FUNCTION(int, ompt_get_num_devices, (void)); +#define ompt_id_none 0 +#define ompt_data_none {0} +#define ompt_time_none 0 +#define ompt_hwid_none 0 +#define ompt_addr_none ~0 +#define ompt_mutex_impl_none 0 +#define ompt_wait_id_none 0 + +#define ompd_segment_none 0 #endif /* __OMPT__ */ Index: runtime/src/kmp_barrier.cpp =================================================================== --- runtime/src/kmp_barrier.cpp +++ runtime/src/kmp_barrier.cpp @@ -1905,7 +1905,7 @@ #endif if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, task_data, 0, ds_tid); + ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } } #endif Index: runtime/src/kmp_csupport.cpp =================================================================== --- runtime/src/kmp_csupport.cpp +++ runtime/src/kmp_csupport.cpp @@ -511,7 +511,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, - OMPT_CUR_TASK_INFO(this_thr)->thread_num); + OMPT_CUR_TASK_INFO(this_thr)->thread_num, 0); } // reset clear the task id only after unlinking the task @@ -1292,7 +1292,7 @@ return kmp_mutex_impl_speculative; #endif default: - return ompt_mutex_impl_none; + return kmp_mutex_impl_none; } ilock = KMP_LOOKUP_I_LOCK(user_lock); } @@ -1316,7 +1316,7 @@ case locktag_nested_drdpa: return kmp_mutex_impl_queuing; default: - return ompt_mutex_impl_none; + return kmp_mutex_impl_none; } } #else @@ -1339,7 +1339,7 @@ return kmp_mutex_impl_speculative; #endif default: - return ompt_mutex_impl_none; + return kmp_mutex_impl_none; } } #endif // KMP_USE_DYNAMIC_LOCK Index: runtime/src/kmp_gsupport.cpp =================================================================== --- runtime/src/kmp_gsupport.cpp +++ runtime/src/kmp_gsupport.cpp @@ -400,7 +400,7 @@ ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, &(team_info->parallel_data), - &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid)); + &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? task_info->thread_num = __kmp_tid_from_gtid(gtid); } thr->th.ompt_thread_info.state = ompt_state_work_parallel; Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -1402,7 +1402,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr), - OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid)); + OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(this_thr) ->thread_num = __kmp_tid_from_gtid(global_tid); } @@ -1568,7 +1568,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid)); + implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); } @@ -1598,7 +1598,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, implicit_task_data, 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num); + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } __ompt_lw_taskteam_unlink(master_th); @@ -1780,7 +1780,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid)); + &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); } @@ -1810,7 +1810,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num); + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } __ompt_lw_taskteam_unlink(master_th); @@ -1881,7 +1881,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), - implicit_task_data, 1, __kmp_tid_from_gtid(gtid)); + implicit_task_data, 1, __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(master_th) ->thread_num = __kmp_tid_from_gtid(gtid); } @@ -1910,7 +1910,7 @@ if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), 1, - OMPT_CUR_TASK_INFO(master_th)->thread_num); + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th); @@ -2514,7 +2514,7 @@ int ompt_team_size = team->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size, - OMPT_CUR_TASK_INFO(master_th)->thread_num); + OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } task_info->frame.exit_frame.ptr = NULL; @@ -6975,7 +6975,7 @@ ompt_team_size = team->t.t_nproc; ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size, - __kmp_tid_from_gtid(gtid)); + __kmp_tid_from_gtid(gtid), ompt_task_implicit); // TODO: Can this be ompt_task_initial? OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid); } #endif @@ -7225,7 +7225,7 @@ #endif if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, task_data, 0, ds_tid); + ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit); // TODO: Can this be ompt_task_initial? } } #endif Index: runtime/src/kmp_taskdeps.cpp =================================================================== --- runtime/src/kmp_taskdeps.cpp +++ runtime/src/kmp_taskdeps.cpp @@ -485,43 +485,43 @@ #if OMPT_OPTIONAL /* OMPT grab all dependences if requested by the tool */ if (ndeps + ndeps_noalias > 0 && - ompt_enabled.ompt_callback_task_dependences) { + ompt_enabled.ompt_callback_dependences) { kmp_int32 i; new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias; new_taskdata->ompt_task_info.deps = - (ompt_task_dependence_t *)KMP_OMPT_DEPS_ALLOC( - thread, (ndeps + ndeps_noalias) * sizeof(ompt_task_dependence_t)); + (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC( + thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t)); KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL); for (i = 0; i < ndeps; i++) { - new_taskdata->ompt_task_info.deps[i].variable_addr = + new_taskdata->ompt_task_info.deps[i].variable.ptr = (void *)dep_list[i].base_addr; if (dep_list[i].flags.in && dep_list[i].flags.out) new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_task_dependence_type_inout; + ompt_dependence_type_inout; else if (dep_list[i].flags.out) new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_task_dependence_type_out; + ompt_dependence_type_out; else if (dep_list[i].flags.in) new_taskdata->ompt_task_info.deps[i].dependence_type = - ompt_task_dependence_type_in; + ompt_dependence_type_in; } for (i = 0; i < ndeps_noalias; i++) { - new_taskdata->ompt_task_info.deps[ndeps + i].variable_addr = + new_taskdata->ompt_task_info.deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr; if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_task_dependence_type_inout; + ompt_dependence_type_inout; else if (noalias_dep_list[i].flags.out) new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_task_dependence_type_out; + ompt_dependence_type_out; else if (noalias_dep_list[i].flags.in) new_taskdata->ompt_task_info.deps[ndeps + i].dependence_type = - ompt_task_dependence_type_in; + ompt_dependence_type_in; } - ompt_callbacks.ompt_callback(ompt_callback_task_dependences)( + ompt_callbacks.ompt_callback(ompt_callback_dependences)( &(new_taskdata->ompt_task_info.task_data), new_taskdata->ompt_task_info.deps, new_taskdata->ompt_task_info.ndeps); /* We can now free the allocated memory for the dependencies */ Index: runtime/src/kmp_wait_release.h =================================================================== --- runtime/src/kmp_wait_release.h +++ runtime/src/kmp_wait_release.h @@ -140,7 +140,7 @@ if (!KMP_MASTER_TID(ds_tid)) { if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( - ompt_scope_end, NULL, tId, 0, ds_tid); + ompt_scope_end, NULL, tId, 0, ds_tid, 0); } // return to idle state this_thr->th.ompt_thread_info.state = ompt_state_idle; Index: runtime/src/ompt-event-specific.h =================================================================== --- runtime/src/ompt-event-specific.h +++ runtime/src/ompt-event-specific.h @@ -78,11 +78,11 @@ #define ompt_callback_mutex_released_implemented ompt_event_MAY_ALWAYS_OPTIONAL #if OMP_40_ENABLED -#define ompt_callback_task_dependences_implemented \ +#define ompt_callback_dependences_implemented \ ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_task_dependence_implemented ompt_event_MAY_ALWAYS_OPTIONAL #else -#define ompt_callback_task_dependences_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_dependences_implemented ompt_event_UNIMPLEMENTED #define ompt_callback_task_dependence_implemented ompt_event_UNIMPLEMENTED #endif /* OMP_40_ENABLED */ Index: runtime/src/ompt-general.cpp =================================================================== --- runtime/src/ompt-general.cpp +++ runtime/src/ompt-general.cpp @@ -342,8 +342,9 @@ // Initialize the tool if so indicated. //-------------------------------------------------- if (ompt_start_tool_result) { +// extern int omp_get_initial_device(void); ompt_enabled.enabled = !!ompt_start_tool_result->initialize( - ompt_fn_lookup, &(ompt_start_tool_result->tool_data)); + ompt_fn_lookup, /*omp_get_initial_device()*/-10, &(ompt_start_tool_result->tool_data)); if (!ompt_enabled.enabled) { // tool not enabled, zero out the bitmap, and done @@ -422,7 +423,7 @@ * callbacks ****************************************************************************/ -OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which, +OMPT_API_ROUTINE ompt_set_result_t ompt_set_callback(ompt_callbacks_t which, ompt_callback_t callback) { switch (which) { @@ -482,7 +483,7 @@ team_size); } -OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *wait_id) { +OMPT_API_ROUTINE int ompt_get_state(ompt_wait_id_t *wait_id) { ompt_state_t thread_state = __ompt_get_state_internal(wait_id); if (thread_state == ompt_state_undefined) { Index: runtime/src/ompt-internal.h =================================================================== --- runtime/src/ompt-internal.h +++ runtime/src/ompt-internal.h @@ -60,7 +60,7 @@ int thread_num; #if OMP_40_ENABLED int ndeps; - ompt_task_dependence_t *deps; + ompt_dependence_t *deps; #endif /* OMP_40_ENABLED */ } ompt_task_info_t; Index: runtime/test/lit.cfg =================================================================== --- runtime/test/lit.cfg +++ runtime/test/lit.cfg @@ -115,7 +115,7 @@ config.substitutions.append(("%flags", config.test_flags)) if config.has_ompt: - config.substitutions.append(("FileCheck", config.test_filecheck)) + config.substitutions.append(("FileCheck", "tee %%s.out | %s" % config.test_filecheck)) config.substitutions.append(("%sort-threads", "sort -n -s")) if config.operating_system == 'Windows': # No such environment variable on Windows. Index: runtime/test/ompt/callback.h =================================================================== --- runtime/test/ompt/callback.h +++ runtime/test/ompt/callback.h @@ -23,10 +23,13 @@ static const char* ompt_task_status_t_values[] = { NULL, - "ompt_task_complete", - "ompt_task_yield", - "ompt_task_cancel", - "ompt_task_others" + "ompt_task_complete", // 1 + "ompt_task_yield", // 2 + "ompt_task_cancel", // 3 + "ompt_task_detach", // 4 + "ompt_task_early_fulfill", // 5 + "ompt_task_late_fulfill", // 6 + "ompt_task_switch" // 7 }; static const char* ompt_cancel_flag_t_values[] = { "ompt_cancel_parallel", @@ -439,7 +442,8 @@ ompt_data_t *parallel_data, ompt_data_t *task_data, unsigned int team_size, - unsigned int thread_num) + unsigned int thread_num, + int flags) { switch(endpoint) { @@ -651,9 +655,9 @@ } static void -on_ompt_callback_task_dependences( +on_ompt_callback_dependences( ompt_data_t *task_data, - const ompt_task_dependence_t *deps, + const ompt_dependence_t *deps, int ndeps) { printf("%" PRIu64 ": ompt_event_task_dependences: task_id=%" PRIu64 ", deps=%p, ndeps=%d\n", ompt_get_thread_data()->value, task_data->value, (void *)deps, ndeps); @@ -710,6 +714,7 @@ int ompt_initialize( ompt_function_lookup_t lookup, + int initial_device_num, ompt_data_t *tool_data) { ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); @@ -747,7 +752,7 @@ register_callback(ompt_callback_parallel_end); register_callback(ompt_callback_task_create); register_callback(ompt_callback_task_schedule); - register_callback(ompt_callback_task_dependences); + register_callback(ompt_callback_dependences); register_callback(ompt_callback_task_dependence); register_callback(ompt_callback_thread_begin); register_callback(ompt_callback_thread_end); @@ -760,6 +765,9 @@ printf("0: ompt_event_runtime_shutdown\n"); } +#ifdef __cplusplus +extern "C" { +#endif ompt_start_tool_result_t* ompt_start_tool( unsigned int omp_version, const char *runtime_version) @@ -767,3 +775,6 @@ static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0}; return &ompt_start_tool_result; } +#ifdef __cplusplus +} +#endif \ No newline at end of file Index: runtime/test/ompt/cancel/cancel_taskgroup.c =================================================================== --- runtime/test/ompt/cancel/cancel_taskgroup.c +++ runtime/test/ompt/cancel/cancel_taskgroup.c @@ -75,7 +75,7 @@ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[THIRD_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit=4, has_dependences=no // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[PARENT_TASK_ID]], parent_task_frame.exit={{0x[0-f]*}}, parent_task_frame.reenter={{0x[0-f]*}}, new_task_id=[[CANCEL_TASK_ID:[0-9]+]], codeptr_ra={{0x[0-f]*}}, task_type=ompt_task_explicit|ompt_task_undeferred=134217732, has_dependences=no - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[PARENT_TASK_ID]], second_task_id=[[CANCEL_TASK_ID]], prior_task_status=ompt_task_switch=7 // CHECK: {{^}}[[MASTER_ID]]: ompt_event_cancel: task_data=[[CANCEL_TASK_ID]], flags=ompt_cancel_taskgroup|ompt_cancel_activated=24, codeptr_ra={{0x[0-f]*}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[CANCEL_TASK_ID]], second_task_id=[[PARENT_TASK_ID]], prior_task_status=ompt_task_cancel=3 Index: runtime/test/ompt/parallel/parallel_if0.c =================================================================== --- runtime/test/ompt/parallel/parallel_if0.c +++ runtime/test/ompt/parallel/parallel_if0.c @@ -57,7 +57,7 @@ // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_switch=7 // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} Index: runtime/test/ompt/parallel/serialized.c =================================================================== --- runtime/test/ompt/parallel/serialized.c +++ runtime/test/ompt/parallel/serialized.c @@ -57,7 +57,7 @@ // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame={{0x[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[NESTED_IMPLICIT_TASK_ID]], parent_task_frame.exit={{0x[0-f]+}}, parent_task_frame.reenter={{0x[0-f]+}}, new_task_id=[[EXPLICIT_TASK_ID:[0-9]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[NESTED_IMPLICIT_TASK_ID]], second_task_id=[[EXPLICIT_TASK_ID]], prior_task_status=ompt_task_switch=7 // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[EXPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame=[[NULL]] // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame={{0x[0-f]+}}, reenter_frame={{0x[0-f]+}} Index: runtime/test/ompt/tasks/explicit_task.c =================================================================== --- runtime/test/ompt/tasks/explicit_task.c +++ runtime/test/ompt/tasks/explicit_task.c @@ -1,4 +1,4 @@ -// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// RUN: %libomp-compile-and-run | %sort-threads | tee %s.out | FileCheck %s // REQUIRES: ompt // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 #define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN Index: runtime/test/ompt/tasks/taskyield.c =================================================================== --- runtime/test/ompt/tasks/taskyield.c +++ runtime/test/ompt/tasks/taskyield.c @@ -51,7 +51,7 @@ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[MAIN_TASK]], prior_task_status=ompt_task_yield=2 // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule: first_task_id=[[MAIN_TASK]], second_task_id=[[IMPLICIT_TASK_ID]], prior_task_status=ompt_task_complete=1 - // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_others=4 + // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_task_schedule: first_task_id={{[0-9]+}}, second_task_id=[[WORKER_TASK]], prior_task_status=ompt_task_switch=7 // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[WORKER_TASK]], second_task_id={{[0-9]+}}, prior_task_status=ompt_task_complete=1