Index: runtime/src/dllexports =================================================================== --- runtime/src/dllexports +++ runtime/src/dllexports @@ -547,6 +547,14 @@ omp_get_default_allocator 893 omp_alloc 894 omp_free 895 + omp_set_affinity_format 748 + omp_get_affinity_format 749 + omp_display_affinity 750 + omp_capture_affinity 751 + ompc_set_affinity_format 752 + ompc_get_affinity_format 753 + ompc_display_affinity 754 + ompc_capture_affinity 755 OMP_NULL_ALLOCATOR DATA omp_default_mem_alloc DATA Index: runtime/src/i18n/en_US.txt =================================================================== --- runtime/src/i18n/en_US.txt +++ runtime/src/i18n/en_US.txt @@ -425,6 +425,7 @@ AffHWSubsetManyTiles "KMP_HW_SUBSET ignored: too many L2 Caches requested." AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested." HierSchedInvalid "Hierarchy ignored: unsupported level: %1$s." +AffFormatDefault "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}" # -------------------------------------------------------------------------------------------------- Index: runtime/src/include/50/omp.h.var =================================================================== --- runtime/src/include/50/omp.h.var +++ runtime/src/include/50/omp.h.var @@ -25,6 +25,11 @@ extern "C" { # endif +# define omp_set_affinity_format ompc_set_affinity_format +# define omp_get_affinity_format ompc_get_affinity_format +# define omp_display_affinity ompc_display_affinity +# define omp_capture_affinity ompc_capture_affinity + # if defined(_WIN32) # define __KAI_KMPC_CONVENTION __cdecl # ifndef __KMP_IMP @@ -235,6 +240,12 @@ extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator); #endif + /* OpenMP 5.0 Affinity Format */ + extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *); + extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t); + extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *); + 
extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *); + # undef __KAI_KMPC_CONVENTION # undef __KMP_IMP Index: runtime/src/include/50/omp_lib.h.var =================================================================== --- runtime/src/include/50/omp_lib.h.var +++ runtime/src/include/50/omp_lib.h.var @@ -424,6 +424,27 @@ integer (kind=omp_allocator_kind) omp_get_default_allocator end function omp_get_default_allocator + subroutine omp_set_affinity_format(format) + character (len=*) :: format + end subroutine omp_set_affinity_format + + function omp_get_affinity_format(buffer) + import + character (len=*) :: buffer + integer (kind=kmp_size_t_kind) :: omp_get_affinity_format + end function omp_get_affinity_format + + subroutine omp_display_affinity(format) + character (len=*) :: format + end subroutine omp_display_affinity + + function omp_capture_affinity(buffer, format) + import + character (len=*) :: format + character (len=*) :: buffer + integer (kind=kmp_size_t_kind) :: omp_capture_affinity + end function omp_capture_affinity + ! *** ! *** kmp_* entry points ! 
*** @@ -637,6 +658,10 @@ !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_task_priority +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_affinity_format +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_affinity_format +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_display_affinity +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_capture_affinity !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime @@ -710,6 +735,10 @@ !$omp declare target(omp_unset_nest_lock ) !$omp declare target(omp_test_nest_lock ) !$omp declare target(omp_get_max_task_priority ) +!$omp declare target(omp_set_affinity_format ) +!$omp declare target(omp_get_affinity_format ) +!$omp declare target(omp_display_affinity ) +!$omp declare target(omp_capture_affinity ) !$omp declare target(kmp_set_stacksize ) !$omp declare target(kmp_set_stacksize_s ) !$omp declare target(kmp_set_blocktime ) Index: runtime/src/include/50/omp_lib.f.var =================================================================== --- runtime/src/include/50/omp_lib.f.var +++ runtime/src/include/50/omp_lib.f.var @@ -375,6 +375,27 @@ integer (kind=omp_allocator_kind) omp_get_default_allocator end function omp_get_default_allocator + subroutine omp_set_affinity_format(format) + character (len=*) format + end subroutine omp_set_affinity_format + + function omp_get_affinity_format(buffer) + use omp_lib_kinds + character (len=*) buffer + integer (kind=kmp_size_t_kind) omp_get_affinity_format + end function omp_get_affinity_format + + subroutine omp_display_affinity(format) + character (len=*) format + end subroutine omp_display_affinity + + function omp_capture_affinity(buffer, format) + use omp_lib_kinds + character (len=*) format + character (len=*) buffer + integer (kind=kmp_size_t_kind) omp_capture_affinity + end function omp_capture_affinity + ! *** ! *** kmp_* entry points ! 
*** @@ -594,6 +615,10 @@ !dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device !dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority !dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool +!dec$ attributes alias:'OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format +!dec$ attributes alias:'OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format +!dec$ attributes alias:'OMP_DISPLAY_AFFINITY' :: omp_display_affinity +!dec$ attributes alias:'OMP_CAPTURE_AFFINITY' :: omp_capture_affinity !dec$ attributes alias:'omp_init_lock' :: omp_init_lock !dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint @@ -675,6 +700,10 @@ !dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device !dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORTY' :: omp_get_max_task_priority !dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool +!dec$ attributes alias:'_OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format +!dec$ attributes alias:'_OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format +!dec$ attributes alias:'_OMP_DISPLAY_AFFINITY' :: omp_display_affinity +!dec$ attributes alias:'_OMP_CAPTURE_AFFINITY' :: omp_capture_affinity !dec$ attributes alias:'_omp_init_lock' :: omp_init_lock !dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint @@ -758,6 +787,10 @@ !dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation !dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device !dec$ attributes alias:'omp_get_max_task_priority_'::omp_get_max_task_priority +!dec$ attributes alias:'omp_set_affinity_format_' :: omp_set_affinity_format +!dec$ attributes alias:'omp_get_affinity_format_' :: omp_get_affinity_format +!dec$ attributes alias:'omp_display_affinity_' :: omp_display_affinity +!dec$ attributes alias:'omp_capture_affinity_' :: omp_capture_affinity !dec$ attributes alias:'omp_init_lock_'::omp_init_lock !dec$ attributes 
alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint @@ -852,6 +885,10 @@ !dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock !dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock !dec$ attributes alias:'_omp_control_tool_'::omp_control_tool +!dec$ attributes alias:'_omp_set_affinity_format_' :: omp_set_affinity_format +!dec$ attributes alias:'_omp_get_affinity_format_' :: omp_get_affinity_format +!dec$ attributes alias:'_omp_display_affinity_' :: omp_display_affinity +!dec$ attributes alias:'_omp_capture_affinity_' :: omp_capture_affinity !dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize !dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s Index: runtime/src/include/50/omp_lib.f90.var =================================================================== --- runtime/src/include/50/omp_lib.f90.var +++ runtime/src/include/50/omp_lib.f90.var @@ -391,6 +391,27 @@ integer (kind=omp_allocator_kind) omp_get_default_allocator end function omp_get_default_allocator + subroutine omp_set_affinity_format(format) + character (len=*) :: format + end subroutine omp_set_affinity_format + + function omp_get_affinity_format(buffer) + use omp_lib_kinds + character (len=*) :: buffer + integer (kind=kmp_size_t_kind) :: omp_get_affinity_format + end function omp_get_affinity_format + + subroutine omp_display_affinity(format) + character (len=*) :: format + end subroutine omp_display_affinity + + function omp_capture_affinity(buffer, format) + use omp_lib_kinds + character (len=*) :: format + character (len=*) :: buffer + integer (kind=kmp_size_t_kind) :: omp_capture_affinity + end function omp_capture_affinity + ! *** ! *** kmp_* entry points ! 
*** Index: runtime/src/kmp.h =================================================================== --- runtime/src/kmp.h +++ runtime/src/kmp.h @@ -129,6 +129,11 @@ #include "ompt-internal.h" #endif +#if OMP_50_ENABLED +// Affinity format function +#include "kmp_str.h" +#endif + // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. // 3 - fast allocation using sync, non-sync free lists of any size, non-self // free lists of limited size. @@ -793,6 +798,12 @@ #endif /* OMP_40_ENABLED */ +#if OMP_50_ENABLED +extern int __kmp_display_affinity; +extern char *__kmp_affinity_format; +static const size_t KMP_AFFINITY_FORMAT_SIZE = 512; +#endif // OMP_50_ENABLED + #if KMP_AFFINITY_SUPPORTED #define KMP_PLACE_ALL (-1) #define KMP_PLACE_UNDEFINED (-2) @@ -2495,6 +2506,10 @@ int th_last_place; /* last place in partition */ #endif #endif +#if OMP_50_ENABLED + int th_prev_level; /* previous level for affinity format */ + int th_prev_num_threads; /* previous num_threads for affinity format */ +#endif #if USE_ITT_BUILD kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */ @@ -2688,6 +2703,9 @@ int t_first_place; // first & last place in parent thread's partition. int t_last_place; // Restore these values to master after par region. 
#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED +#if OMP_50_ENABLED + int t_display_affinity; +#endif int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via // omp_set_num_threads() call #if OMP_50_ENABLED @@ -3371,6 +3389,8 @@ #if KMP_AFFINITY_SUPPORTED extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask); +extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, + kmp_affin_mask_t *mask); extern void __kmp_affinity_initialize(void); extern void __kmp_affinity_uninitialize(void); extern void __kmp_affinity_set_init_mask( @@ -3390,6 +3410,14 @@ extern int kmp_set_thread_affinity_mask_initial(void); #endif #endif /* KMP_AFFINITY_SUPPORTED */ +#if OMP_50_ENABLED +// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the +// format string is for affinity, so platforms that do not support +// affinity can still use the other fields, e.g., %n for num_threads +extern size_t __kmp_aux_capture_affinity(int gtid, const char *format, + kmp_str_buf_t *buffer); +extern void __kmp_aux_display_affinity(int gtid, const char *format); +#endif extern void __kmp_cleanup_hierarchy(); extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar); @@ -3542,6 +3570,8 @@ #if OMP_40_ENABLED extern int __kmp_invoke_teams_master(int gtid); extern void __kmp_teams_master(int gtid); +extern int __kmp_aux_get_team_num(); +extern int __kmp_aux_get_num_teams(); #endif extern void __kmp_save_internal_controls(kmp_info_t *thread); extern void __kmp_user_set_library(enum library_type arg); Index: runtime/src/kmp_affinity.cpp =================================================================== --- runtime/src/kmp_affinity.cpp +++ runtime/src/kmp_affinity.cpp @@ -83,55 +83,135 @@ } } +#define KMP_ADVANCE_SCAN(scan) \ + while (*scan != '\0') { \ + scan++; \ + } + // Print the affinity mask to the character array in a pretty format. 
+// The format is a comma separated list of non-negative integers or integer +// ranges: e.g., 1,2,3-5,7,9-15 +// The format can also be the string "{}" if no bits are set in mask char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) { + int start = 0, finish = 0, previous = 0; + bool first_range; + KMP_ASSERT(buf); KMP_ASSERT(buf_len >= 40); + KMP_ASSERT(mask); char *scan = buf; char *end = buf + buf_len - 1; - // Find first element / check for empty set. - int i; - i = mask->begin(); - if (i == mask->end()) { + // Check for empty set. + if (mask->begin() == mask->end()) { KMP_SNPRINTF(scan, end - scan + 1, "{}"); - while (*scan != '\0') - scan++; + KMP_ADVANCE_SCAN(scan); KMP_ASSERT(scan <= end); return buf; } - KMP_SNPRINTF(scan, end - scan + 1, "{%d", i); - while (*scan != '\0') - scan++; - i++; - for (; i != mask->end(); i = mask->next(i)) { - if (!KMP_CPU_ISSET(i, mask)) { - continue; + first_range = true; + start = mask->begin(); + while (1) { + // Find next range + // [start, previous] is inclusive range of contiguous bits in mask + for (finish = mask->next(start), previous = start; + finish == previous + 1 && finish != mask->end(); + finish = mask->next(finish)) { + previous = finish; } - // Check for buffer overflow. A string of the form "," will have at most - // 10 characters, plus we want to leave room to print ",...}" if the set is - // too large to print for a total of 15 characters. We already left room for - // '\0' in setting end. 
- if (end - scan < 15) { - break; + // The first range does not need a comma printed before it, but the rest + // of the ranges do need a comma beforehand + if (!first_range) { + KMP_SNPRINTF(scan, end - scan + 1, "%s", ","); + KMP_ADVANCE_SCAN(scan); + } else { + first_range = false; } - KMP_SNPRINTF(scan, end - scan + 1, ",%-d", i); - while (*scan != '\0') - scan++; - } - if (i != mask->end()) { - KMP_SNPRINTF(scan, end - scan + 1, ",..."); - while (*scan != '\0') - scan++; + // Range with three or more contiguous bits in the affinity mask + if (previous - start > 1) { + KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start), + static_cast<int>(previous)); + } else { + // Range with one or two contiguous bits in the affinity mask + KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start)); + KMP_ADVANCE_SCAN(scan); + if (previous - start > 0) { + KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous)); + } + } + KMP_ADVANCE_SCAN(scan); + // Start over with new start point + start = finish; + if (start == mask->end()) + break; + // Check for overflow + if (end - scan < 2) + break; } - KMP_SNPRINTF(scan, end - scan + 1, "}"); - while (*scan != '\0') - scan++; + + // Check for overflow KMP_ASSERT(scan <= end); return buf; } +#undef KMP_ADVANCE_SCAN + +// Print the affinity mask to the string buffer object in a pretty format +// The format is a comma separated list of non-negative integers or integer +// ranges: e.g., 1,2,3-5,7,9-15 +// The format can also be the string "{}" if no bits are set in mask +kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, + kmp_affin_mask_t *mask) { + int start = 0, finish = 0, previous = 0; + bool first_range; + KMP_ASSERT(buf); + KMP_ASSERT(mask); + + __kmp_str_buf_clear(buf); + + // Check for empty set. 
+ if (mask->begin() == mask->end()) { + __kmp_str_buf_print(buf, "%s", "{}"); + return buf; + } + + first_range = true; + start = mask->begin(); + while (1) { + // Find next range + // [start, previous] is inclusive range of contiguous bits in mask + for (finish = mask->next(start), previous = start; + finish == previous + 1 && finish != mask->end(); + finish = mask->next(finish)) { + previous = finish; + } + + // The first range does not need a comma printed before it, but the rest + // of the ranges do need a comma beforehand + if (!first_range) { + __kmp_str_buf_print(buf, "%s", ","); + } else { + first_range = false; + } + // Range with three or more contiguous bits in the affinity mask + if (previous - start > 1) { + __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start), + static_cast<int>(previous)); + } else { + // Range with one or two contiguous bits in the affinity mask + __kmp_str_buf_print(buf, "%d", static_cast<int>(start)); + if (previous - start > 0) { + __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous)); + } + } + // Start over with new start point + start = finish; + if (start == mask->end()) + break; + } + return buf; +} void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) { KMP_CPU_ZERO(mask); Index: runtime/src/kmp_barrier.cpp =================================================================== --- runtime/src/kmp_barrier.cpp +++ runtime/src/kmp_barrier.cpp @@ -1698,6 +1698,11 @@ if (__kmp_tasking_mode != tskm_immediate_exec) { __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); } +#if OMP_50_ENABLED + if (__kmp_display_affinity) { + KMP_CHECK_UPDATE(team->t.t_display_affinity, 0); + } +#endif #if KMP_STATS_ENABLED // Have master thread flag the workers to indicate they are now waiting for // next parallel region, Also wake them up so they switch their timers to @@ -1985,6 +1990,19 @@ } #endif #if OMP_50_ENABLED + // Perform the display affinity functionality + if (__kmp_display_affinity) { + if (team->t.t_display_affinity 
+#if KMP_AFFINITY_SUPPORTED + || (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) +#endif + ) { + // NULL means use the affinity-format-var ICV + __kmp_aux_display_affinity(gtid, NULL); + this_thr->th.th_prev_num_threads = team->t.t_nproc; + this_thr->th.th_prev_level = team->t.t_level; + } + } if (!KMP_MASTER_TID(tid)) KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator); #endif Index: runtime/src/kmp_csupport.cpp =================================================================== --- runtime/src/kmp_csupport.cpp +++ runtime/src/kmp_csupport.cpp @@ -1867,6 +1867,59 @@ return __kmp_get_team_size(__kmp_entry_gtid(), level); } +#if OMP_50_ENABLED +/* OpenMP 5.0 Affinity Format API */ + +void ompc_set_affinity_format(char const *format) { + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, + format, KMP_STRLEN(format) + 1); +} + +size_t ompc_get_affinity_format(char *buffer, size_t size) { + size_t format_size; + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + format_size = KMP_STRLEN(__kmp_affinity_format); + if (buffer && size) { + __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, + format_size + 1); + } + return format_size; +} + +void ompc_display_affinity(char const *format) { + int gtid; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_get_gtid(); + __kmp_aux_display_affinity(gtid, format); +} + +size_t ompc_capture_affinity(char *buffer, size_t buf_size, + char const *format) { + int gtid; + size_t num_required; + kmp_str_buf_t capture_buf; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_get_gtid(); + __kmp_str_buf_init(&capture_buf); + num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); + if (buffer && buf_size) { + __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, + capture_buf.used + 1); + } + 
__kmp_str_buf_free(&capture_buf); + return num_required; +} +#endif /* OMP_50_ENABLED */ + void kmpc_set_stacksize(int arg) { // __kmp_aux_set_stacksize initializes the library if needed __kmp_aux_set_stacksize(arg); Index: runtime/src/kmp_ftn_entry.h =================================================================== --- runtime/src/kmp_ftn_entry.h +++ runtime/src/kmp_ftn_entry.h @@ -21,6 +21,12 @@ #include "kmp_i18n.h" +#if OMP_50_ENABLED +// For affinity format functions +#include "kmp_io.h" +#include "kmp_str.h" +#endif + #if OMPT_SUPPORT #include "ompt-specific.h" #endif @@ -389,6 +395,137 @@ __kmpc_free(__kmp_entry_gtid(), ptr, allocator); #endif } + +/* OpenMP 5.0 affinity format support */ + +#ifndef KMP_STUB +static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size, + char const *csrc, size_t csrc_size) { + size_t capped_src_size = csrc_size; + if (csrc_size >= buf_size) { + capped_src_size = buf_size - 1; + } + KMP_STRNCPY_S(buffer, buf_size, csrc, capped_src_size); + if (csrc_size >= buf_size) { + KMP_DEBUG_ASSERT(buffer[buf_size - 1] == '\0'); + buffer[buf_size - 1] = csrc[buf_size - 1]; + } else { + for (size_t i = csrc_size; i < buf_size; ++i) + buffer[i] = ' '; + } +} + +// Convert a Fortran string to a C string by adding null byte +class ConvertedString { + char *buf; + kmp_info_t *th; + +public: + ConvertedString(char const *fortran_str, size_t size) { + th = __kmp_get_thread(); + buf = (char *)__kmp_thread_malloc(th, size + 1); + KMP_STRNCPY_S(buf, size + 1, fortran_str, size); + buf[size] = '\0'; + } + ~ConvertedString() { __kmp_thread_free(th, buf); } + const char *get() const { return buf; } +}; +#endif // KMP_STUB + +/* + * Set the value of the affinity-format-var ICV on the current device to the + * format specified in the argument. 
+*/ +void FTN_STDCALL FTN_SET_AFFINITY_FORMAT(char const *format, size_t size) { +#ifdef KMP_STUB + return; +#else + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + ConvertedString cformat(format, size); + // Since the __kmp_affinity_format variable is a C string, do not + // use the fortran strncpy function + __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, + cformat.get(), KMP_STRLEN(cformat.get())); +#endif +} + +/* + * Returns the number of characters required to hold the entire affinity format + * specification (not including null byte character) and writes the value of the + * affinity-format-var ICV on the current device to buffer. If the return value + * is larger than size, the affinity format specification is truncated. +*/ +size_t FTN_STDCALL FTN_GET_AFFINITY_FORMAT(char *buffer, size_t size) { +#ifdef KMP_STUB + return 0; +#else + size_t format_size; + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + format_size = KMP_STRLEN(__kmp_affinity_format); + if (buffer && size) { + __kmp_fortran_strncpy_truncate(buffer, size, __kmp_affinity_format, + format_size); + } + return format_size; +#endif +} + +/* + * Prints the thread affinity information of the current thread in the format + * specified by the format argument. If the format is NULL or a zero-length + * string, the value of the affinity-format-var ICV is used. 
+*/ +void FTN_STDCALL FTN_DISPLAY_AFFINITY(char const *format, size_t size) { +#ifdef KMP_STUB + return; +#else + int gtid; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_get_gtid(); + ConvertedString cformat(format, size); + __kmp_aux_display_affinity(gtid, cformat.get()); +#endif +} + +/* + * Returns the number of characters required to hold the entire affinity format + * specification (not including null byte) and prints the thread affinity + * information of the current thread into the character string buffer with the + * size of size in the format specified by the format argument. If the format is + * NULL or a zero-length string, the value of the affinity-format-var ICV is + * used. The buffer must be allocated prior to calling the routine. If the + * return value is larger than size, the affinity format specification is + * truncated. +*/ +size_t FTN_STDCALL FTN_CAPTURE_AFFINITY(char *buffer, char const *format, + size_t buf_size, size_t for_size) { +#if defined(KMP_STUB) + return 0; +#else + int gtid; + size_t num_required; + kmp_str_buf_t capture_buf; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_get_gtid(); + __kmp_str_buf_init(&capture_buf); + ConvertedString cformat(format, for_size); + num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf); + if (buffer && buf_size) { + __kmp_fortran_strncpy_truncate(buffer, buf_size, capture_buf.str, + capture_buf.used); + } + __kmp_str_buf_free(&capture_buf); + return num_required; +#endif +} #endif /* OMP_50_ENABLED */ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) { @@ -777,34 +914,7 @@ #ifdef KMP_STUB return 1; #else - kmp_info_t *thr = __kmp_entry_thread(); - if (thr->th.th_teams_microtask) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; - int ii = team->t.t_level; // the level of the teams construct - int dd = team->t.t_serialized; - int level = tlevel + 1; - 
KMP_DEBUG_ASSERT(ii >= tlevel); - while (ii > level) { - for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { - } - if (team->t.t_serialized && (!dd)) { - team = team->t.t_parent; - continue; - } - if (ii > level) { - team = team->t.t_parent; - ii--; - } - } - if (dd > 1) { - return 1; // teams region is serialized ( 1 team of 1 thread ). - } else { - return team->t.t_parent->t.t_nproc; - } - } else { - return 1; - } + return __kmp_aux_get_num_teams(); #endif } @@ -812,34 +922,7 @@ #ifdef KMP_STUB return 0; #else - kmp_info_t *thr = __kmp_entry_thread(); - if (thr->th.th_teams_microtask) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; // the level of the teams construct - int ii = team->t.t_level; - int dd = team->t.t_serialized; - int level = tlevel + 1; - KMP_DEBUG_ASSERT(ii >= tlevel); - while (ii > level) { - for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { - } - if (team->t.t_serialized && (!dd)) { - team = team->t.t_parent; - continue; - } - if (ii > level) { - team = team->t.t_parent; - ii--; - } - } - if (dd > 1) { - return 0; // teams region is serialized ( 1 team of 1 thread ). 
- } else { - return team->t.t_master_tid; - } - } else { - return 0; - } + return __kmp_aux_get_team_num(); #endif } Index: runtime/src/kmp_ftn_os.h =================================================================== --- runtime/src/kmp_ftn_os.h +++ runtime/src/kmp_ftn_os.h @@ -139,6 +139,10 @@ #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator #define FTN_ALLOC omp_alloc #define FTN_FREE omp_free +#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format +#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format +#define FTN_DISPLAY_AFFINITY omp_display_affinity +#define FTN_CAPTURE_AFFINITY omp_capture_affinity #endif #endif /* KMP_FTN_PLAIN */ @@ -265,6 +269,10 @@ #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_ #define FTN_ALLOC omp_alloc_ #define FTN_FREE omp_free_ +#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format_ +#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format_ +#define FTN_DISPLAY_AFFINITY omp_display_affinity_ +#define FTN_CAPTURE_AFFINITY omp_capture_affinity_ #endif #endif /* KMP_FTN_APPEND */ @@ -391,6 +399,10 @@ #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR #define FTN_ALLOC OMP_ALLOC #define FTN_FREE OMP_FREE +#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT +#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT +#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY +#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY #endif #endif /* KMP_FTN_UPPER */ @@ -517,6 +529,10 @@ #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_ #define FTN_ALLOC OMP_ALLOC_ #define FTN_FREE OMP_FREE_ +#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT_ +#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT_ +#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY_ +#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY_ #endif #endif /* KMP_FTN_UAPPEND */ Index: runtime/src/kmp_global.cpp =================================================================== --- runtime/src/kmp_global.cpp +++ runtime/src/kmp_global.cpp @@ 
-282,6 +282,11 @@ int __kmp_affinity_num_places = 0; #endif +#if OMP_50_ENABLED +int __kmp_display_affinity = FALSE; +char *__kmp_affinity_format = NULL; +#endif // OMP_50_ENABLED + kmp_hws_item_t __kmp_hws_socket = {0, 0}; kmp_hws_item_t __kmp_hws_node = {0, 0}; kmp_hws_item_t __kmp_hws_tile = {0, 0}; Index: runtime/src/kmp_io.h =================================================================== --- runtime/src/kmp_io.h +++ runtime/src/kmp_io.h @@ -26,9 +26,10 @@ extern kmp_bootstrap_lock_t __kmp_console_lock; /* Control console initialization */ -extern void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap); +extern void __kmp_vprintf(enum kmp_io stream, char const *format, va_list ap); extern void __kmp_printf(char const *format, ...); extern void __kmp_printf_no_lock(char const *format, ...); +extern void __kmp_fprintf(enum kmp_io stream, char const *format, ...); extern void __kmp_close_console(void); #ifdef __cplusplus Index: runtime/src/kmp_io.cpp =================================================================== --- runtime/src/kmp_io.cpp +++ runtime/src/kmp_io.cpp @@ -42,10 +42,7 @@ #if KMP_OS_WINDOWS -#ifdef KMP_DEBUG -/* __kmp_stdout is used only for dev build */ static HANDLE __kmp_stdout = NULL; -#endif static HANDLE __kmp_stderr = NULL; static int __kmp_console_exists = FALSE; static kmp_str_buf_t __kmp_console_buf; @@ -72,10 +69,7 @@ /* wait until user presses return before closing window */ /* TODO only close if a window was opened */ if (__kmp_console_exists) { -#ifdef KMP_DEBUG - /* standard out is used only in dev build */ __kmp_stdout = NULL; -#endif __kmp_stderr = NULL; __kmp_str_buf_free(&__kmp_console_buf); __kmp_console_exists = FALSE; @@ -88,21 +82,17 @@ __kmp_acquire_bootstrap_lock(&__kmp_console_lock); if (!__kmp_console_exists) { -#ifdef KMP_DEBUG - /* standard out is used only in dev build */ HANDLE ho; -#endif HANDLE he; __kmp_str_buf_init(&__kmp_console_buf); AllocConsole(); -// We do not check the result of 
AllocConsole because -// 1. the call is harmless -// 2. it is not clear how to communicate failue -// 3. we will detect failure later when we get handle(s) + // We do not check the result of AllocConsole because + // 1. the call is harmless + // 2. it is not clear how to communicate failure + // 3. we will detect failure later when we get handle(s) -#ifdef KMP_DEBUG ho = GetStdHandle(STD_OUTPUT_HANDLE); if (ho == INVALID_HANDLE_VALUE || ho == NULL) { @@ -114,7 +104,6 @@ __kmp_stdout = ho; // temporary code, need new global for ho } -#endif he = GetStdHandle(STD_ERROR_HANDLE); if (he == INVALID_HANDLE_VALUE || he == NULL) { @@ -133,22 +122,22 @@ #else #define __kmp_stderr (stderr) +#define __kmp_stdout (stdout) #endif /* KMP_OS_WINDOWS */ -void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) { +void __kmp_vprintf(enum kmp_io out_stream, char const *format, va_list ap) { #if KMP_OS_WINDOWS if (!__kmp_console_exists) { __kmp_redirect_output(); } - if (!__kmp_stderr && __kmp_io == kmp_err) { + if (!__kmp_stderr && out_stream == kmp_err) { return; } -#ifdef KMP_DEBUG - if (!__kmp_stdout && __kmp_io == kmp_out) { + if (!__kmp_stdout && out_stream == kmp_out) { return; } -#endif #endif /* KMP_OS_WINDOWS */ + auto stream = ((out_stream == kmp_out) ? 
__kmp_stdout : __kmp_stderr); if (__kmp_debug_buf && __kmp_debug_buffer != NULL) { @@ -170,14 +159,14 @@ "overflow; increase " "KMP_DEBUG_BUF_CHARS to %d\n", chars + 1); - WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, - &count, NULL); + WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count, + NULL); __kmp_str_buf_clear(&__kmp_console_buf); #else - fprintf(__kmp_stderr, "OMP warning: Debugging buffer overflow; " - "increase KMP_DEBUG_BUF_CHARS to %d\n", + fprintf(stream, "OMP warning: Debugging buffer overflow; " + "increase KMP_DEBUG_BUF_CHARS to %d\n", chars + 1); - fflush(__kmp_stderr); + fflush(stream); #endif __kmp_debug_buf_warn_chars = chars + 1; } @@ -192,15 +181,15 @@ __kmp_str_buf_print(&__kmp_console_buf, "pid=%d: ", (kmp_int32)getpid()); #endif __kmp_str_buf_vprint(&__kmp_console_buf, format, ap); - WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, - &count, NULL); + WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count, + NULL); __kmp_str_buf_clear(&__kmp_console_buf); #else #ifdef KMP_DEBUG_PIDS - fprintf(__kmp_stderr, "pid=%d: ", (kmp_int32)getpid()); + fprintf(stream, "pid=%d: ", (kmp_int32)getpid()); #endif - vfprintf(__kmp_stderr, format, ap); - fflush(__kmp_stderr); + vfprintf(stream, format, ap); + fflush(stream); #endif } } @@ -224,3 +213,14 @@ va_end(ap); } + +void __kmp_fprintf(enum kmp_io stream, char const *format, ...) 
{ + va_list ap; + va_start(ap, format); + + __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); + __kmp_vprintf(stream, format, ap); + __kmp_release_bootstrap_lock(&__kmp_stdio_lock); + + va_end(ap); +} Index: runtime/src/kmp_os.h =================================================================== --- runtime/src/kmp_os.h +++ runtime/src/kmp_os.h @@ -101,6 +101,7 @@ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ #if KMP_OS_WINDOWS +#define KMP_END_OF_LINE "\r\n" typedef char kmp_int8; typedef unsigned char kmp_uint8; typedef short kmp_int16; @@ -132,6 +133,7 @@ #endif /* KMP_OS_WINDOWS */ #if KMP_OS_UNIX +#define KMP_END_OF_LINE "\n" typedef char kmp_int8; typedef unsigned char kmp_uint8; typedef short kmp_int16; Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -1092,6 +1092,19 @@ #endif } +#if OMP_50_ENABLED + if (__kmp_display_affinity && team->t.t_display_affinity != 1) { + for (i = 0; i < team->t.t_nproc; i++) { + kmp_info_t *thr = team->t.t_threads[i]; + if (thr->th.th_prev_num_threads != team->t.t_nproc || + thr->th.th_prev_level != team->t.t_level) { + team->t.t_display_affinity = 1; + break; + } + } + } +#endif + KMP_MB(); } @@ -1751,6 +1764,19 @@ ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); __kmpc_serialized_parallel(loc, gtid); +#if OMP_50_ENABLED + // Perform the display affinity functionality for + // serialized parallel regions + if (__kmp_display_affinity) { + if (master_th->th.th_prev_level != master_th->th.th_team->t.t_level || + master_th->th.th_prev_num_threads != 1) { + // NULL means use the affinity-format-var ICV + __kmp_aux_display_affinity(gtid, NULL); + master_th->th.th_prev_level = master_th->th.th_team->t.t_level; + master_th->th.th_prev_num_threads = 1; + } + } +#endif if (call_context == fork_context_intel) { /* TODO this sucks, use the compiler itself to pass args! 
:) */ @@ -1948,7 +1974,7 @@ KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); KMP_MB(); return FALSE; - } + } // if (nthreads == 1) // GEH: only modify the executing flag in the case when not serialized // serialized case is handled in kmpc_serialized_parallel @@ -3819,6 +3845,8 @@ #endif /* KMP_AFFINITY_SUPPORTED */ #if OMP_50_ENABLED root_thread->th.th_def_allocator = __kmp_def_allocator; + root_thread->th.th_prev_level = 0; + root_thread->th.th_prev_num_threads = 1; #endif __kmp_root_counter++; @@ -4360,6 +4388,8 @@ #endif #if OMP_50_ENABLED new_thr->th.th_def_allocator = __kmp_def_allocator; + new_thr->th.th_prev_level = 0; + new_thr->th.th_prev_num_threads = 1; #endif TCW_4(new_thr->th.th_in_pool, FALSE); @@ -4548,6 +4578,12 @@ th->th.th_first_place = first_place; th->th.th_last_place = last_place; th->th.th_new_place = masters_place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && masters_place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d " "partition = [%d,%d]\n", @@ -4581,6 +4617,12 @@ th->th.th_first_place = first_place; th->th.th_last_place = last_place; th->th.th_new_place = place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " "partition = [%d,%d]\n", @@ -4602,6 +4644,12 @@ th->th.th_first_place = first_place; th->th.th_last_place = last_place; th->th.th_new_place = place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif s_count++; if ((s_count == S) && rem && (gap_ct == gap)) { @@ -4670,6 +4718,12 @@ th->th.th_first_place = place; th->th.th_new_place = place; +#if OMP_50_ENABLED + if 
(__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif s_count = 1; while (s_count < S) { if (place == last_place) { @@ -4761,7 +4815,12 @@ th->th.th_first_place = first; th->th.th_new_place = place; th->th.th_last_place = last; - +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " "partition = [%d,%d], spacing = %.4f\n", @@ -4790,6 +4849,12 @@ th->th.th_first_place = place; th->th.th_last_place = place; th->th.th_new_place = place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif s_count++; if ((s_count == S) && rem && (gap_ct == gap)) { @@ -7410,6 +7475,12 @@ __kmp_nested_proc_bind.bind_types = NULL; __kmp_nested_proc_bind.size = 0; __kmp_nested_proc_bind.used = 0; +#if OMP_50_ENABLED + if (__kmp_affinity_format) { + KMP_INTERNAL_FREE(__kmp_affinity_format); + __kmp_affinity_format = NULL; + } +#endif __kmp_i18n_catclose(); @@ -7566,6 +7637,339 @@ } } +/* Getting team information common for all team API */ +// Returns NULL if not in teams construct +static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) { + kmp_info_t *thr = __kmp_entry_thread(); + teams_serialized = 0; + if (thr->th.th_teams_microtask) { + kmp_team_t *team = thr->th.th_team; + int tlevel = thr->th.th_teams_level; // the level of the teams construct + int ii = team->t.t_level; + teams_serialized = team->t.t_serialized; + int level = tlevel + 1; + KMP_DEBUG_ASSERT(ii >= tlevel); + while (ii > level) { + for (teams_serialized = team->t.t_serialized; + (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) { + } + if (team->t.t_serialized && (!teams_serialized)) { + team = 
team->t.t_parent; + continue; + } + if (ii > level) { + team = team->t.t_parent; + ii--; + } + } + return team; + } + return NULL; +} + +int __kmp_aux_get_team_num() { + int serialized; + kmp_team_t *team = __kmp_aux_get_team_info(serialized); + if (team) { + if (serialized > 1) { + return 0; // teams region is serialized ( 1 team of 1 thread ). + } else { + return team->t.t_master_tid; + } + } + return 0; +} + +int __kmp_aux_get_num_teams() { + int serialized; + kmp_team_t *team = __kmp_aux_get_team_info(serialized); + if (team) { + if (serialized > 1) { + return 1; + } else { + return team->t.t_parent->t.t_nproc; + } + } + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#if OMP_50_ENABLED +/* + * Affinity Format Parser + * + * Field is in form of: %[[[0].]size]type + * % and type are required (%% means print a literal '%') + * type is either single char or long name surrounded by {}, + * e.g., N or {num_threads} + * 0 => leading zeros + * . => right justified when size is specified + * by default output is left justified + * size is the *minimum* field length + * All other characters are printed as is + * + * Available field types: + * t {team_num}, T {num_teams} - omp_get_team_num(), omp_get_num_teams() + * L {nesting_level} - omp_get_level() + * n {thread_num} - omp_get_thread_num() + * N {num_threads} - omp_get_num_threads() + * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1) + * H {host} - name of host machine + * P {process_id} - process id (integer) + * i {native_thread_id} - native thread identifier (integer) + * A {thread_affinity} - affinity mask as comma separated integers or ranges + * + * Implementation-specific field types can be added + * If a type is unknown, print "undefined" +*/ + +// Structure holding the short name, long name, and corresponding data type +// for snprintf. A table of these will represent the entire valid keyword +// field types.
+typedef struct kmp_affinity_format_field_t { + char short_name; // from spec e.g., L -> nesting level + const char *long_name; // from spec e.g., nesting_level -> nesting level + char field_format; // data type for snprintf (typically 'd' or 's' + // for integer or string) +} kmp_affinity_format_field_t; + +static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = { +#if KMP_AFFINITY_SUPPORTED + {'A', "thread_affinity", 's'}, +#endif + {'t', "team_num", 'd'}, + {'T', "num_teams", 'd'}, + {'L', "nesting_level", 'd'}, + {'n', "thread_num", 'd'}, + {'N', "num_threads", 'd'}, + {'a', "ancestor_tnum", 'd'}, + {'H', "host", 's'}, + {'P', "process_id", 'd'}, + {'i', "native_thread_id", 'd'}}; + +// Return the number of characters it takes to hold field +static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, + const char **ptr, + kmp_str_buf_t *field_buffer) { + int rc, format_index, field_value; + const char *width_left, *width_right; + bool pad_zeros, right_justify, parse_long_name, found_valid_name; + static const int FORMAT_SIZE = 20; + char format[FORMAT_SIZE] = {0}; + char absolute_short_name = 0; + + KMP_DEBUG_ASSERT(gtid >= 0); + KMP_DEBUG_ASSERT(th); + KMP_DEBUG_ASSERT(**ptr == '%'); + KMP_DEBUG_ASSERT(field_buffer); + + __kmp_str_buf_clear(field_buffer); + + // Skip the initial % + (*ptr)++; + + // Check for %% first + if (**ptr == '%') { + __kmp_str_buf_cat(field_buffer, "%", 1); + (*ptr)++; // skip over the second % + return 1; + } + + // Parse field modifiers if they are present + pad_zeros = false; + if (**ptr == '0') { + pad_zeros = true; + (*ptr)++; // skip over 0 + } + right_justify = false; + if (**ptr == '.') { + right_justify = true; + (*ptr)++; // skip over .
+ } + // Parse width of field: [width_left, width_right) + width_left = width_right = NULL; + if (**ptr >= '0' && **ptr <= '9') { + width_left = *ptr; + SKIP_DIGITS(*ptr); + width_right = *ptr; + } + + // Create the format for KMP_SNPRINTF based on flags parsed above + format_index = 0; + format[format_index++] = '%'; + if (!right_justify) + format[format_index++] = '-'; + if (pad_zeros) + format[format_index++] = '0'; + if (width_left && width_right) { + int i = 0; + // Only allow 8 digit number widths. + // This also prevents overflowing format variable + while (i < 8 && width_left < width_right) { + format[format_index++] = *width_left; + width_left++; + i++; + } + } + + // Parse a name (long or short) + // Canonicalize the name into absolute_short_name + found_valid_name = false; + parse_long_name = (**ptr == '{'); + if (parse_long_name) + (*ptr)++; // skip initial left brace + for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) / + sizeof(__kmp_affinity_format_table[0]); + ++i) { + char short_name = __kmp_affinity_format_table[i].short_name; + const char *long_name = __kmp_affinity_format_table[i].long_name; + char field_format = __kmp_affinity_format_table[i].field_format; + if (parse_long_name) { + int length = KMP_STRLEN(long_name); + if (strncmp(*ptr, long_name, length) == 0) { + found_valid_name = true; + (*ptr) += length; // skip the long name + } + } else if (**ptr == short_name) { + found_valid_name = true; + (*ptr)++; // skip the short name + } + if (found_valid_name) { + format[format_index++] = field_format; + format[format_index++] = '\0'; + absolute_short_name = short_name; + break; + } + } + if (parse_long_name) { + if (**ptr != '}') { + absolute_short_name = 0; + } else { + (*ptr)++; // skip over the right brace + } + } + + // Attempt to fill the buffer with the requested + // value using snprintf within __kmp_str_buf_print() + switch (absolute_short_name) { + case 't': + rc = __kmp_str_buf_print(field_buffer, format, 
__kmp_aux_get_team_num()); + break; + case 'T': + rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams()); + break; + case 'L': + rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level); + break; + case 'n': + rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid)); + break; + case 'H': { + static const int BUFFER_SIZE = 256; + char buf[BUFFER_SIZE]; + __kmp_expand_host_name(buf, BUFFER_SIZE); + rc = __kmp_str_buf_print(field_buffer, format, buf); + } break; + case 'P': + rc = __kmp_str_buf_print(field_buffer, format, getpid()); + break; + case 'i': + rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid()); + break; + case 'N': + rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc); + break; + case 'a': + field_value = + __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1); + rc = __kmp_str_buf_print(field_buffer, format, field_value); + break; +#if KMP_AFFINITY_SUPPORTED + case 'A': { + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask); + rc = __kmp_str_buf_print(field_buffer, format, buf.str); + __kmp_str_buf_free(&buf); + } break; +#endif + default: + // According to spec, If an implementation does not have info for field + // type, then "undefined" is printed + rc = __kmp_str_buf_print(field_buffer, "%s", "undefined"); + // Skip the field + if (parse_long_name) { + SKIP_TOKEN(*ptr); + if (**ptr == '}') + (*ptr)++; + } else { + (*ptr)++; + } + } + + KMP_ASSERT(format_index <= FORMAT_SIZE); + return rc; +} + +/* + * Return number of characters needed to hold the affinity string + * (not including null byte character) + * The resultant string is printed to buffer, which the caller can then + * handle afterwards +*/ +size_t __kmp_aux_capture_affinity(int gtid, const char *format, + kmp_str_buf_t *buffer) { + const char *parse_ptr; + size_t retval; + const kmp_info_t *th; + kmp_str_buf_t field; + + 
KMP_DEBUG_ASSERT(buffer); + KMP_DEBUG_ASSERT(gtid >= 0); + + __kmp_str_buf_init(&field); + __kmp_str_buf_clear(buffer); + + th = __kmp_threads[gtid]; + retval = 0; + + // If format is NULL or zero-length string, then we use + // affinity-format-var ICV + parse_ptr = format; + if (parse_ptr == NULL || *parse_ptr == '\0') { + parse_ptr = __kmp_affinity_format; + } + KMP_DEBUG_ASSERT(parse_ptr); + + while (*parse_ptr != '\0') { + // Parse a field + if (*parse_ptr == '%') { + // Put field in the buffer + int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field); + __kmp_str_buf_catbuf(buffer, &field); + retval += rc; + } else { + // Put literal character in buffer + __kmp_str_buf_cat(buffer, parse_ptr, 1); + retval++; + parse_ptr++; + } + } + __kmp_str_buf_free(&field); + return retval; +} + +// Displays the affinity string to stdout +void __kmp_aux_display_affinity(int gtid, const char *format) { + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_aux_capture_affinity(gtid, format, &buf); + __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str); + __kmp_str_buf_free(&buf); +} +#endif // OMP_50_ENABLED + /* ------------------------------------------------------------------------ */ void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) { Index: runtime/src/kmp_safe_c_api.h =================================================================== --- runtime/src/kmp_safe_c_api.h +++ runtime/src/kmp_safe_c_api.h @@ -11,6 +11,9 @@ #ifndef KMP_SAFE_C_API_H #define KMP_SAFE_C_API_H +#include "kmp_platform.h" +#include + // Replacement for banned C API // Not every unsafe call listed here is handled now, but keeping everything @@ -57,4 +60,16 @@ #endif // KMP_OS_WINDOWS +// Offer truncated version of strncpy +static inline void __kmp_strncpy_truncate(char *buffer, size_t buf_size, + char const *src, size_t src_size) { + if (src_size >= buf_size) { + src_size = buf_size - 1; + KMP_STRNCPY_S(buffer, buf_size, src, src_size); + buffer[buf_size - 1] = 
'\0'; + } else { + KMP_STRNCPY_S(buffer, buf_size, src, src_size); + } +} + #endif // KMP_SAFE_C_API_H Index: runtime/src/kmp_settings.cpp =================================================================== --- runtime/src/kmp_settings.cpp +++ runtime/src/kmp_settings.cpp @@ -3252,7 +3252,29 @@ #endif /* OMP_40_ENABLED */ #if OMP_50_ENABLED - +static void __kmp_stg_parse_display_affinity(char const *name, + char const *value, void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_display_affinity); +} +static void __kmp_stg_print_display_affinity(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_display_affinity); +} +static void __kmp_stg_parse_affinity_format(char const *name, char const *value, + void *data) { + size_t length = KMP_STRLEN(value); + __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, value, + length); +} +static void __kmp_stg_print_affinity_format(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print(buffer, " %s='", name); + } + __kmp_str_buf_print(buffer, "%s'\n", __kmp_affinity_format); +} // OMP_ALLOCATOR sets default allocator static void __kmp_stg_parse_allocator(char const *name, char const *value, void *data) { @@ -4879,7 +4901,12 @@ #endif #endif // KMP_AFFINITY_SUPPORTED - +#if OMP_50_ENABLED + {"OMP_DISPLAY_AFFINITY", __kmp_stg_parse_display_affinity, + __kmp_stg_print_display_affinity, NULL, 0, 0}, + {"OMP_AFFINITY_FORMAT", __kmp_stg_parse_affinity_format, + __kmp_stg_print_affinity_format, NULL, 0, 0}, +#endif {"KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, __kmp_stg_print_init_at_fork, NULL, 0, 0}, {"KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, @@ -5409,6 +5436,21 @@ } #endif /* OMP_40_ENABLED */ +#if OMP_50_ENABLED + // Set up the affinity format ICV + // Grab the default affinity format string from the message catalog + kmp_msg_t m = 
+ __kmp_msg_format(kmp_i18n_msg_AffFormatDefault, "%P", "%i", "%n", "%A"); + KMP_DEBUG_ASSERT(KMP_STRLEN(m.str) < KMP_AFFINITY_FORMAT_SIZE); + + if (__kmp_affinity_format == NULL) { + __kmp_affinity_format = + (char *)KMP_INTERNAL_MALLOC(sizeof(char) * KMP_AFFINITY_FORMAT_SIZE); + } + KMP_STRCPY_S(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, m.str); + __kmp_str_free(&m.str); +#endif + // Now process all of the settings. for (i = 0; i < block.count; ++i) { __kmp_stg_parse(block.vars[i].name, block.vars[i].value); Index: runtime/src/kmp_str.h =================================================================== --- runtime/src/kmp_str.h +++ runtime/src/kmp_str.h @@ -51,9 +51,10 @@ void __kmp_str_buf_detach(kmp_str_buf_t *buffer); void __kmp_str_buf_free(kmp_str_buf_t *buffer); void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len); -void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, - va_list args); -void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...); +void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src); +int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, + va_list args); +int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...); void __kmp_str_buf_print_size(kmp_str_buf_t *buffer, size_t size); /* File name parser. 
Index: runtime/src/kmp_str.cpp =================================================================== --- runtime/src/kmp_str.cpp +++ runtime/src/kmp_str.cpp @@ -143,13 +143,28 @@ KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_cat -void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, - va_list args) { +void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) { + KMP_DEBUG_ASSERT(dest); + KMP_DEBUG_ASSERT(src); + KMP_STR_BUF_INVARIANT(dest); + KMP_STR_BUF_INVARIANT(src); + if (!src->str || !src->used) + return; + __kmp_str_buf_reserve(dest, dest->used + src->used + 1); + KMP_MEMCPY(dest->str + dest->used, src->str, src->used); + dest->str[dest->used + src->used] = 0; + dest->used += src->used; + KMP_STR_BUF_INVARIANT(dest); +} // __kmp_str_buf_catbuf + +// Return the number of characters written +int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, + va_list args) { + int rc; KMP_STR_BUF_INVARIANT(buffer); for (;;) { int const free = buffer->size - buffer->used; - int rc; int size; // Try to format string. @@ -198,13 +213,17 @@ KMP_DEBUG_ASSERT(buffer->size > 0); KMP_STR_BUF_INVARIANT(buffer); + return rc; } // __kmp_str_buf_vprint -void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) { +// Return the number of characters written +int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) { + int rc; va_list args; va_start(args, format); - __kmp_str_buf_vprint(buffer, format, args); + rc = __kmp_str_buf_vprint(buffer, format, args); va_end(args); + return rc; } // __kmp_str_buf_print /* The function prints specified size to buffer. 
Size is expressed using biggest Index: runtime/src/kmp_stub.cpp =================================================================== --- runtime/src/kmp_stub.cpp +++ runtime/src/kmp_stub.cpp @@ -35,6 +35,10 @@ #define omp_set_num_threads ompc_set_num_threads #define omp_set_dynamic ompc_set_dynamic #define omp_set_nested ompc_set_nested +#define omp_set_affinity_format ompc_set_affinity_format +#define omp_get_affinity_format ompc_get_affinity_format +#define omp_display_affinity ompc_display_affinity +#define omp_capture_affinity ompc_capture_affinity #define kmp_set_stacksize kmpc_set_stacksize #define kmp_set_stacksize_s kmpc_set_stacksize_s #define kmp_set_blocktime kmpc_set_blocktime @@ -350,6 +354,17 @@ const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6; const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7; const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8; +/* OpenMP 5.0 Affinity Format */ +void omp_set_affinity_format(char const *format) { i; } +size_t omp_get_affinity_format(char *buffer, size_t size) { + i; + return 0; +} +void omp_display_affinity(char const *format) { i; } +size_t omp_capture_affinity(char *buffer, size_t buf_size, char const *format) { + i; + return 0; +} #endif /* OMP_50_ENABLED */ // end of file // Index: runtime/test/affinity/format/affinity_display.1.c =================================================================== --- /dev/null +++ runtime/test/affinity/format/affinity_display.1.c @@ -0,0 +1,91 @@ +// RUN: %libomp-compile +// RUN: env OMP_DISPLAY_AFFINITY=TRUE OMP_NUM_THREADS=4 OMP_PLACES='{0,1},{2,3},{4,5},{6,7}' %libomp-run | python %S/check.py -c 'CHECK' %s + +// Affinity Display examples +#include +#include // also null is in +#include +#include +#include + +// ENVIRONMENT +// OMP_DISPLAY_AFFINITY=TRUE +// OMP_NUM_THREADS=4 +// OMP_PLACES='{0,1},{2,3},{4,5},{6,7}' + +// CHECK: num_threads=1 OMP: pid [0-9]+ tid [0-9]+ thread [0-4] bound to OS proc set 
\{(0,1)|(undefined)\} +// CHECK: num_threads=4 Thread id [0-3] reporting in +// CHECK: num_threads=4 OMP: pid [0-9]+ tid [0-9]+ thread [0-4] bound to OS proc set \{([0246],[1357])|(undefined)\} +// CHECK: num_threads=1 Default Affinity Format is: +// CHECK: num_threads=1 Affinity Format set to: host=%20H tid=%0.4n binds_to=%A +// CHECK: num_threads=4 tid=[0-3] affinity:host=[a-zA-Z0-9_.-]+[ ]+tid=000[0-4][ ]+binds_to=(([0246],[1357])|(undefined)) + +#define FORMAT_STORE 80 +#define BUFFER_STORE 80 + +int main(int argc, char** argv) { + int i, n, tid, max_req_store = 0; + size_t nchars; + char default_format[FORMAT_STORE]; + char my_format[] = "host=%20H tid=%0.4n binds_to=%A"; + char **buffer; + + // CODE SEGMENT 1 AFFINITY DISPLAY + omp_display_affinity(NULL); + + // OMP_DISPLAY_AFFINITY=TRUE, + // Affinity reported for 1 parallel region + #pragma omp parallel + { + printf("Thread id %d reporting in.\n", omp_get_thread_num()); + } + + // Get and Display Default Affinity Format + nchars = omp_get_affinity_format(default_format, (size_t)FORMAT_STORE); + printf("Default Affinity Format is: %s\n", default_format); + + if (nchars > FORMAT_STORE) { + printf("Caution: Reported Format is truncated. Increase\n"); + printf(" FORMAT_STORE by %d.\n", nchars - FORMAT_STORE); + } + + // Set Affinity Format + omp_set_affinity_format(my_format); + printf("Affinity Format set to: %s\n", my_format); + + // CODE SEGMENT 3 CAPTURE AFFINITY + // Set up buffer for affinity of n threads + n = omp_get_max_threads(); + buffer = (char **)malloc(sizeof(char *) * n); + for (i = 0; i < n; i++) { + buffer[i] = (char *)malloc(sizeof(char) * BUFFER_STORE); + } + + // Capture Affinity using Affinity Format set above. 
+ // Use critical reduction to check size of buffer areas + #pragma omp parallel private(tid, nchars) + { + tid = omp_get_thread_num(); + nchars = omp_capture_affinity(buffer[tid], (size_t)BUFFER_STORE, NULL); + #pragma omp critical + { + if (nchars > max_req_store) + max_req_store = nchars; + } + } + + for (i = 0; i < n; i++) { + printf("tid=%d affinity:%s:\n", i, buffer[i]); + } + // for 4 threads with OMP_PLACES='{0,1},{2,3},{4,5},{6,7}' + // host=%20H tid=%0.4n binds_to=%A + // host= tid=0000 binds_to=0,1 + // host= tid=0001 binds_to=2,3 + // host= tid=0002 binds_to=4,5 + // host= tid=0003 binds_to=6,7 + + if (max_req_store > BUFFER_STORE) { + printf("Caution: Affinity string truncated. Increase\n"); + printf(" BUFFER_STORE by %d\n", max_req_store - BUFFER_STORE); + } +} Index: runtime/test/affinity/format/affinity_values.c =================================================================== --- /dev/null +++ runtime/test/affinity/format/affinity_values.c @@ -0,0 +1,123 @@ +// RUN: %libomp-compile +// RUN: env OMP_PROC_BIND=close OMP_PLACES=threads %libomp-run +// RUN: env OMP_PROC_BIND=close OMP_PLACES=cores %libomp-run +// RUN: env OMP_PROC_BIND=close OMP_PLACES=sockets %libomp-run +// RUN: env KMP_AFFINITY=compact %libomp-run +// RUN: env KMP_AFFINITY=scatter %libomp-run +// REQUIRES: affinity + +#include +#include +#include +#include +#include "helper.h" +#define DEBUG 0 + +#if DEBUG +#include +#endif + +#define BUFFER_SIZE 1024 + +char buf[BUFFER_SIZE]; +#pragma omp threadprivate(buf) + +static int debug_printf(const char* format, ...) 
{
+  int retval = 0;
+#if DEBUG
+  va_list args;
+  va_start(args, format);
+  retval = vprintf(format, args);
+  va_end(args);
+#endif
+  return retval;
+}
+// Debug builds only: print the affinity-related environment variables.
+static void display_affinity_environment() {
+#if DEBUG
+  printf("Affinity Environment:\n");
+  printf(" OMP_PROC_BIND=%s\n", getenv("OMP_PROC_BIND"));
+  printf(" OMP_PLACES=%s\n", getenv("OMP_PLACES"));
+  printf(" KMP_AFFINITY=%s\n", getenv("KMP_AFFINITY"));
+#endif
+}
+
+// Reads in a list of integers into ids array (not going past ids_size)
+// e.g., if affinity = "0-4,6,8-10,14,16,17-20,23"
+// then ids = [0,1,2,3,4,6,8,9,10,14,16,17,18,19,20,23]
+void list_to_ids(const char* affinity, int* ids, int ids_size) {
+  int id, b, e, nread, ids_index = 0;
+  char *aff, *begin, *end, *absolute_end;
+  aff = strdup(affinity); // private copy: tokens are terminated in place
+  absolute_end = aff + strlen(aff);
+  begin = end = aff;
+  while (ids_size > 0 && end < absolute_end) {
+    end = begin;
+    while (*end != '\0' && *end != ',')
+      end++;
+    *end = '\0';
+    // "b-e" converts two fields (a range); a lone "b" converts one
+    nread = sscanf(begin, "%d-%d", &b, &e);
+    if (nread == 1) {
+      e = b;
+    } else if (nread != 2) {
+      // Not a number: use an empty range so nothing is stored
+      b = 0;
+      e = -1;
+    }
+    for (id = b; id <= e; ++id) {
+      ids[ids_index++] = id;
+      if (ids_index >= ids_size) {
+        free(aff);
+        return;
+      }
+    }
+    begin = end + 1;
+  }
+  free(aff);
+}
+// %{thread_affinity} and %A must both list the procs of the thread's place.
+void check_thread_affinity() {
+  int i;
+  const char *formats[2] = {"%{thread_affinity}", "%A"};
+  for (i = 0; i < sizeof(formats) / sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel
+    {
+      int j, k;
+      int place = omp_get_place_num();
+      int num_procs = omp_get_place_num_procs(place);
+      int *ids = (int *)malloc(sizeof(int) * num_procs);
+      int *ids2 = (int *)malloc(sizeof(int) * num_procs);
+      char buf[256];
+      size_t n = omp_capture_affinity(buf, 256, NULL);
+      check(n < 256); // n >= 256 would mean the string was truncated
+      omp_get_place_proc_ids(place, ids);
+      list_to_ids(buf, ids2, num_procs);
+
+      #pragma omp for schedule(static) ordered
+      for (k = 0; k < omp_get_num_threads(); ++k) {
+        #pragma omp ordered
+        {
+          debug_printf("Thread %d: captured affinity = %s\n",
+                       omp_get_thread_num(), buf);
+          for (j = 0; j < num_procs; ++j) {
+            debug_printf("Thread %d: ids[%d] = %d ids2[%d] = %d\n",
+                         omp_get_thread_num(), j, ids[j], j, ids2[j]);
+            check(ids[j] == ids2[j]);
+          }
+        }
+      }
+
+      free(ids);
+      free(ids2);
+    }
+  }
+}
+// Enable nesting, dump the environment (debug builds) and run the check.
+int main(int argc, char** argv) {
+  omp_set_nested(1);
+  display_affinity_environment();
+  check_thread_affinity();
+  return 0;
+}
Index: runtime/test/affinity/format/api.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/api.c
@@ -0,0 +1,47 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "helper.h"
+
+#define BUFFER_SIZE 1024
+
+// Round-trips a literal (field-free) format through the set/get, capture and
+// display entry points, including truncation into a 5-byte buffer.
+int main(int argc, char** argv) {
+  char buf[BUFFER_SIZE];
+  size_t needed;
+
+  omp_set_affinity_format("0123456789");
+
+  needed = omp_get_affinity_format(buf, BUFFER_SIZE);
+  check(streqls(buf, "0123456789"));
+  check(needed == 10);
+
+  // Check that it is truncated properly
+  omp_get_affinity_format(buf, 5);
+  check(streqls(buf, "0123"));
+
+  #pragma omp parallel
+  {
+    char my_buf[512];
+    size_t needed = omp_capture_affinity(my_buf, 512, NULL);
+    check(streqls(my_buf, "0123456789"));
+    check(needed == 10);
+    // Check that it is truncated properly
+    omp_capture_affinity(my_buf, 5, NULL);
+    check(streqls(my_buf, "0123"));
+  }
+
+  #pragma omp parallel num_threads(4)
+  {
+    omp_display_affinity(NULL);
+  }
+
+  return 0;
+}
+
+// CHECK: num_threads=4 0123456789
Index: runtime/test/affinity/format/api2.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/api2.c
@@ -0,0 +1,71 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "helper.h"
+
+#define BUFFER_SIZE 1024
+
+// Checks set/get, capture (with truncation) and display using the short field
+// names and their long %{...} spellings; expected text is built from omp_get_*.
+int main(int argc, char** argv) {
+  char buf[BUFFER_SIZE];
+  size_t needed, length;
+  const char* format = "tl:%L tn:%n nt:%N an:%a";
+  const char* second_format = "nesting_level:%{nesting_level} thread_num:%{thread_num} num_threads:%{num_threads} ancestor_tnum:%{ancestor_tnum}";
+
+  length = strlen(format);
+  omp_set_affinity_format(format);
+
+  needed = omp_get_affinity_format(buf, BUFFER_SIZE);
+  check(streqls(buf, format));
+  check(needed == length);
+
+  // Check that it is truncated properly
+  omp_get_affinity_format(buf, 5);
+  check(streqls(buf, "tl:%"));
+
+  #pragma omp parallel
+  {
+    char my_buf[512];
+    char supposed[512];
+    int tl, tn, nt, an;
+    size_t needed, needed2;
+    tl = omp_get_level();
+    tn = omp_get_thread_num();
+    nt = omp_get_num_threads();
+    an = omp_get_ancestor_thread_num(omp_get_level()-1);
+    needed = omp_capture_affinity(my_buf, 512, NULL);
+    needed2 = (size_t)snprintf(supposed, 512, "tl:%d tn:%d nt:%d an:%d", tl, tn, nt, an);
+    check(streqls(my_buf, supposed));
+    check(needed == needed2);
+    // Check that it is truncated properly
+    supposed[4] = '\0';
+    omp_capture_affinity(my_buf, 5, NULL);
+    check(streqls(my_buf, supposed));
+
+    needed = omp_capture_affinity(my_buf, 512, second_format);
+    needed2 = (size_t)snprintf(supposed, 512, "nesting_level:%d thread_num:%d num_threads:%d ancestor_tnum:%d", tl, tn, nt, an);
+    check(streqls(my_buf, supposed));
+    check(needed == needed2);
+
+    // Check that it is truncated properly
+    supposed[25] = '\0';
+    omp_capture_affinity(my_buf, 26, second_format);
+    check(streqls(my_buf, supposed));
+  }
+
+  #pragma omp parallel num_threads(4)
+  {
+    omp_display_affinity(NULL);
+    omp_display_affinity(second_format);
+  }
+
+  return 0;
+}
+
+// CHECK: num_threads=4 tl:[0-9]+ tn:[0-9]+ nt:[0-9]+ an:[0-9]+
+// CHECK: num_threads=4 nesting_level:[0-9]+ thread_num:[0-9]+ num_threads:[0-9]+ ancestor_tnum:[0-9]+
Index: runtime/test/affinity/format/check.py
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/check.py
@@ -0,0 +1,73 @@
+import os
+import sys
+import argparse
+import re
+
+class Checks(object):  # matches program output (stdin) against check lines in a file
+    class CheckError(Exception):  # raised when the output does not satisfy the checks
+        pass
+
+    def __init__(self, filename, prefix):
+        self.checks = []  # one regex per expected output line
+        self.lines = []  # output lines that have not been matched yet
+        self.check_no_output = False  # set when '<prefix>: NO_OUTPUT' is found
+        self.filename = filename
+        self.prefix = prefix
+    def readStdin(self):  # read all of stdin, dropping line terminators
+        self.lines = [l.rstrip('\r\n') for l in sys.stdin.readlines()]
+    def readChecks(self):  # collect the '<prefix>: ...' directives from self.filename
+        with open(self.filename) as f:
+            for line in f:
+                match = re.search('{}: NO_OUTPUT'.format(re.escape(self.prefix)), line)
+                if match is not None:
+                    self.check_no_output = True
+                    return
+                match = re.search('{}: num_threads=([0-9]+) (.*)$'.format(re.escape(self.prefix)), line)
+                if match is not None:
+                    num_threads = int(match.group(1))
+                    for i in range(num_threads):  # one expected line per thread
+                        self.checks.append(match.group(2))
+                    continue
+    def check(self):  # raises Checks.CheckError on a missing or an extra output line
+        # If no checks at all, then nothing to do
+        if len(self.checks) == 0 and not self.check_no_output:
+            print('Nothing to check for')
+            return
+        # Check if we are expecting no output
+        if self.check_no_output:
+            if len(self.lines) == 0:
+                return
+            else:
+                raise Checks.CheckError('{}: Output was found when expecting none.'.format(self.prefix))
+        # Run through each check line and see if it exists in the output
+        # If it does, then delete the line from output and look for the
+        # next check line.
+        # If you don't find the line then raise Checks.CheckError
+        # If there are extra lines of output then raise Checks.CheckError
+        for c in self.checks:
+            found = False
+            index = -1
+            for idx, line in enumerate(self.lines):
+                if re.search(c, line) is not None:
+                    found = True
+                    index = idx
+                    break
+            if not found:
+                raise Checks.CheckError('{}: Did not find: {}'.format(self.prefix, c))
+            else:
+                del self.lines[index]
+        if len(self.lines) != 0:
+            raise Checks.CheckError('{}: Extra output: {}'.format(self.prefix, self.lines))
+
+# Setup argument parsing
+parser = argparse.ArgumentParser(description='''This script checks output of
+ a program against "CHECK" lines in filename''')
+parser.add_argument('filename', default=None, help='filename to check against')
+parser.add_argument('-c', '--check-prefix', dest='prefix',
+                    default='CHECK', help='check prefix token default: %(default)s')
+command_args = parser.parse_args()
+# Do the checking
+checks = Checks(command_args.filename, command_args.prefix)
+checks.readStdin()
+checks.readChecks()
+checks.check()
Index: runtime/test/affinity/format/fields_modifiers.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/fields_modifiers.c
@@ -0,0 +1,112 @@
+// RUN: %libomp-compile-and-run
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "helper.h"
+
+#define BUFFER_SIZE 1024
+
+char buf[BUFFER_SIZE];
+#pragma omp threadprivate(buf)
+
+// Capture this thread's affinity string (current format) into the threadprivate
+// buf; when check_needed is non-zero, also require exactly that length.
+char* get_string(size_t check_needed) {
+  size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+  //printf("buf = %s\n", buf);
+  check(needed < BUFFER_SIZE);
+  if (check_needed != 0) {
+    check(needed == check_needed);
+  }
+  return buf;
+}
+
+// "%0.8": expect seven '0' characters followed by the thread-number digit.
+void check_thread_num_padded_rjustified() {
+  int i;
+  const char* formats[2] = {"%0.8{thread_num}", "%0.8n"};
+  for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      int j;
+      int tid = omp_get_thread_num();
+      char ctid = '0' + (char)tid;
+      char* s = get_string(8);
+      for (j = 0; j < 7; ++j) {
+        check(s[j] == '0');
+      }
+      check(s[j] == ctid);
+    }
+  }
+}
+
+// "%.12": expect eleven spaces followed by the thread-number digit.
+void check_thread_num_rjustified() {
+  int i;
+  const char* formats[2] = {"%.12{thread_num}", "%.12n"};
+  for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      int j;
+      int tid = omp_get_thread_num();
+      char ctid = '0' + (char)tid;
+      char* s = get_string(12);
+      for (j = 0; j < 11; ++j) {
+        check(s[j] == ' ');
+      }
+      check(s[j] == ctid);
+    }
+  }
+}
+
+// "%5": expect the thread-number digit followed by four spaces.
+void check_thread_num_ljustified() {
+  int i;
+  const char* formats[2] = {"%5{thread_num}", "%5n"};
+  for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      int j;
+      int tid = omp_get_thread_num();
+      char ctid = '0' + (char)tid;
+      char* s = get_string(5);
+      check(s[0] == ctid);
+      for (j = 1; j < 5; ++j) {
+        check(s[j] == ' ');
+      }
+    }
+  }
+}
+
+// "%018": expect the thread-number digit followed by seventeen spaces.
+void check_thread_num_padded_ljustified() {
+  int i;
+  const char* formats[2] = {"%018{thread_num}", "%018n"};
+  for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      int j;
+      int tid = omp_get_thread_num();
+      char ctid = '0' + (char)tid;
+      char* s = get_string(18);
+      check(s[0] == ctid);
+      for (j = 1; j < 18; ++j) {
+        check(s[j] == ' ');
+      }
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  check_thread_num_ljustified();
+  check_thread_num_rjustified();
+  check_thread_num_padded_ljustified();
+  check_thread_num_padded_rjustified();
+  return 0;
+}
Index: runtime/test/affinity/format/fields_values.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/fields_values.c
@@ -0,0 +1,147 @@
+// RUN: %libomp-compile-and-run
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "helper.h"
+
+#if defined(_WIN32)
+#include <windows.h>
+#define getpid _getpid
+typedef int pid_t;
+#define gettid GetCurrentThreadId
+#define my_gethostname(buf, sz) GetComputerNameA(buf, &(sz))
+#else
+#include <unistd.h>
+#include <sys/types.h>
+#define my_gethostname(buf, sz) gethostname(buf, sz)
+#endif
+
+#define BUFFER_SIZE 1024
+
+char buf[BUFFER_SIZE];
+#pragma omp threadprivate(buf)
+
+// Capture this thread's affinity string and parse it as a single integer.
+int get_integer() {
+  int n, retval;
+  size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+  check(needed < BUFFER_SIZE);
+  //printf("buf = %s\n", buf);
+  n = sscanf(buf, "%d", &retval);
+  check(n == 1);
+  return retval;
+}
+
+// Capture this thread's affinity string into the threadprivate buf; return it.
+char* get_string() {
+  size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+  //printf("buf = %s\n", buf);
+  check(needed < BUFFER_SIZE);
+  return buf;
+}
+
+// For each of the two formats, compare the captured integer with func() in an
+// outer team of 8, in a nested team of 3, and again after the nested region.
+void check_integer(const char* formats[2], int(*func)()) {
+  int i;
+  for (i = 0; i < 2; ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      check(get_integer() == func());
+      #pragma omp parallel num_threads(3)
+      {
+        check(get_integer() == func());
+      }
+      check(get_integer() == func());
+    }
+  }
+}
+
+void check_nesting_level() {
+  // Check %{nesting_level} and %L
+  const char* formats[2] = {"%{nesting_level}", "%L"};
+  check_integer(formats, omp_get_level);
+}
+
+void check_thread_num() {
+  // Check %{thread_num} and %n
+  const char* formats[2] = {"%{thread_num}", "%n"};
+  check_integer(formats, omp_get_thread_num);
+}
+
+void check_num_threads() {
+  // Check %{num_threads} and %N
+  const char* formats[2] = {"%{num_threads}", "%N"};
+  check_integer(formats, omp_get_num_threads);
+}
+
+// Thread number of the ancestor at the enclosing nesting level.
+int ancestor_helper() {
+  return omp_get_ancestor_thread_num(omp_get_level() - 1);
+}
+void check_ancestor_tnum() {
+  // Check %{ancestor_tnum} and %a
+  const char* formats[2] = {"%{ancestor_tnum}", "%a"};
+  check_integer(formats, ancestor_helper);
+}
+
+int my_get_pid() { return (int)getpid(); }
+void check_process_id() {
+  // Check %{process_id} and %P
+  const char* formats[2] = {"%{process_id}", "%P"};
+  check_integer(formats, my_get_pid);
+}
+
+/*
+int my_get_tid() { return (int)gettid(); }
+void check_native_thread_id() {
+  // Check %{native_thread_id} and %i
+  const char* formats[2] = {"%{native_thread_id}", "%i"};
+  check_integer(formats, my_get_tid);
+}
+*/
+
+// Check %{host} and %H against the host name reported by the OS.
+void check_host() {
+  int i;
+  int buffer_size = 256;
+  const char* formats[2] = {"%{host}", "%H"};
+  char hostname[256];
+  my_gethostname(hostname, buffer_size);
+  for (i = 0; i < 2; ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      check(streqls(get_string(), hostname));
+    }
+  }
+}
+
+// Unknown long and short field names are expected to expand to "undefined".
+void check_undefined() {
+  int i;
+  const char* formats[2] = {"%{foobar}", "%X"};
+  for (i = 0; i < 2; ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      check(streqls(get_string(), "undefined"));
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  omp_set_nested(1);
+  check_nesting_level();
+  check_thread_num();
+  check_num_threads();
+  check_ancestor_tnum();
+  check_process_id();
+  //check_native_thread_id();
+  check_host();
+  check_undefined();
+  return 0;
+}
Index: runtime/test/affinity/format/helper.h
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/helper.h
@@ -0,0 +1,28 @@
+#ifndef HELPER_H
+#define HELPER_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+// Non-zero when the two C strings are equal.
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+// Abort the test with a file:line diagnostic when `condition` is false.
+// The condition text is passed as a %s argument rather than spliced into the
+// format string, because conditions such as streqls(buf, "tl:%") contain '%'.
+#define check(condition) \
+  if (!(condition)) { \
+    fprintf(stderr, "error: %s: %d: %s\n", __FILE__, __LINE__, \
+            STR(condition)); \
+    exit(1); \
+  }
+
+#if defined(_WIN32)
+#define snprintf _snprintf
+#endif
+
+#endif
Index: runtime/test/affinity/format/increase.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/increase.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N h:%H");
+  // should print all for first parallel
+  omp_set_num_threads(4);
+  #pragma omp parallel
+  { }
+  // should print all because of new threads
+  omp_set_num_threads(8);
+  #pragma omp parallel
+  { }
+  // team shrinks to 6; the expectations below want all 6 threads to print
+  omp_set_num_threads(6);
+  #pragma omp parallel
+  { }
+  // should print all because of new thread
+  omp_set_num_threads(9);
+  #pragma omp parallel
+  { }
+  // team shrinks to 2; the expectations below want both threads to print
+  omp_set_num_threads(2);
+  #pragma omp parallel
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=6 TESTER: tl:1 tn:[0-5] nt:6 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=9 TESTER: tl:1 tn:[0-8] nt:9 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=2 TESTER: tl:1 tn:[01] nt:2 h:[A-Za-z0-9-.]+
Index: runtime/test/affinity/format/nested.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/nested.c
@@ -0,0 +1,25 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES=threads OMP_PROC_BIND=spread,close %libomp-run | python %S/check.py -c 'CHECK' %s
+// REQUIRES: affinity
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// A team of 4 whose members each start a nested team of 3; places and binding
+// policy come from the environment set on the command line above.
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N h:%H");
+  omp_set_nested(1);
+  #pragma omp parallel num_threads(4)
+  {
+    #pragma omp parallel num_threads(3)
+    { }
+  }
+  return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 at:0 tn:[0-3] nt:4 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
Index: runtime/test/affinity/format/nested2.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/nested2.c
@@ -0,0 +1,29 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES=threads OMP_PROC_BIND=spread,close KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// Currently, KMP_HOT_TEAMS_MAX_LEVEL has to be equal to the
+// nest depth (2 here) for intuitive behavior
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N h:%H");
+  omp_set_nested(1);
+  #pragma omp parallel num_threads(4)
+  {
+    #pragma omp parallel num_threads(3)
+    { }
+    #pragma omp parallel num_threads(3)
+    { }
+  }
+  #pragma omp parallel num_threads(4)
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4 h:[A-Za-z0-9-.]+
Index: runtime/test/affinity/format/nested_mixed.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/nested_mixed.c
@@ -0,0 +1,48 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// Nested regions whose team sizes alternate between 1 and 2, down to nesting
+// level 4, followed by two more top-level regions of sizes 2 and 1.
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N h:%H");
+  omp_set_nested(1);
+  #pragma omp parallel num_threads(1)
+  {
+    #pragma omp parallel num_threads(2)
+    { }
+    #pragma omp parallel num_threads(2)
+    {
+      #pragma omp parallel num_threads(1)
+      {
+        #pragma omp parallel num_threads(2)
+        { }
+      }
+    }
+    #pragma omp parallel num_threads(1)
+    { }
+  }
+  #pragma omp parallel num_threads(2)
+  { }
+  #pragma omp parallel num_threads(1)
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1 h:[A-Za-z0-9-.]+
+
+// CHECK: num_threads=2 TESTER: tl:2 at:[0-9] tn:[01] nt:2 h:[A-Za-z0-9-.]+
+
+// CHECK: num_threads=1 TESTER: tl:3 at:[0-9] tn:0 nt:1 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=1 TESTER: tl:3 at:[0-9] tn:0 nt:1 h:[A-Za-z0-9-.]+
+
+// CHECK: num_threads=2 TESTER: tl:4 at:[0-9] tn:[01] nt:2 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=2 TESTER: tl:4 at:[0-9] tn:[01] nt:2 h:[A-Za-z0-9-.]+
+
+// CHECK: num_threads=1 TESTER: tl:2 at:[0-9] tn:0 nt:1 h:[A-Za-z0-9-.]+
+
+// CHECK: num_threads=2 TESTER: tl:1 at:[0-9] tn:[01] nt:2 h:[A-Za-z0-9-.]+
+
+// CHECK: num_threads=1 TESTER: tl:1 at:[0-9] tn:0 nt:1 h:[A-Za-z0-9-.]+
Index: runtime/test/affinity/format/nested_serial.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/nested_serial.c
@@ -0,0 +1,37 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// Every region, nested or not, requests a single thread; the nesting goes
+// three levels deep.
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N h:%H");
+  omp_set_nested(1);
+  #pragma omp parallel num_threads(1)
+  {
+    #pragma omp parallel num_threads(1)
+    { }
+    #pragma omp parallel num_threads(1)
+    { }
+    #pragma omp parallel num_threads(1)
+    {
+      #pragma omp parallel num_threads(1)
+      { }
+    }
+    #pragma omp parallel num_threads(1)
+    { }
+  }
+  #pragma omp parallel num_threads(1)
+  { }
+  #pragma omp parallel num_threads(1)
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=1 TESTER: tl:2 at:0 tn:0 nt:1 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=1 TESTER: tl:3 at:0 tn:0 nt:1 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=1 TESTER: tl:2 at:0 tn:0 nt:1 h:[A-Za-z0-9-.]+
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1 h:[A-Za-z0-9-.]+
Index: runtime/test/affinity/format/proc_bind.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/proc_bind.c
@@ -0,0 +1,33 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES='{0},{0,1},{0},{0,1},{0},{0,1},{0},{0,1},{0},{0,1},{0}' %libomp-run | python %S/check.py -c 'CHECK' %s
+// REQUIRES: affinity
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// Runs each proc_bind policy twice in a row with 8 threads; per the inline
+// notes, the binding changes whenever the policy switches.
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N h:%H aff:{%A}");
+  omp_set_num_threads(8);
+  // Initial parallel
+  #pragma omp parallel proc_bind(spread)
+  { }
+  #pragma omp parallel proc_bind(spread)
+  { }
+  // Affinity changes here
+  #pragma omp parallel proc_bind(close)
+  { }
+  #pragma omp parallel proc_bind(close)
+  { }
+  // Affinity changes here
+  #pragma omp parallel proc_bind(master)
+  { }
+  #pragma omp parallel proc_bind(master)
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 h:[A-Za-z0-9-.]+ aff:
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 h:[A-Za-z0-9-.]+ aff:
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 h:[A-Za-z0-9-.]+ aff:
Index: runtime/test/affinity/format/simple.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/simple.c
@@ -0,0 +1,29 @@
+// RUN: %libomp-compile
+// RUN: env OMP_DISPLAY_AFFINITY=false %libomp-run | python %S/check.py -c 'NOTHING' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=1 %libomp-run | python %S/check.py -c 'CHECK' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=2 %libomp-run | python %S/check.py -c 'CHECK-2' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=3 %libomp-run | python %S/check.py -c 'CHECK-3' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=4 %libomp-run | python %S/check.py -c 'CHECK-4' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=8 %libomp-run | python %S/check.py -c 'CHECK-8' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// Two empty parallel regions with the default team size; the command lines
+// above vary OMP_NUM_THREADS and also run once with the display disabled.
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N h:%H");
+  #pragma omp parallel
+  { }
+  #pragma omp parallel
+  { }
+  return 0;
+}
+
+// NOTHING: NO_OUTPUT
+// CHECK: num_threads=1 TESTER: tl:1 tn:0 nt:1 h:[A-Za-z0-9-.]+
+// CHECK-2: num_threads=2 TESTER: tl:1 tn:[01] nt:2 h:[A-Za-z0-9-.]+
+// CHECK-3: num_threads=3 TESTER: tl:1 tn:[0-2] nt:3 h:[A-Za-z0-9-.]+
+// CHECK-4: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4 h:[A-Za-z0-9-.]+
+// CHECK-8: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 h:[A-Za-z0-9-.]+
Index: runtime/test/affinity/format/simple_env.c
===================================================================
--- /dev/null
+++ runtime/test/affinity/format/simple_env.c
@@ -0,0 +1,18 @@
+// RUN: %libomp-compile
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_AFFINITY_FORMAT='TESTER-ENV: tl:%L tn:%n nt:%N h:%H' OMP_NUM_THREADS=8 %libomp-run | python %S/check.py -c 'CHECK-8' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// The format comes from the OMP_AFFINITY_FORMAT environment variable set on
+// the command line above; the program never calls omp_set_affinity_format.
+int main(int argc, char** argv) {
+  #pragma omp parallel
+  { }
+  #pragma omp parallel
+  { }
+  return 0;
+}
+
+// CHECK-8: num_threads=8 TESTER-ENV: tl:1 tn:[0-7] nt:8 h:[A-Za-z0-9-.]+
Index: runtime/test/lit.cfg
===================================================================
--- runtime/test/lit.cfg
+++ runtime/test/lit.cfg
@@ -94,6 +94,10 @@
 if 'Linux' in config.operating_system:
     config.available_features.add("linux")
 
+# Tests that require the 'affinity' feature only run on these systems
+if config.operating_system in ['Linux', 'Windows']:
+    config.available_features.add('affinity')
+
 # to run with icc INTEL_LICENSE_FILE must be set
 if 'INTEL_LICENSE_FILE' in os.environ:
     config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE']