diff --git a/openmp/runtime/src/kmp_itt.h b/openmp/runtime/src/kmp_itt.h --- a/openmp/runtime/src/kmp_itt.h +++ b/openmp/runtime/src/kmp_itt.h @@ -276,7 +276,13 @@ extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; -extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; + +struct ITTLocCache { + kmp_int32 team_size; + char const *psource; +}; +extern ITTLocCache __kmp_itt_region_cache[KMP_MAX_FRAME_DOMAINS]; +extern ITTLocCache __kmp_itt_barrier_cache[KMP_MAX_FRAME_DOMAINS]; extern __itt_domain *metadata_domain; extern __itt_string_handle *string_handle_imbl; extern __itt_string_handle *string_handle_loop; diff --git a/openmp/runtime/src/kmp_itt.cpp b/openmp/runtime/src/kmp_itt.cpp --- a/openmp/runtime/src/kmp_itt.cpp +++ b/openmp/runtime/src/kmp_itt.cpp @@ -29,7 +29,8 @@ __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; -kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; +ITTLocCache __kmp_itt_region_cache[KMP_MAX_FRAME_DOMAINS]; +ITTLocCache __kmp_itt_barrier_cache[KMP_MAX_FRAME_DOMAINS]; __itt_domain *metadata_domain = NULL; __itt_string_handle *string_handle_imbl = NULL; __itt_string_handle *string_handle_loop = NULL; diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl --- a/openmp/runtime/src/kmp_itt.inl +++ b/openmp/runtime/src/kmp_itt.inl @@ -123,6 +123,9 @@ __itt_suppress_push(__itt_suppress_memory_errors); __kmp_itt_region_domains[frm] = __itt_domain_create(buff); + ITTLocCache &region_cache = __kmp_itt_region_cache[frm]; + region_cache.team_size = team_size; + region_cache.psource = loc->psource; __itt_suppress_pop(); __kmp_str_free(&buff); @@ -137,7 +140,7 @@ } char *buff = NULL; buff = 
__kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func, - str_loc.file, str_loc.col); + str_loc.file, str_loc.line); __itt_suppress_push(__itt_suppress_memory_errors); __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff); __itt_suppress_pop(); @@ -153,25 +156,29 @@ // Check if team size was changed. Then create new region domain for this // location unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if ((frm < KMP_MAX_FRAME_DOMAINS) && - (__kmp_itt_region_team_size[frm] != team_size)) { - char *buff = NULL; - kmp_str_loc_t str_loc = - __kmp_str_loc_init(loc->psource, /* init_fname */ false); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, - team_size, str_loc.file, str_loc.line, - str_loc.col); + if (frm < KMP_MAX_FRAME_DOMAINS) { + ITTLocCache &region_cache = __kmp_itt_region_cache[frm]; + if ((region_cache.psource != loc->psource) || + (region_cache.team_size != team_size)) { + char *buff = NULL; + kmp_str_loc_t str_loc = + __kmp_str_loc_init(loc->psource, /* init_fname */ false); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, + team_size, str_loc.file, str_loc.line, + str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[frm] = __itt_domain_create(buff); - __itt_suppress_pop(); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); - __kmp_str_free(&buff); - __kmp_str_loc_free(&str_loc); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); - } else { // Team size was not changed. Use existing domain. - __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + __kmp_str_free(&buff); + __kmp_str_loc_free(&str_loc); + region_cache.psource = loc->psource; + region_cache.team_size = team_size; + __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } else { // Team size was not changed. Use existing domain. 
+ __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } } } KMP_ITT_DEBUG_LOCK(); @@ -186,6 +193,9 @@ __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region) { #if USE_ITT_NOTIFY + if (!loc) { + return; + } if (region) { kmp_team_t *team = __kmp_team_from_gtid(gtid); int serialized = (region == 2 ? 1 : 0); @@ -195,9 +205,11 @@ } // Check region domain has not been created before. It's index is saved in // the low two bytes. - if ((loc->reserved_2 & 0x0000FFFF) == 0) { + bool new_region = (loc->reserved_2 & 0x0000FFFF) == 0; + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if (new_region) { if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = + frm = KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value if (frm >= KMP_MAX_FRAME_DOMAINS) { KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count @@ -209,51 +221,36 @@ // barrier indexes to the high two bytes. It is OK because // KMP_MAX_FRAME_DOMAINS = 512. loc->reserved_2 |= (frm + 1); // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]" - char *buff = NULL; - kmp_str_loc_t str_loc = - __kmp_str_loc_init(loc->psource, /* init_fname */ false); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, - team_size, str_loc.file, str_loc.line, - str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[frm] = __itt_domain_create(buff); - __itt_suppress_pop(); - - __kmp_str_free(&buff); - __kmp_str_loc_free(&str_loc); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end); + } else { + return; } - } else { // Region domain exists for this location - // Check if team size was changed. 
Then create new region domain for this - // location - unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if (frm >= KMP_MAX_FRAME_DOMAINS) - return; // something's gone wrong, returning - if (__kmp_itt_region_team_size[frm] != team_size) { - char *buff = NULL; - kmp_str_loc_t str_loc = - __kmp_str_loc_init(loc->psource, /* init_fname */ false); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, - team_size, str_loc.file, str_loc.line, - str_loc.col); + } - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[frm] = __itt_domain_create(buff); - __itt_suppress_pop(); + ITTLocCache &region_cache = __kmp_itt_region_cache[frm]; + + if (new_region || + (region_cache.psource != loc->psource) || + (region_cache.team_size != team_size)) { + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]" + char *buff = NULL; + kmp_str_loc_t str_loc = + __kmp_str_loc_init(loc->psource, /* init_fname */ false); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, + team_size, str_loc.file, str_loc.line, + str_loc.col); - __kmp_str_free(&buff); - __kmp_str_loc_free(&str_loc); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end); - } else { // Team size was not changed. Use existing domain. - __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end); - } + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + region_cache.team_size = team_size; + region_cache.psource = loc->psource; + __kmp_str_free(&buff); + __kmp_str_loc_free(&str_loc); + __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end); + } else { // Team size was not changed. Use existing domain. 
+ __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end); } KMP_ITT_DEBUG_LOCK(); KMP_ITT_DEBUG_PRINT( @@ -261,64 +258,70 @@ gtid, loc->reserved_2, region, loc, begin, end); return; } else { // called for barrier reporting - if (loc) { - if ((loc->reserved_2 & 0xFFFF0000) == 0) { - if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( - &__kmp_barrier_domain_count); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( - &__kmp_barrier_domain_count); // revert the count - return; // loc->reserved_2 is still 0 - } - // Save the barrier frame index to the high two bytes. - loc->reserved_2 |= (frm + 1) << 16; // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "<func>$omp$frame@[file:]<line>[:<col>]" - kmp_str_loc_t str_loc = - __kmp_str_loc_init(loc->psource, /* init_fname */ false); - if (imbalance) { - char *buff_imb = NULL; - buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", - str_loc.func, team_size, str_loc.file, - str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb); - __itt_suppress_pop(); - __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin, - end); - __kmp_str_free(&buff_imb); - } else { - char *buff = NULL; - buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func, - str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff); - __itt_suppress_pop(); - __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin, - end); - __kmp_str_free(&buff); - } - __kmp_str_loc_free(&str_loc); - } - } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS - if (imbalance) { - __itt_frame_submit_v3( - __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL, - begin, end); - } else { - 
__itt_frame_submit_v3( - __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, - begin, end); + bool new_barrier = (loc->reserved_2 & 0xFFFF0000) == 0; + int frm = (loc->reserved_2 >> 16) - 1; + if (new_barrier) { + if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { + frm = KMP_TEST_THEN_INC32( + &__kmp_barrier_domain_count); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( + &__kmp_barrier_domain_count); // revert the count + return; // loc->reserved_2 is still 0 } + // Save the barrier frame index to the high two bytes. + loc->reserved_2 |= (frm + 1) << 16; // save "new" value + } else { + return; } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( - "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid, - loc->reserved_2, loc, begin, end); } + + ITTLocCache &barrier_cache = __kmp_itt_barrier_cache[frm]; + + if (new_barrier || + (barrier_cache.psource != loc->psource) || + (!imbalance && (barrier_cache.team_size != team_size))) { + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "<func>$omp$frame@[file:]<line>[:<col>]" + kmp_str_loc_t str_loc = + __kmp_str_loc_init(loc->psource, /* init_fname */ false); + char *buff = NULL; + if (imbalance) { + buff = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", + str_loc.func, team_size, str_loc.file, + str_loc.line); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin, + end); + } else { + buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func, + str_loc.file, str_loc.line); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin, + end); + } + barrier_cache.team_size = team_size; + barrier_cache.psource 
= loc->psource; + __kmp_str_free(&buff); + __kmp_str_loc_free(&str_loc); + } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS + if (imbalance) { + __itt_frame_submit_v3( + __kmp_itt_imbalance_domains[frm], NULL, begin, end); + } else { + __itt_frame_submit_v3( + __kmp_itt_barrier_domains[frm], NULL, begin, end); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( + "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid, + loc->reserved_2, loc, begin, end); } #endif } // __kmp_itt_frame_submit