diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -849,6 +849,7 @@ } kmp_nested_proc_bind_t; extern kmp_nested_proc_bind_t __kmp_nested_proc_bind; +extern kmp_proc_bind_t __kmp_teams_proc_bind; extern int __kmp_display_affinity; extern char *__kmp_affinity_format; diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -280,6 +280,7 @@ #endif /* KMP_AFFINITY_SUPPORTED */ kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, 0, 0}; +kmp_proc_bind_t __kmp_teams_proc_bind = proc_bind_spread; int __kmp_affinity_num_places = 0; int __kmp_display_affinity = FALSE; char *__kmp_affinity_format = NULL; diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -914,7 +914,8 @@ assured that there are enough threads available, because we checked on that earlier within critical section forkjoin */ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, - kmp_info_t *master_th, int master_gtid) { + kmp_info_t *master_th, int master_gtid, + int fork_teams_workers) { int i; int use_hot_team; @@ -1003,7 +1004,12 @@ } #if KMP_AFFINITY_SUPPORTED - __kmp_partition_places(team); + // Do not partition the places list for teams construct workers who + // haven't actually been forked to do real work yet. This partitioning + // will take place in the parallel region nested within the teams construct. 
+ if (!fork_teams_workers) { + __kmp_partition_places(team); + } #endif } @@ -1597,6 +1603,41 @@ } #endif + // Figure out the proc_bind policy for the nested parallel within teams + kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; + // proc_bind_default means don't update + kmp_proc_bind_t proc_bind_icv = proc_bind_default; + if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { + proc_bind = proc_bind_false; + } else { + // No proc_bind clause specified; use current proc-bind-var + if (proc_bind == proc_bind_default) { + proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; + } + /* else: The proc_bind policy was specified explicitly on parallel + clause. + This overrides proc-bind-var for this parallel region, but does not + change proc-bind-var. */ + // Figure the value of proc-bind-var for the child threads. + if ((level + 1 < __kmp_nested_proc_bind.used) && + (__kmp_nested_proc_bind.bind_types[level + 1] != + master_th->th.th_current_task->td_icvs.proc_bind)) { + proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; + } + } + KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind); + // Need to change the bind-var ICV to correct value for each implicit task + if (proc_bind_icv != proc_bind_default && + master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) { + kmp_info_t **other_threads = parent_team->t.t_threads; + for (i = 0; i < master_th->th.th_team_nproc; ++i) { + other_threads[i]->th.th_current_task->td_icvs.proc_bind = + proc_bind_icv; + } + } + // Reset for next parallel region + master_th->th.th_set_proc_bind = proc_bind_default; + #if USE_ITT_BUILD && USE_ITT_NOTIFY if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) || KMP_ITT_DEBUG) && @@ -1613,6 +1654,9 @@ parent_team->t.t_stack_id = __kmp_itt_stack_caller_create(); } #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ +#if KMP_AFFINITY_SUPPORTED + __kmp_partition_places(parent_team); +#endif KF_TRACE(10, ("__kmp_fork_call: before 
internal fork: root=%p, team=%p, " "master_th=%p, gtid=%d\n", @@ -1953,16 +1997,21 @@ // Figure out the proc_bind_policy for the new team. kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; - kmp_proc_bind_t proc_bind_icv = - proc_bind_default; // proc_bind_default means don't update + // proc_bind_default means don't update + kmp_proc_bind_t proc_bind_icv = proc_bind_default; if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { proc_bind = proc_bind_false; } else { + // No proc_bind clause specified; use current proc-bind-var for this + // parallel region if (proc_bind == proc_bind_default) { - // No proc_bind clause specified; use current proc-bind-var for this - // parallel region proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; } + // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND + if (master_th->th.th_teams_microtask && + microtask == (microtask_t)__kmp_teams_master) { + proc_bind = __kmp_teams_proc_bind; + } /* else: The proc_bind policy was specified explicitly on parallel clause. This overrides proc-bind-var for this parallel region, but does not change proc-bind-var. 
*/ @@ -1970,7 +2019,11 @@ if ((level + 1 < __kmp_nested_proc_bind.used) && (__kmp_nested_proc_bind.bind_types[level + 1] != master_th->th.th_current_task->td_icvs.proc_bind)) { - proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; + // Do not modify the proc bind icv for the two teams construct forks + // They just let the proc bind icv pass through + if (!master_th->th.th_teams_microtask || + !(microtask == (microtask_t)__kmp_teams_master || ap == NULL)) + proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; } } @@ -2142,7 +2195,7 @@ if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong root->r.r_active = TRUE; - __kmp_fork_team_threads(root, team, master_th, gtid); + __kmp_fork_team_threads(root, team, master_th, gtid, !ap); __kmp_setup_icv_copy(team, nthreads, &master_th->th.th_current_task->td_icvs, loc); @@ -2411,6 +2464,14 @@ } // active_level == 1 #endif /* USE_ITT_BUILD */ +#if KMP_AFFINITY_SUPPORTED + if (!exit_teams) { + // Restore master thread's partition. + master_th->th.th_first_place = team->t.t_first_place; + master_th->th.th_last_place = team->t.t_last_place; + } +#endif // KMP_AFFINITY_SUPPORTED + if (master_th->th.th_teams_microtask && !exit_teams && team->t.t_pkfn != (microtask_t)__kmp_teams_master && team->t.t_level == master_th->th.th_teams_level + 1) { @@ -2518,11 +2579,6 @@ master_th, team)); __kmp_pop_current_task_from_thread(master_th); -#if KMP_AFFINITY_SUPPORTED - // Restore master thread's partition. 
- master_th->th.th_first_place = team->t.t_first_place; - master_th->th.th_last_place = team->t.t_last_place; -#endif // KMP_AFFINITY_SUPPORTED master_th->th.th_def_allocator = team->t.t_def_allocator; #if OMPD_SUPPORT @@ -5016,6 +5072,7 @@ kmp_team_t *team; int use_hot_team = !root->r.r_active; int level = 0; + int do_place_partition = 1; KA_TRACE(20, ("__kmp_allocate_team: called\n")); KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0); @@ -5037,6 +5094,12 @@ ++level; // not increment if #teams==1, or for outer fork of the teams; // increment otherwise } + // Do not perform the place partition if inner fork of the teams + // Wait until nested parallel region encountered inside teams construct + if ((master->th.th_teams_size.nteams == 1 && + master->th.th_teams_level >= team->t.t_level) || + (team->t.t_pkfn == (microtask_t)__kmp_teams_master)) + do_place_partition = 0; } hot_teams = master->th.th_hot_teams; if (level < __kmp_hot_teams_max_level && hot_teams && @@ -5074,6 +5137,10 @@ __kmp_resize_dist_barrier(team, old_nthr, new_nproc); } + // If not doing the place partition, then reset the team's proc bind + // to indicate that partitioning of all threads still needs to take place + if (do_place_partition == 0) + team->t.t_proc_bind = proc_bind_default; // Has the number of threads changed? /* Let's assume the most common case is that the number of threads is unchanged, and put that case first. 
*/ @@ -5103,16 +5170,20 @@ if ((team->t.t_size_changed == 0) && (team->t.t_proc_bind == new_proc_bind)) { if (new_proc_bind == proc_bind_spread) { - __kmp_partition_places( - team, 1); // add flag to update only master for spread + if (do_place_partition) { + // add flag to update only master for spread + __kmp_partition_places(team, 1); + } } KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " "proc_bind = %d, partition = [%d,%d]\n", team->t.t_id, new_proc_bind, team->t.t_first_place, team->t.t_last_place)); } else { - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); - __kmp_partition_places(team); + if (do_place_partition) { + KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); + __kmp_partition_places(team); + } } #else KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); @@ -5189,10 +5260,12 @@ } #endif - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); + if (do_place_partition) { + KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); #if KMP_AFFINITY_SUPPORTED - __kmp_partition_places(team); + __kmp_partition_places(team); #endif + } } else { // team->t.t_nproc < new_nproc #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED kmp_affin_mask_t *old_mask; @@ -5328,10 +5401,12 @@ } #endif - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); + if (do_place_partition) { + KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); #if KMP_AFFINITY_SUPPORTED - __kmp_partition_places(team); + __kmp_partition_places(team); #endif + } } // Check changes in number of threads kmp_info_t *master = team->t.t_threads[0]; diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -3207,6 +3207,47 @@ } } // __kmp_stg_print_topology_method +// KMP_TEAMS_PROC_BIND +struct kmp_proc_bind_info_t { + const char *name; + kmp_proc_bind_t proc_bind; +}; +static kmp_proc_bind_info_t proc_bind_table[] = { + {"spread", proc_bind_spread}, 
+ {"true", proc_bind_spread}, + {"close", proc_bind_close}, + // teams-bind = false means "replicate the primary thread's affinity" + {"false", proc_bind_primary}, + {"primary", proc_bind_primary}}; +static void __kmp_stg_parse_teams_proc_bind(char const *name, char const *value, + void *data) { + int valid; + const char *end; + valid = 0; + for (size_t i = 0; i < sizeof(proc_bind_table) / sizeof(proc_bind_table[0]); + ++i) { + if (__kmp_match_str(proc_bind_table[i].name, value, &end)) { + __kmp_teams_proc_bind = proc_bind_table[i].proc_bind; + valid = 1; + break; + } + } + if (!valid) { + KMP_WARNING(StgInvalidValue, name, value); + } +} +static void __kmp_stg_print_teams_proc_bind(kmp_str_buf_t *buffer, + char const *name, void *data) { + const char *value = KMP_I18N_STR(NotDefined); + for (size_t i = 0; i < sizeof(proc_bind_table) / sizeof(proc_bind_table[0]); + ++i) { + if (__kmp_teams_proc_bind == proc_bind_table[i].proc_bind) { + value = proc_bind_table[i].name; + break; + } + } + __kmp_stg_print_str(buffer, name, value); +} #endif /* KMP_AFFINITY_SUPPORTED */ // OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X* @@ -5312,6 +5353,8 @@ #endif /* KMP_GOMP_COMPAT */ {"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0}, + {"KMP_TEAMS_PROC_BIND", __kmp_stg_parse_teams_proc_bind, + __kmp_stg_print_teams_proc_bind, NULL, 0, 0}, {"OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0}, {"KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, __kmp_stg_print_topology_method, NULL, 0, 0}, diff --git a/openmp/runtime/test/affinity/libomp_test_topology.h b/openmp/runtime/test/affinity/libomp_test_topology.h --- a/openmp/runtime/test/affinity/libomp_test_topology.h +++ b/openmp/runtime/test/affinity/libomp_test_topology.h @@ -8,6 +8,7 @@ #include #include #include +#include typedef enum topology_obj_type_t { TOPOLOGY_OBJ_THREAD, @@ -18,6 +19,8 @@ typedef struct place_list_t { int num_places; + int 
current_place; + int *place_nums; affinity_mask_t **masks; } place_list_t; @@ -147,6 +150,7 @@ static place_list_t *topology_alloc_type_places(topology_obj_type_t type) { char buf[1024]; int i, cpu, num_places, num_unique; + int *place_nums; int num_cpus = topology_get_num_cpus(); place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t)); affinity_mask_t **masks = @@ -184,8 +188,13 @@ if (mask) masks[num_unique++] = mask; } + place_nums = (int *)malloc(sizeof(int) * num_unique); + for (i = 0; i < num_unique; ++i) + place_nums[i] = i; places->num_places = num_unique; places->masks = masks; + places->place_nums = place_nums; + places->current_place = -1; return places; } @@ -195,6 +204,7 @@ place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t)); affinity_mask_t **masks = (affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_places); + int *place_nums = (int *)malloc(sizeof(int) * num_places); for (place = 0; place < num_places; ++place) { int num_procs = omp_get_place_num_procs(place); int *ids = (int *)malloc(sizeof(int) * num_procs); @@ -203,9 +213,45 @@ for (i = 0; i < num_procs; ++i) affinity_mask_set(mask, ids[i]); masks[place] = mask; + place_nums[place] = place; } places->num_places = num_places; + places->place_nums = place_nums; places->masks = masks; + places->current_place = omp_get_place_num(); + return places; +} + +static place_list_t *topology_alloc_openmp_partition() { + int p, i; + int num_places = omp_get_partition_num_places(); + place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t)); + int *place_nums = (int *)malloc(sizeof(int) * num_places); + affinity_mask_t **masks = + (affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_places); + omp_get_partition_place_nums(place_nums); + for (p = 0; p < num_places; ++p) { + int place = place_nums[p]; + int num_procs = omp_get_place_num_procs(place); + int *ids = (int *)malloc(sizeof(int) * num_procs); + if (num_procs == 0) { + fprintf(stderr, "place %d has 0 
procs?\n", place); + exit(EXIT_FAILURE); + } + omp_get_place_proc_ids(place, ids); + affinity_mask_t *mask = affinity_mask_alloc(); + for (i = 0; i < num_procs; ++i) + affinity_mask_set(mask, ids[i]); + if (affinity_mask_count(mask) == 0) { + fprintf(stderr, "place %d has 0 procs set?\n", place); + exit(EXIT_FAILURE); + } + masks[p] = mask; + } + places->num_places = num_places; + places->place_nums = place_nums; + places->masks = masks; + places->current_place = omp_get_place_num(); return places; } @@ -216,6 +262,7 @@ for (i = 0; i < places->num_places; ++i) affinity_mask_free(places->masks[i]); free(places->masks); + free(places->place_nums); free(places); } @@ -224,8 +271,306 @@ char buf[1024]; for (i = 0; i < p->num_places; ++i) { affinity_mask_snprintf(buf, sizeof(buf), p->masks[i]); - printf("Place %d: %s\n", i, buf); + printf("Place %d: %s\n", p->place_nums[i], buf); + } +} + +// Print out an error message, possibly with two problem place lists, +// and then exit with failure +static void proc_bind_die(omp_proc_bind_t proc_bind, int T, int P, + const char *format, ...) { + va_list args; + va_start(args, format); + const char *pb; + switch (proc_bind) { + case omp_proc_bind_false: + pb = "False"; + break; + case omp_proc_bind_true: + pb = "True"; + break; + case omp_proc_bind_master: + pb = "Master (Primary)"; + break; + case omp_proc_bind_close: + pb = "Close"; + break; + case omp_proc_bind_spread: + pb = "Spread"; + break; + default: + pb = "(Unknown Proc Bind Type)"; + break; + } + if (proc_bind == omp_proc_bind_spread || proc_bind == omp_proc_bind_close) { + if (T <= P) { + fprintf(stderr, "%s : (T(%d) <= P(%d)) : ", pb, T, P); + } else { + fprintf(stderr, "%s : (T(%d) > P(%d)) : ", pb, T, P); + } + } else { + fprintf(stderr, "%s : T = %d, P = %d : ", pb, T, P); + } + vfprintf(stderr, format, args); + va_end(args); + + exit(EXIT_FAILURE); +} + +// Check each child place partition against the parent's; exits on failure.
+static void proc_bind_check(omp_proc_bind_t proc_bind, + const place_list_t *parent, place_list_t **children, + int nchildren) { + place_list_t *partition; + int T, i, j, place, low, high, first, last, count, current_place, num_places; + const int *place_nums; + int P = parent->num_places; + + // Find the correct T (there could be null entries in children) + place_list_t **partitions = + (place_list_t **)malloc(sizeof(place_list_t *) * nchildren); + T = 0; + for (i = 0; i < nchildren; ++i) + if (children[i]) + partitions[T++] = children[i]; + // Only able to check spread, close, master (primary) + if (proc_bind != omp_proc_bind_spread && proc_bind != omp_proc_bind_close && + proc_bind != omp_proc_bind_master) + proc_bind_die(proc_bind, T, P, + "Cannot check this proc bind type\n"); + + if (proc_bind == omp_proc_bind_spread) { + if (T <= P) { + // Run through each subpartition + for (i = 0; i < T; ++i) { + partition = partitions[i]; + place_nums = partition->place_nums; + num_places = partition->num_places; + current_place = partition->current_place; + // Correct count? + low = P / T; + high = P / T + (P % T ? 1 : 0); + if (num_places != low && num_places != high) { + proc_bind_die(proc_bind, T, P, + "Incorrect number of places for thread %d: %d. " + "Expecting between %d and %d\n", + i, num_places, low, high); + } + // Consecutive places? + for (j = 1; j < num_places; ++j) { + if (place_nums[j] != (place_nums[j - 1] + 1) % P) { + proc_bind_die(proc_bind, T, P, + "Not consecutive places: %d, %d in partition\n", + place_nums[j - 1], place_nums[j]); + } + } + first = place_nums[0]; + last = place_nums[num_places - 1]; + // Primary thread executes on place of the parent thread?
+ if (i == 0) { + if (current_place != parent->current_place) { + proc_bind_die( + proc_bind, T, P, + "Primary thread not on same place (%d) as parent thread (%d)\n", + current_place, parent->current_place); + } + } else { + // Thread's current place is first place within it's partition? + if (current_place != first) { + proc_bind_die(proc_bind, T, P, + "Thread's current place (%d) is not the first place " + "in its partition [%d, %d]\n", + current_place, first, last); + } + } + // Partitions don't have intersections? + int f1 = first; + int l1 = last; + for (j = 0; j < i; ++j) { + int f2 = partitions[j]->place_nums[0]; + int l2 = partitions[j]->place_nums[partitions[j]->num_places - 1]; + if (f1 > l1 && f2 > l2) { + proc_bind_die(proc_bind, T, P, + "partitions intersect. [%d, %d] and [%d, %d]\n", f1, + l1, f2, l2); + } + if (f1 > l1 && f2 <= l2) + if (f1 < l2 || l1 > f2) { + proc_bind_die(proc_bind, T, P, + "partitions intersect. [%d, %d] and [%d, %d]\n", f1, + l1, f2, l2); + } + if (f1 <= l1 && f2 > l2) + if (f2 < l1 || l2 > f1) { + proc_bind_die(proc_bind, T, P, + "partitions intersect. [%d, %d] and [%d, %d]\n", f1, + l1, f2, l2); + } + if (f1 <= l1 && f2 <= l2) + if (!(f2 > l1 || l2 < f1)) { + proc_bind_die(proc_bind, T, P, + "partitions intersect. [%d, %d] and [%d, %d]\n", f1, + l1, f2, l2); + } + } + } + } else { + // T > P + // Each partition has only one place? + for (i = 0; i < T; ++i) { + if (partitions[i]->num_places != 1) { + proc_bind_die( + proc_bind, T, P, + "Incorrect number of places for thread %d: %d. Expecting 1\n", i, + partitions[i]->num_places); + } + } + // Correct number of consecutive threads per partition? + low = T / P; + high = T / P + (T % P ? 
1 : 0); + for (i = 1, count = 1; i < T; ++i) { + if (partitions[i]->place_nums[0] == partitions[i - 1]->place_nums[0]) { + count++; + if (count > high) { + proc_bind_die( + proc_bind, T, P, + "Too many threads have place %d for their partition\n", + partitions[i]->place_nums[0]); + } + } else { + if (count < low) { + proc_bind_die( + proc_bind, T, P, + "Not enough threads have place %d for their partition\n", + partitions[i]->place_nums[0]); + } + count = 1; + } + } + // Primary thread executes on place of the parent thread? + current_place = partitions[0]->place_nums[0]; + if (parent->current_place != -1 && + current_place != parent->current_place) { + proc_bind_die( + proc_bind, T, P, + "Primary thread not on same place (%d) as parent thread (%d)\n", + current_place, parent->current_place); + } + } + } else if (proc_bind == omp_proc_bind_close || + proc_bind == omp_proc_bind_master) { + // Check that each subpartition is the same as the parent + for (i = 0; i < T; ++i) { + partition = partitions[i]; + place_nums = partition->place_nums; + num_places = partition->num_places; + current_place = partition->current_place; + if (parent->num_places != num_places) { + proc_bind_die(proc_bind, T, P, + "Number of places in subpartition (%d) does not match " + "parent (%d)\n", + num_places, parent->num_places); + } + for (j = 0; j < num_places; ++j) { + if (parent->place_nums[j] != place_nums[j]) { + proc_bind_die(proc_bind, T, P, + "Subpartition place (%d) does not match " + "parent partition place (%d)\n", + place_nums[j], parent->place_nums[j]); + } + } + } + // Find index into place_nums of current place for parent + for (j = 0; j < parent->num_places; ++j) + if (parent->place_nums[j] == parent->current_place) + break; + if (proc_bind == omp_proc_bind_close) { + if (T <= P) { + // close T <= P + // check place assignment for each thread + for (i = 0; i < T; ++i) { + partition = partitions[i]; + current_place = partition->current_place; + if (current_place != 
parent->place_nums[j]) { + proc_bind_die( + proc_bind, T, P, + "Thread %d's current place (%d) is incorrect. expected %d\n", i, + current_place, parent->place_nums[j]); + } + j = (j + 1) % parent->num_places; + } + } else { + // close T > P + // check place assignment for each thread + low = T / P; + high = T / P + (T % P ? 1 : 0); + count = 1; + if (partitions[0]->current_place != parent->current_place) { + proc_bind_die( + proc_bind, T, P, + "Primary thread's place (%d) is not parent thread's place (%d)\n", + partitions[0]->current_place, parent->current_place); + } + for (i = 1; i < T; ++i) { + current_place = partitions[i]->current_place; + if (current_place == parent->place_nums[j]) { + count++; + if (count > high) { + proc_bind_die( + proc_bind, T, P, + "Too many threads have place %d for their current place\n", + current_place); + } + } else { + if (count < low) { + proc_bind_die( + proc_bind, T, P, + "Not enough threads have place %d for their current place\n", + parent->place_nums[j]); + } + j = (j + 1) % parent->num_places; + if (current_place != parent->place_nums[j]) { + proc_bind_die( + proc_bind, T, P, + "Thread %d's place (%d) is not correct. 
Expected %d\n", i, + partitions[i]->current_place, parent->place_nums[j]); + } + count = 1; + } + } + } + } else { + // proc_bind_primary + // Every thread should be assigned to the primary thread's place + for (i = 0; i < T; ++i) { + if (partitions[i]->current_place != parent->current_place) { + proc_bind_die( + proc_bind, T, P, + "Thread %d's place (%d) is not the primary thread's place (%d)\n", + i, partitions[i]->current_place, parent->current_place); + } + } + } } + + // Check that each partition's current place is within the partition + for (i = 0; i < T; ++i) { + current_place = partitions[i]->current_place; + num_places = partitions[i]->num_places; + first = partitions[i]->place_nums[0]; + last = partitions[i]->place_nums[num_places - 1]; + for (j = 0; j < num_places; ++j) + if (partitions[i]->place_nums[j] == current_place) + break; + if (j == num_places) { + proc_bind_die(proc_bind, T, P, + "Thread %d's current place (%d) is not within its " + "partition [%d, %d]\n", + i, current_place, first, last); + } + } + + free(partitions); } #endif diff --git a/openmp/runtime/test/affinity/teams-affinity.c b/openmp/runtime/test/affinity/teams-affinity.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/affinity/teams-affinity.c @@ -0,0 +1,119 @@ +// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 %libomp-run +// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run +// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_TEAMS_PROC_BIND=close %libomp-run +// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_TEAMS_PROC_BIND=close KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run +// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_TEAMS_PROC_BIND=primary %libomp-run +// RUN: %libomp-compile 
&& env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_TEAMS_PROC_BIND=primary KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run +// REQUIRES: linux +// UNSUPPORTED: clang-5, clang-6, clang-7, clang-8, clang-9, clang-10 +// UNSUPPORTED: gcc-5, gcc-6, gcc-7, gcc-8 +// UNSUPPORTED: icc +// +// KMP_TEAMS_THREAD_LIMIT limits the number of total teams +// OMP_TEAMS_THREAD_LIMIT limits the number of threads per team + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include "libomp_test_affinity.h" +#include "libomp_test_topology.h" + +#define _STR(X) #X +#define STR(X) _STR(X) + +#ifndef MAX_NTEAMS +#define MAX_NTEAMS 256 +#endif + +static void set_default_max_nteams() { + // Do not overwrite if already in environment + setenv("KMP_TEAMS_THREAD_LIMIT", STR(MAX_NTEAMS), 0); +} + +static int get_max_nteams() { + int max_nteams; + const char *value = getenv("KMP_TEAMS_THREAD_LIMIT"); + if (!value) { + fprintf(stderr, "KMP_TEAMS_THREAD_LIMIT must be set!\n"); + exit(EXIT_FAILURE); + } + max_nteams = atoi(value); + if (max_nteams <= 0) + max_nteams = 1; + if (max_nteams > MAX_NTEAMS) + max_nteams = MAX_NTEAMS; + return max_nteams; +} + +// Return the value in KMP_TEAMS_PROC_BIND +static omp_proc_bind_t get_teams_proc_bind() { + // defaults to spread + omp_proc_bind_t proc_bind = omp_proc_bind_spread; + const char *value = getenv("KMP_TEAMS_PROC_BIND"); + if (value) { + if (strcmp(value, "spread") == 0) { + proc_bind = omp_proc_bind_spread; + } else if (strcmp(value, "close") == 0) { + proc_bind = omp_proc_bind_close; + } else if (strcmp(value, "primary") == 0 || strcmp(value, "master") == 0) { + proc_bind = omp_proc_bind_master; + } else { + fprintf(stderr, + "KMP_TEAMS_PROC_BIND should be one of spread, close, primary"); + exit(EXIT_FAILURE); + } + } + return proc_bind; +} + +int main(int argc, char **argv) { + int i, nteams, max_nteams, factor; + place_list_t **teams_places; + place_list_t *place_list; + omp_proc_bind_t 
teams_proc_bind; + + // Set a default for the max number of teams if it is not already set + set_default_max_nteams(); + place_list = topology_alloc_openmp_places(); + max_nteams = get_max_nteams(); + // Further limit the number of teams to twice the number of OMP_PLACES + if (max_nteams > 2 * place_list->num_places) + max_nteams = 2 * place_list->num_places; + teams_places = (place_list_t **)malloc(sizeof(place_list_t *) * max_nteams); + for (i = 0; i < max_nteams; ++i) + teams_places[i] = NULL; + teams_proc_bind = get_teams_proc_bind(); + + // factor controls how quickly nteams grows between test cases: + // the larger the factor, the more test cases will be performed. + if (teams_proc_bind == omp_proc_bind_master) { + factor = 2; + } else { + factor = 8; + } + + for (nteams = 1; nteams <= max_nteams; + nteams = nteams * factor / (factor - 1) + 1) { + // Check the same value twice to make sure hot teams are ok + int j; + for (j = 0; j < 2; ++j) { + // Gather the proc bind partitions from each team + #pragma omp teams num_teams(nteams) + teams_places[omp_get_team_num()] = topology_alloc_openmp_partition(); + + // Check all the partitions with the parent partition + proc_bind_check(teams_proc_bind, place_list, teams_places, nteams); + + // Free the proc bind partitions + for (i = 0; i < nteams; ++i) + topology_free_places(teams_places[i]); + } + } + + free(teams_places); + topology_free_places(place_list); + return EXIT_SUCCESS; +}