Index: openmp/trunk/runtime/src/i18n/en_US.txt =================================================================== --- openmp/trunk/runtime/src/i18n/en_US.txt +++ openmp/trunk/runtime/src/i18n/en_US.txt @@ -425,7 +425,7 @@ AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested." HierSchedInvalid "Hierarchy ignored: unsupported level: %1$s." AffFormatDefault "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}" - +APIDeprecated "%1$s routine deprecated, please use %2$s instead." # -------------------------------------------------------------------------------------------------- -*- HINTS -*- @@ -489,4 +489,3 @@ # -------------------------------------------------------------------------------------------------- # end of file # # -------------------------------------------------------------------------------------------------- - Index: openmp/trunk/runtime/src/kmp.h =================================================================== --- openmp/trunk/runtime/src/kmp.h +++ openmp/trunk/runtime/src/kmp.h @@ -1847,7 +1847,6 @@ typedef struct kmp_internal_control { int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */ - kmp_int8 nested; /* internal control for nested parallelism (per thread) */ kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */ kmp_int8 @@ -2054,8 +2053,6 @@ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) #endif -#define get__nested_2(xteam, xtid) \ - ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested) #define get__dynamic_2(xteam, xtid) \ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) #define get__nproc_2(xteam, xtid) \ @@ -2076,11 +2073,6 @@ #define set__bt_set_team(xteam, xtid, xval) \ (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval)) -#define set__nested(xthread, xval) \ - (((xthread)->th.th_current_task->td_icvs.nested) = (xval)) -#define get__nested(xthread) \ - (((xthread)->th.th_current_task->td_icvs.nested) ? (FTN_TRUE) : (FTN_FALSE)) - #define set__dynamic(xthread, xval) \ (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval)) #define get__dynamic(xthread) \ @@ -2095,6 +2087,9 @@ #define set__max_active_levels(xthread, xval) \ (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval)) +#define get__max_active_levels(xthread) \ + ((xthread)->th.th_current_task->td_icvs.max_active_levels) + #define set__sched(xthread, xval) \ (((xthread)->th.th_current_task->td_icvs.sched) = (xval)) @@ -2821,8 +2816,6 @@ // TODO: GEH - then replace r_active with t_active_levels if we can to reduce // the synch overhead or keeping r_active volatile int r_active; /* TRUE if some region in a nest has > 1 thread */ - // GEH: This is misnamed, should be r_in_parallel - volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely. // keeps a count of active parallel regions per root std::atomic r_in_parallel; // GEH: This is misnamed, should be r_active_levels @@ -3013,8 +3006,6 @@ used (fixed) */ extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */ -extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la - OMP_NESTED */ extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */ #if KMP_USE_MONITOR @@ -3056,9 +3047,12 @@ extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested - parallelism enabled by default via - OMP_MAX_ACTIVE_LEVELS */ +// max_active_levels for nested parallelism enabled by default via +// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND +extern int __kmp_dflt_max_active_levels; +// Indicates whether value of __kmp_dflt_max_active_levels was already +// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false +extern bool __kmp_dflt_max_active_levels_set; extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in concurrent execution per team */ #if KMP_NESTED_HOT_TEAMS Index: openmp/trunk/runtime/src/kmp_csupport.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_csupport.cpp +++ openmp/trunk/runtime/src/kmp_csupport.cpp @@ -1872,7 +1872,7 @@ __kmp_save_internal_controls(thread); - set__nested(thread, flag ? TRUE : FALSE); + set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1); } void ompc_set_max_active_levels(int max_active_levels) { Index: openmp/trunk/runtime/src/kmp_ftn_entry.h =================================================================== --- openmp/trunk/runtime/src/kmp_ftn_entry.h +++ openmp/trunk/runtime/src/kmp_ftn_entry.h @@ -595,6 +595,7 @@ } void FTN_STDCALL KMP_EXPAND_NAME(FTN_SET_NESTED)(int KMP_DEREF flag) { + KMP_INFORM(APIDeprecated, "omp_set_nested", "omp_set_max_active_levels"); #ifdef KMP_STUB __kmps_set_nested(KMP_DEREF flag); #else @@ -602,17 +603,22 @@ /* For the thread-private internal controls implementation */ thread = __kmp_entry_thread(); __kmp_save_internal_controls(thread); - set__nested(thread, ((KMP_DEREF flag) ? TRUE : FALSE)); + // Somewhat arbitrarily decide where to get a value for max_active_levels + int max_active_levels = get__max_active_levels(thread); + if (max_active_levels == 1) + max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; + set__max_active_levels(thread, (KMP_DEREF flag) ? max_active_levels : 1); #endif } int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NESTED)(void) { + KMP_INFORM(APIDeprecated, "omp_get_nested", "omp_get_max_active_levels"); #ifdef KMP_STUB return __kmps_get_nested(); #else kmp_info_t *thread; thread = __kmp_entry_thread(); - return get__nested(thread); + return get__max_active_levels(thread) > 1; #endif } Index: openmp/trunk/runtime/src/kmp_global.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_global.cpp +++ openmp/trunk/runtime/src/kmp_global.cpp @@ -127,10 +127,9 @@ int __kmp_dflt_team_nth_ub = 0; int __kmp_tp_capacity = 0; int __kmp_tp_cached = 0; -int __kmp_dflt_nested = FALSE; int __kmp_dispatch_num_buffers = KMP_DFLT_DISP_NUM_BUFF; -int __kmp_dflt_max_active_levels = - KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */ +int __kmp_dflt_max_active_levels = 1; // Nesting off by default +bool __kmp_dflt_max_active_levels_set = false; // Don't override set value #if KMP_NESTED_HOT_TEAMS int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ /* 1 - keep extra threads when reduced */ Index: openmp/trunk/runtime/src/kmp_runtime.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_runtime.cpp +++ openmp/trunk/runtime/src/kmp_runtime.cpp @@ -1719,11 +1719,11 @@ // parallel out of teams construct). This code moved here from // __kmp_reserve_threads() to speedup nested serialized parallels. if (nthreads > 1) { - if ((!get__nested(master_th) && (root->r.r_in_parallel + if ((get__max_active_levels(master_th) == 1 && (root->r.r_in_parallel #if OMP_40_ENABLED - && !enter_teams + && !enter_teams #endif /* OMP_40_ENABLED */ - )) || + )) || (__kmp_library == library_serial)) { KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d" " threads\n", @@ -3150,8 +3150,6 @@ kmp_internal_control_t g_icvs = { 0, // int serial_nesting_level; //corresponds to value of th_team_serialized - (kmp_int8)__kmp_dflt_nested, // int nested; //internal control - // for nested parallelism (per thread) (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic // adjustment of threads (per thread) (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for @@ -3207,7 +3205,6 @@ root->r.r_active = FALSE; root->r.r_in_parallel = 0; root->r.r_blocktime = __kmp_dflt_blocktime; - root->r.r_nested = __kmp_dflt_nested; /* setup the root team for this task */ /* allocate the root team structure */ @@ -3432,7 +3429,6 @@ __kmp_print_structure_thread(" Uber Thread: ", root->r.r_uber_thread); __kmp_printf(" Active?: %2d\n", root->r.r_active); - __kmp_printf(" Nested?: %2d\n", root->r.r_nested); __kmp_printf(" In Parallel: %2d\n", KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel)); __kmp_printf("\n"); Index: openmp/trunk/runtime/src/kmp_settings.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_settings.cpp +++ openmp/trunk/runtime/src/kmp_settings.cpp @@ -975,12 +975,27 @@ static void __kmp_stg_parse_nested(char const *name, char const *value, void *data) { - __kmp_stg_parse_bool(name, value, &__kmp_dflt_nested); + int nested; + KMP_INFORM(EnvVarDeprecated, name, "OMP_MAX_ACTIVE_LEVELS"); + __kmp_stg_parse_bool(name, value, &nested); + if (nested) { + if (!__kmp_dflt_max_active_levels_set) + __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; + } else { // nesting explicitly turned off + __kmp_dflt_max_active_levels = 1; + __kmp_dflt_max_active_levels_set = true; + } } // __kmp_stg_parse_nested static void __kmp_stg_print_nested(kmp_str_buf_t *buffer, char const *name, void *data) { - __kmp_stg_print_bool(buffer, name, __kmp_dflt_nested); + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + __kmp_str_buf_print(buffer, ": deprecated; max-active-levels-var=%d\n", + __kmp_dflt_max_active_levels); } // __kmp_stg_print_nested static void __kmp_parse_nested_num_threads(const char *var, const char *env, @@ -1026,6 +1041,8 @@ } } } + if (!__kmp_dflt_max_active_levels_set && total > 1) + __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; KMP_DEBUG_ASSERT(total > 0); if (total <= 0) { KMP_WARNING(NthSyntaxError, var, env); @@ -1182,8 +1199,22 @@ static void __kmp_stg_parse_max_active_levels(char const *name, char const *value, void *data) { - __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, - &__kmp_dflt_max_active_levels); + kmp_uint64 tmp_dflt = 0; + char const *msg = NULL; + if (!__kmp_dflt_max_active_levels_set) { + // Don't overwrite __kmp_dflt_max_active_levels if we get an invalid setting + __kmp_str_to_uint(value, &tmp_dflt, &msg); + if (msg != NULL) { // invalid setting; print warning and ignore + KMP_WARNING(ParseSizeIntWarn, name, value, msg); + } else if (tmp_dflt > KMP_MAX_ACTIVE_LEVELS_LIMIT) { + // invalid setting; print warning and ignore + msg = KMP_I18N_STR(ValueTooLarge); + KMP_WARNING(ParseSizeIntWarn, name, value, msg); + } else { // valid setting + __kmp_dflt_max_active_levels = tmp_dflt; + __kmp_dflt_max_active_levels_set = true; + } + } } // __kmp_stg_parse_max_active_levels static void __kmp_stg_print_max_active_levels(kmp_str_buf_t *buffer, @@ -1240,9 +1271,13 @@ value = "MANDATORY"; else if (__kmp_target_offload == tgt_disabled) value = "DISABLED"; - if (value) { - __kmp_str_buf_print(buffer, " %s=%s\n", name, value); + KMP_DEBUG_ASSERT(value); + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); } + __kmp_str_buf_print(buffer, "=%s\n", value); } // __kmp_stg_print_target_offload #endif @@ -3162,6 +3197,9 @@ } __kmp_nested_proc_bind.used = nelem; + if (nelem > 1 && !__kmp_dflt_max_active_levels_set) + __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; + // Save values in the nested proc_bind array int i = 0; for (;;) { @@ -5248,7 +5286,7 @@ /* OMP_NESTED */ value = __kmp_env_blk_var(block, "OMP_NESTED"); if (value) { - ompc_set_nested(__kmp_dflt_nested); + ompc_set_nested(__kmp_dflt_max_active_levels > 1); } /* OMP_DYNAMIC */