Index: CREDITS.txt =================================================================== --- CREDITS.txt +++ CREDITS.txt @@ -51,3 +51,7 @@ N: Cheng Wang D: Contributor to testsuite from OpenUH + +N: Diego Caballero +E: diego.l.caballero@gmail.com +D: Fork performance improvements Index: runtime/src/kmp.h =================================================================== --- runtime/src/kmp.h +++ runtime/src/kmp.h @@ -1951,6 +1951,9 @@ } kmp_local_t; +#define KMP_CHECK_UPDATE(a, b) if ((a) != (b)) (a) = (b) +#define KMP_CHECK_UPDATE_SYNC(a, b) if ((a) != (b)) TCW_SYNC_PTR((a), (b)) + #define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) #define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) #define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) @@ -2196,7 +2199,7 @@ kmp_uint32 td_taskwait_counter; kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */ KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */ - volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */ + KMP_ALIGN_CACHE volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */ volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */ #if OMP_40_ENABLED kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup @@ -2515,12 +2518,14 @@ void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ]; KMP_ALIGN_CACHE kmp_info_t **t_threads; - int t_max_argc; + kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task + int t_level; // nested parallel level + + KMP_ALIGN_CACHE int t_max_argc; int t_max_nproc; // maximum threads this team can handle (dynamicly expandable) int t_serialized; // levels deep of serialized teams dispatch_shared_info_t 
*t_disp_buffer; // buffers for dispatch system int t_id; // team's id, assigned by debugger. - int t_level; // nested parallel level int t_active_level; // nested active parallel level kmp_r_sched_t t_sched; // run-time schedule for the team #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED @@ -2536,8 +2541,7 @@ // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding. char dummy_padding[1024]; #endif - KMP_ALIGN_CACHE kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task - kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams. + KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams. // for SERIALIZED teams nested 2 or more levels deep #if OMP_40_ENABLED kmp_int32 t_cancel_request; // typed flag to store request state of cancellation Index: runtime/src/kmp_runtime.c =================================================================== --- runtime/src/kmp_runtime.c +++ runtime/src/kmp_runtime.c @@ -2003,32 +2003,38 @@ KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) ); /* setup the new team */ - team->t.t_master_tid = master_tid; - team->t.t_master_this_cons = master_this_cons; - team->t.t_ident = loc; - team->t.t_parent = parent_team; - TCW_SYNC_PTR(team->t.t_pkfn, microtask); + KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); + KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons); + KMP_CHECK_UPDATE(team->t.t_ident, loc); + KMP_CHECK_UPDATE(team->t.t_parent, parent_team); + KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask); #if OMPT_SUPPORT - TCW_SYNC_PTR(team->t.ompt_team_info.microtask, unwrapped_task); + KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task); #endif - team->t.t_invoke = invoker; /* TODO move this to root, maybe */ + KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */ // TODO: 
parent_team->t.t_level == INT_MAX ??? #if OMP_40_ENABLED if ( !master_th->th.th_teams_microtask || level > teams_level ) { #endif /* OMP_40_ENABLED */ - team->t.t_level = parent_team->t.t_level + 1; - team->t.t_active_level = parent_team->t.t_active_level + 1; + int new_level = parent_team->t.t_level + 1; + KMP_CHECK_UPDATE(team->t.t_level, new_level); + new_level = parent_team->t.t_active_level + 1; + KMP_CHECK_UPDATE(team->t.t_active_level, new_level); #if OMP_40_ENABLED } else { // AC: Do not increase parallel level at start of the teams construct - team->t.t_level = parent_team->t.t_level; - team->t.t_active_level = parent_team->t.t_active_level; + int new_level = parent_team->t.t_level; + KMP_CHECK_UPDATE(team->t.t_level, new_level); + new_level = parent_team->t.t_active_level; + KMP_CHECK_UPDATE(team->t.t_active_level, new_level); } #endif /* OMP_40_ENABLED */ - team->t.t_sched = get__sched_2(parent_team, master_tid); // set master's schedule as new run-time schedule + kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); + if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk) + team->t.t_sched = new_sched; // set master's schedule as new run-time schedule #if OMP_40_ENABLED - team->t.t_cancel_request = cancel_noreq; + KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); #endif // Update the floating point rounding in the team if required. 
@@ -2095,23 +2101,27 @@ #if OMP_40_ENABLED if ( ap ) { #endif /* OMP_40_ENABLED */ - for ( i=argc-1; i >= 0; --i ) + for ( i=argc-1; i >= 0; --i ) { // TODO: revert workaround for Intel(R) 64 tracker #96 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg( *ap, void * ); + void *new_argv = va_arg(*ap, void *); #else - *argv++ = va_arg( ap, void * ); + void *new_argv = va_arg(ap, void *); #endif + KMP_CHECK_UPDATE(*argv, new_argv); + argv++; + } #if OMP_40_ENABLED } else { - for ( i=0; i < argc; ++i ) + for ( i=0; i < argc; ++i ) { // Get args from parent team for teams construct - argv[i] = team->t.t_parent->t.t_argv[i]; + KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); + } } #endif /* OMP_40_ENABLED */ /* now actually fork the threads */ - team->t.t_master_active = master_active; + KMP_CHECK_UPDATE(team->t.t_master_active, master_active); if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong root->r.r_active = TRUE; @@ -4320,9 +4330,9 @@ team->t.t_threads[0], team ) ); KMP_DEBUG_ASSERT( team && new_icvs); KMP_DEBUG_ASSERT( ( ! 
TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); - team->t.t_ident = loc; + KMP_CHECK_UPDATE(team->t.t_ident, loc); - team->t.t_id = KMP_GEN_TEAM_ID(); + KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); // Copy ICVs to the master thread's implicit taskdata __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE ); @@ -4774,11 +4784,13 @@ if ( team->t.t_size_changed == -1 ) { team->t.t_size_changed = 1; } else { - team->t.t_size_changed = 0; + KMP_CHECK_UPDATE(team->t.t_size_changed, 0); } // TODO???: team->t.t_max_active_levels = new_max_active_levels; - team->t.t_sched = new_icvs->sched; + kmp_r_sched_t new_sched = new_icvs->sched; + if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk) + team->t.t_sched = new_sched; // set master's schedule as new run-time schedule __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); @@ -4795,7 +4807,7 @@ team->t.t_last_place ) ); } else { - team->t.t_proc_bind = new_proc_bind; + KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); __kmp_partition_places( team ); } # else @@ -5016,7 +5028,7 @@ /* reallocate space for arguments if necessary */ __kmp_alloc_argv_entries( argc, team, TRUE ); - team->t.t_argc = argc; + KMP_CHECK_UPDATE(team->t.t_argc, argc); // // The hot team re-uses the previous task team, // if untouched during the previous release->gather phase. @@ -5059,7 +5071,7 @@ /* reallocate space for arguments if necessary */ __kmp_alloc_argv_entries( argc, team, TRUE ); - team->t.t_argc = argc; + KMP_CHECK_UPDATE(team->t.t_argc, argc); KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));