Index: openmp/trunk/runtime/src/kmp.h =================================================================== --- openmp/trunk/runtime/src/kmp.h +++ openmp/trunk/runtime/src/kmp.h @@ -2100,14 +2100,6 @@ KMP_ALIGN_CACHE volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */ - - KMP_ALIGN_CACHE -#if KMP_USE_INTERNODE_ALIGNMENT - kmp_int32 tt_padme[INTERNODE_CACHE_LINE/sizeof(kmp_int32)]; -#endif - - volatile kmp_uint32 tt_ref_ct; /* #threads accessing struct */ - /* (not incl. master) */ } kmp_base_task_team_t; union KMP_ALIGN_CACHE kmp_task_team { @@ -3172,15 +3164,16 @@ #endif /* USE_ITT_BUILD */ kmp_int32 is_constrained); +extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ); extern void __kmp_reap_task_teams( void ); -extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread ); extern void __kmp_wait_to_unref_task_teams( void ); -extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int both, int always ); +extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always ); extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team ); extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team #if USE_ITT_BUILD , void * itt_sync_obj #endif /* USE_ITT_BUILD */ + , int wait=1 ); extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ); Index: openmp/trunk/runtime/src/kmp_barrier.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_barrier.cpp +++ openmp/trunk/runtime/src/kmp_barrier.cpp @@ -1153,7 +1153,7 @@ if (__kmp_tasking_mode != tskm_immediate_exec) { __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj) ); - __kmp_task_team_setup(this_thr, team, 0, 0); // use 0,0 to only setup the current team if nthreads > 1 + __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team if nthreads > 1 } #if USE_DEBUGGER // Let the debugger know: All threads are arrived and starting leaving the barrier. 
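Note on the interface change above: the `both` parameter is dropped from `__kmp_task_team_setup`, and `__kmp_task_team_wait` gains a trailing `wait` parameter that defaults to 1, so existing two-argument call sites keep compiling while the fork barrier can opt out of the blocking wait. A minimal sketch of that default-argument pattern, using hypothetical stand-in types rather than the runtime's kmp_info_t/kmp_team_t:

    #include <cstdio>

    // Hypothetical stand-ins for kmp_info_t / kmp_team_t, only so the sketch compiles.
    struct thr_t  { int gtid; };
    struct team_t { int id; };

    // Trailing parameter with a default, mirroring the kmp.h declaration style:
    // two-argument call sites keep compiling and behave as before (wait == 1).
    static void task_team_wait(thr_t *thr, team_t *team, int wait = 1) {
        if (wait)
            std::printf("T#%d: wait for tasks on team %d to drain, then deactivate\n", thr->gtid, team->id);
        else
            std::printf("T#%d: skip the blocking wait, deactivate only\n", thr->gtid);
    }

    int main() {
        thr_t master = {0};
        team_t team  = {7};
        task_team_wait(&master, &team);      // legacy call site: defaults to waiting
        task_team_wait(&master, &team, 0);   // new call site: opts out of the wait
        return 0;
    }

The real __kmp_task_team_wait additionally drains tt_unfinished_threads before deactivating the task team; the sketch only shows why old call sites need no edits.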
@@ -1261,7 +1261,7 @@ KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE); __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); - __kmp_task_team_setup(this_thr, team, 0, 0); + __kmp_task_team_setup(this_thr, team, 0); #if USE_ITT_BUILD if (__itt_sync_create_ptr || KMP_ITT_DEBUG) @@ -1575,7 +1575,7 @@ #endif if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_setup(this_thr, team, 1, 0); // 1,0 indicates setup both task teams if nthreads > 1 + __kmp_task_team_setup(this_thr, team, 0); // 0 indicates setup current task team if nthreads > 1 } /* The master thread may have changed its blocktime between the join barrier and the @@ -1614,14 +1614,7 @@ // Early exit for reaping threads releasing forkjoin barrier if (TCR_4(__kmp_global.g.g_done)) { - if (this_thr->th.th_task_team != NULL) { - if (KMP_MASTER_TID(tid)) { - TCW_PTR(this_thr->th.th_task_team, NULL); - } - else { - __kmp_unref_task_team(this_thr->th.th_task_team, this_thr); - } - } + this_thr->th.th_task_team = NULL; #if USE_ITT_BUILD && USE_ITT_NOTIFY if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { Index: openmp/trunk/runtime/src/kmp_runtime.c =================================================================== --- openmp/trunk/runtime/src/kmp_runtime.c +++ openmp/trunk/runtime/src/kmp_runtime.c @@ -2104,23 +2104,31 @@ // Take a memo of master's task_state KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size - kmp_uint8 *old_stack, *new_stack = (kmp_uint8 *) __kmp_allocate( 2*master_th->th.th_task_state_stack_sz ); + kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz; + kmp_uint8 *old_stack, *new_stack; kmp_uint32 i; + new_stack = (kmp_uint8 *)__kmp_allocate(new_size); for (i=0; i<master_th->th.th_task_state_stack_sz; ++i) { new_stack[i] = master_th->th.th_task_state_memo_stack[i]; } + for (i=master_th->th.th_task_state_stack_sz; i<new_size; ++i) { // zero-init rest of stack + new_stack[i] = 0; + } old_stack = master_th->th.th_task_state_memo_stack; master_th->th.th_task_state_memo_stack = new_stack; - master_th->th.th_task_state_stack_sz *= 2; + master_th->th.th_task_state_stack_sz = new_size; __kmp_free(old_stack); } // Store master's task_state on stack master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; master_th->th.th_task_state_top++; - master_th->th.th_task_state = 0; + if (team == master_th->th.th_hot_teams[level].hot_team) { // Restore master's nested state if nested hot team + master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; + } + else { + master_th->th.th_task_state = 0; + } } - master_th->th.th_task_team = team->t.t_task_team[master_th->th.th_task_state]; - #if !KMP_NESTED_HOT_TEAMS KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team)); #endif @@ -2410,12 +2418,7 @@ int old_num = master_th->th.th_team_nproc; int new_num = master_th->th.th_teams_size.nth; kmp_info_t **other_threads = team->t.t_threads; - kmp_task_team_t * task_team = master_th->th.th_task_team; team->t.t_nproc = new_num; - if ( task_team ) { // task team might have lesser value of counters - task_team->tt.tt_ref_ct = new_num - 1; - task_team->tt.tt_unfinished_threads = new_num; - } for ( i = 0; i < old_num; ++i ) { other_threads[i]->th.th_team_nproc = new_num; } @@ -2509,18 +2512,18 @@ } if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Restore task state from memo stack - KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); - if
(master_th->th.th_task_state_top > 0) { + if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack + KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); + // Remember master's state if we re-use this nested hot team + master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; --master_th->th.th_task_state_top; // pop + // Now restore state at this level master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; } - // Copy the first task team from the new child / old parent team to the thread and reset state flag. + // Copy the task team from the parent team to the master thread master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state]; - KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", - __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, - parent_team ) ); + __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) ); } // TODO: GEH - cannot do this assertion because root thread not set up as executing @@ -2615,31 +2618,13 @@ __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - int tt_idx; - for (tt_idx=0; tt_idx<2; ++tt_idx) { - kmp_task_team_t *task_team = hot_team->t.t_task_team[tt_idx]; - if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) { - // Signal worker threads (esp. the extra ones) to stop looking for tasks while spin waiting. - // The task teams are reference counted and will be deallocated by the last worker thread. - KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 ); - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - KMP_MB(); - KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n", - &hot_team->t.t_task_team[tt_idx] ) ); - hot_team->t.t_task_team[tt_idx] = NULL; - } - else { - KMP_DEBUG_ASSERT( task_team == NULL ); - } - } - } - - // // Release the extra threads we don't need any more. - // for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) { KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); + if ( __kmp_tasking_mode != tskm_immediate_exec) { + // When decreasing team size, threads no longer in the team should unref task team. + hot_team->t.t_threads[f]->th.th_task_team = NULL; + } __kmp_free_thread( hot_team->t.t_threads[f] ); hot_team->t.t_threads[f] = NULL; } @@ -4081,7 +4066,6 @@ TCW_PTR(this_thr->th.th_sleep_loc, NULL); KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata ); - this_thr->th.th_task_state = 0; KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", tid, gtid, this_thr, this_thr->th.th_current_task ) ); @@ -4151,9 +4135,12 @@ this_thr->th.th_next_pool = NULL; if (!this_thr->th.th_task_state_memo_stack) { + size_t i; this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) ); this_thr->th.th_task_state_top = 0; this_thr->th.th_task_state_stack_sz = 4; + for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack + this_thr->th.th_task_state_memo_stack[i] = 0; } KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); @@ -4211,6 +4198,7 @@ TCW_4(__kmp_nth, __kmp_nth + 1); + new_thr->th.th_task_state = 0; new_thr->th.th_task_state_top = 0; new_thr->th.th_task_state_stack_sz = 4; @@ -4896,26 +4884,6 @@ KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc )); team->t.t_size_changed = 1; - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Signal the worker threads (esp.
extra ones) to stop looking for tasks while spin waiting. - // The task teams are reference counted and will be deallocated by the last worker thread. - int tt_idx; - for (tt_idx=0; tt_idx<2; ++tt_idx) { - // We don't know which of the two task teams workers are waiting on, so deactivate both. - kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; - if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) { - KMP_DEBUG_ASSERT( team->t.t_nproc > 1 ); - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - KMP_MB(); - KA_TRACE(20, ("__kmp_allocate_team: setting task_team %p to NULL\n", - &team->t.t_task_team[tt_idx])); - team->t.t_task_team[tt_idx] = NULL; - } - else { - KMP_DEBUG_ASSERT( task_team == NULL ); - } - } - } #if KMP_NESTED_HOT_TEAMS if( __kmp_hot_teams_mode == 0 ) { // AC: saved number of threads should correspond to team's value in this mode, @@ -4926,6 +4894,10 @@ /* release the extra threads we don't need any more */ for( f = new_nproc ; f < team->t.t_nproc ; f++ ) { KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); + if ( __kmp_tasking_mode != tskm_immediate_exec) { + // When decreasing team size, threads no longer in the team should unref task team. + team->t.t_threads[f]->th.th_task_team = NULL; + } __kmp_free_thread( team->t.t_threads[ f ] ); team->t.t_threads[ f ] = NULL; } @@ -4937,32 +4909,9 @@ team->t.t_sched = new_icvs->sched; __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Init both task teams - int tt_idx; - for (tt_idx=0; tt_idx<2; ++tt_idx) { - kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; - if ( task_team != NULL ) { - KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) ); - task_team->tt.tt_nproc = new_nproc; - task_team->tt.tt_unfinished_threads = new_nproc; - task_team->tt.tt_ref_ct = new_nproc - 1; - } - } - } - /* update the remaining threads */ - if (level) { - team->t.t_threads[0]->th.th_team_nproc = new_nproc; - for(f = 1; f < new_nproc; ++f) { - team->t.t_threads[f]->th.th_team_nproc = new_nproc; - team->t.t_threads[f]->th.th_task_state = 0; - } - } - else { - for(f = 0; f < new_nproc; ++f) { - team->t.t_threads[f]->th.th_team_nproc = new_nproc; - } + for(f = 0; f < new_nproc; ++f) { + team->t.t_threads[f]->th.th_team_nproc = new_nproc; } // restore the current task state of the master thread: should be the implicit task KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", @@ -5076,39 +5025,24 @@ } // end of check of t_nproc vs. new_nproc vs. hot_team_nth #endif // KMP_NESTED_HOT_TEAMS /* make sure everyone is syncronized */ + int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident ); - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Signal the worker threads to stop looking for tasks while spin waiting. - // The task teams are reference counted and will be deallocated by the last worker thread. - int tt_idx; - for (tt_idx=0; tt_idx<2; ++tt_idx) { - // We don't know which of the two task teams workers are waiting on, so deactivate both. 
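Note on the shrink paths above: with reference counting gone, a hot team that loses threads no longer deactivates its task teams; the released threads simply drop their th_task_team pointer and the master recycles or frees the task team later. A rough sketch of that ownership model with simplified stand-in structs (not the runtime's kmp_info_t/kmp_task_team_t):

    #include <cstdio>

    // Simplified stand-ins; the real runtime uses kmp_info_t / kmp_task_team_t.
    struct task_team_t { int nproc; };
    struct thread_t    { task_team_t *task_team; };

    // Shrink-path sketch: threads leaving the team just drop their pointer; the
    // task team itself stays untouched and is reset or freed later by the master.
    static void shrink_team(thread_t **threads, int old_nproc, int new_nproc) {
        for (int f = new_nproc; f < old_nproc; ++f) {
            threads[f]->task_team = nullptr;  // no reference count to decrement anymore
            // ...the runtime would call __kmp_free_thread(threads[f]) here...
        }
    }

    int main() {
        task_team_t tt = {4};
        thread_t t0 = {&tt}, t1 = {&tt}, t2 = {&tt}, t3 = {&tt};
        thread_t *team[4] = {&t0, &t1, &t2, &t3};
        shrink_team(team, 4, 2);
        std::printf("thread 3 task_team = %p\n", (void *)team[3]->task_team);
        return 0;
    }

The point of the change is that task-team lifetime is owned by the master and the team, so no per-thread decrement is needed when threads leave.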
- kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; - if ( (task_team != NULL) && TCR_SYNC_4(task_team->tt.tt_active) ) { - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - team->t.t_task_team[tt_idx] = NULL; - } - } - } - /* reinitialize the threads */ KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); - if (level) { - int old_state = team->t.t_threads[0]->th.th_task_state; - for (f=0; f < team->t.t_nproc; ++f) - __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); - // th_task_state for master thread will be put in stack of states in __kmp_fork_call() - // before zeroing, for workers it was just zeroed in __kmp_initialize_info() - team->t.t_threads[0]->th.th_task_state = old_state; - } - else { - int old_state = team->t.t_threads[0]->th.th_task_state; - for (f=0; f<team->t.t_nproc; ++f) { - __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); + for (f=0; f < team->t.t_nproc; ++f) + __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); + if (level) { // set th_task_state for new threads in nested hot team + // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the + // th_task_state for the new threads. th_task_state for master thread will not be accurate until + // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value. + for (f=old_nproc; f < team->t.t_nproc; ++f) + team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level]; + } + else { // set th_task_state for new threads in non-nested hot team + int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state + for (f=old_nproc; f < team->t.t_nproc; ++f) team->t.t_threads[f]->th.th_task_state = old_state; - team->t.t_threads[f]->th.th_task_team = team->t.t_task_team[old_state]; - } } #ifdef KMP_DEBUG @@ -5342,18 +5276,17 @@ /* if we are non-hot team, release our threads */ if( ! use_hot_team ) { if ( __kmp_tasking_mode != tskm_immediate_exec ) { + // Delete task teams int tt_idx; for (tt_idx=0; tt_idx<2; ++tt_idx) { - // We don't know which of the two task teams workers are waiting on, so deactivate both. kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; if ( task_team != NULL ) { - // Signal the worker threads to stop looking for tasks while spin waiting. The task - // teams are reference counted and will be deallocated by the last worker thread via the - // thread's pointer to the task team.
- KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n", task_team ) ); + for (f=0; ft.t_nproc; ++f) { // Have all threads unref task teams + team->t.t_threads[f]->th.th_task_team = NULL; + } + KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) ); KMP_DEBUG_ASSERT( team->t.t_nproc > 1 ); - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - KMP_MB(); + __kmp_free_task_team( master, task_team ); team->t.t_task_team[tt_idx] = NULL; } } @@ -5452,6 +5385,7 @@ balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; balign[b].bb.team = NULL; } + this_th->th.th_task_state = 0; /* put thread back on the free pool */ @@ -5622,9 +5556,7 @@ } #endif - if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) { - __kmp_unref_task_team( this_thr->th.th_task_team, this_thr ); - } + this_thr->th.th_task_team = NULL; /* run the destructors for the threadprivate data for this thread */ __kmp_common_destroy_gtid( gtid ); @@ -6120,10 +6052,7 @@ KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid )); if ( gtid >= 0 ) { - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - if (TCR_PTR(this_thr->th.th_task_team) != NULL) { - __kmp_unref_task_team(this_thr->th.th_task_team, this_thr); - } + __kmp_threads[gtid]->th.th_task_team = NULL; } KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid )); Index: openmp/trunk/runtime/src/kmp_tasking.c =================================================================== --- openmp/trunk/runtime/src/kmp_tasking.c +++ openmp/trunk/runtime/src/kmp_tasking.c @@ -895,7 +895,7 @@ */ KMP_DEBUG_ASSERT(team->t.t_serialized); KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid)); - __kmp_task_team_setup(thread,team,0,1); // 0,1 indicates only setup the current team regardless of nthreads + __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state]; } kmp_task_team_t * task_team = thread->th.th_task_team; @@ -1297,8 +1297,7 @@ kmp_info_t * thread; int thread_finished = FALSE; - KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", - gtid, loc_ref) ); + KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) ); if ( __kmp_tasking_mode != tskm_immediate_exec ) { // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? @@ -1688,7 +1687,7 @@ KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] ); task_team = thread -> th.th_task_team; - KMP_DEBUG_ASSERT( task_team != NULL ); + if (task_team == NULL) return FALSE; KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n", gtid, final_spin, *thread_finished) ); @@ -1732,6 +1731,7 @@ KA_TRACE(15, ("__kmp_execute_tasks_template(exit #1): T#%d spin condition satisfied\n", gtid) ); return TRUE; } + if (thread->th.th_task_team == NULL) break; KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task } @@ -1767,6 +1767,7 @@ } } + if (thread->th.th_task_team == NULL) return FALSE; #if OMP_41_ENABLED // check if there are other threads to steal from, otherwise go back if ( nthreads == 1 ) @@ -1805,6 +1806,7 @@ return TRUE; } + if (thread->th.th_task_team == NULL) break; KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task // If the execution of the stolen task resulted in more tasks being // placed on our run queue, then restart the whole process. 
@@ -1851,6 +1853,7 @@ return TRUE; } } + if (thread->th.th_task_team == NULL) return FALSE; } // Find a different thread to steal work from. Pick a random thread. @@ -1919,6 +1922,7 @@ gtid) ); return TRUE; } + if (thread->th.th_task_team == NULL) break; KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task // If the execution of the stolen task resulted in more tasks being @@ -1966,6 +1970,7 @@ return TRUE; } } + if (thread->th.th_task_team == NULL) return FALSE; } KA_TRACE(15, ("__kmp_execute_tasks_template(exit #7): T#%d can't find work\n", gtid) ); @@ -2350,10 +2355,9 @@ TCW_4( task_team -> tt.tt_unfinished_threads, nthreads ); TCW_4( task_team -> tt.tt_active, TRUE ); - TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1); - KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n", - (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) ); + KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n", + (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) ); return task_team; } @@ -2362,16 +2366,13 @@ // __kmp_free_task_team: // Frees the task team associated with a specific thread, and adds it // to the global task team free list. -// -static void +void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ) { KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n", thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) ); - KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 ); - // Put task team back on free list __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock ); @@ -2412,32 +2413,6 @@ } } - -//------------------------------------------------------------------------------ -// __kmp_unref_task_teams: -// Remove one thread from referencing the task team structure by -// decreasing the reference count and deallocate task team if no more -// references to it. -// -void -__kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread ) -{ - kmp_uint ref_ct; - - ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1; - - KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n", - __kmp_gtid_from_thread( thread ), task_team, ref_ct ) ); - - - if ( ref_ct == 0 ) { - __kmp_free_task_team( thread, task_team ); - } - - TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL ); -} - - //------------------------------------------------------------------------------ // __kmp_wait_to_unref_task_teams: // Some threads could still be in the fork barrier release code, possibly @@ -2475,9 +2450,7 @@ #if KMP_OS_WINDOWS // TODO: GEH - add this check for Linux* OS / OS X* as well? if (!__kmp_is_thread_alive(thread, &exit_val)) { - if (TCR_PTR(thread->th.th_task_team) != NULL) { - __kmp_unref_task_team( thread->th.th_task_team, thread ); - } + thread->th.th_task_team = NULL; continue; } #endif @@ -2517,34 +2490,46 @@ // an already created, unused one if it already exists. // This may be called by any thread, but only for teams with # threads >1. 
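Note on __kmp_allocate_task_team/__kmp_free_task_team above: with tt_ref_ct and __kmp_unref_task_team removed, task-team descriptors are recycled through the global free list guarded by __kmp_task_team_lock rather than being freed by the last dereferencing worker. A simplified sketch of that free-list pattern (illustrative names only, with std::mutex standing in for the bootstrap lock):

    #include <cstdlib>
    #include <mutex>

    // Simplified sketch of the task-team free list: freed descriptors are pushed
    // onto a global list under a lock and reused by later allocations, instead of
    // being deallocated when a per-thread reference count reaches zero.
    struct task_team { task_team *next_free; int nproc; };

    static task_team *free_list = nullptr;
    static std::mutex free_list_lock;  // stands in for __kmp_task_team_lock

    static task_team *allocate_task_team(int nproc) {
        task_team *tt = nullptr;
        {
            std::lock_guard<std::mutex> guard(free_list_lock);
            if (free_list) { tt = free_list; free_list = tt->next_free; }
        }
        if (!tt) tt = static_cast<task_team *>(std::malloc(sizeof(task_team)));
        tt->next_free = nullptr;
        tt->nproc = nproc;
        return tt;
    }

    static void free_task_team(task_team *tt) {
        std::lock_guard<std::mutex> guard(free_list_lock);
        tt->next_free = free_list;  // put the descriptor back on the free list
        free_list = tt;
    }

    int main() {
        task_team *a = allocate_task_team(4);
        free_task_team(a);
        task_team *b = allocate_task_team(8);  // reuses the descriptor just freed
        free_task_team(b);
        return 0;
    }

The sketch never releases the list at exit; in the runtime that cleanup is the job of __kmp_reap_task_teams during shutdown.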
void -__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int both, int always ) +__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always ) { KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - if ( ( team->t.t_task_team[this_thr->th.th_task_state] == NULL ) && ( always || team->t.t_nproc > 1 ) ) { - // Allocate a new task team, which will be propagated to - // all of the worker threads after the barrier. As they - // spin in the barrier release phase, then will continue - // to use the previous task team struct, until they receive - // the signal to stop checking for tasks (they can't safely - // reference the kmp_team_t struct, which could be reallocated - // by the master thread). + // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next. + // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use. + if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) { team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team ); KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n", __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state], ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); } - // else: Either all threads have reported in, and no tasks were spawned for this release->gather region - // Leave the old task team struct in place for the upcoming region. - // No task teams are formed for serialized teams. - if (both) { - int other_team = 1 - this_thr->th.th_task_state; - if ( ( team->t.t_task_team[other_team] == NULL ) && ( team->t.t_nproc > 1 ) ) { // setup other team as well - team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team ); - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n", - __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], - ((team != NULL) ? team->t.t_id : -1), other_team )); - } + + // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is + // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the + // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely + // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for + // serialized teams. + int other_team = 1 - this_thr->th.th_task_state; + if (team->t.t_task_team[other_team] == NULL && team->t.t_nproc > 1) { // setup other team as well + team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team ); + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n", + __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], + ((team != NULL) ? 
team->t.t_id : -1), other_team )); + } + else { // Leave the old task team struct in place for the upcoming region; adjust as needed + kmp_task_team_t *task_team = team->t.t_task_team[other_team]; + if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) { + TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); + TCW_4(task_team->tt.tt_found_tasks, FALSE); +#if OMP_41_ENABLED + TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); +#endif + TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc ); + TCW_4(task_team->tt.tt_active, TRUE ); + } + // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n", + __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], + ((team != NULL) ? team->t.t_id : -1), other_team )); } } @@ -2559,26 +2544,11 @@ { KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - // In case this thread never saw that the task team was no longer active, unref/deallocate it now. - if ( this_thr->th.th_task_team != NULL ) { - if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) { - KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) ); - KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team (%p)is not active, unrefing\n", - __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team)); - __kmp_unref_task_team( this_thr->th.th_task_team, this_thr ); - } -#if KMP_DEBUG - else { // We are re-using a task team that was never enabled. - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]); - } -#endif - } - // Toggle the th_task_state field, to switch which task_team this thread refers to this_thr->th.th_task_state = 1 - this_thr->th.th_task_state; // It is now safe to propagate the task team pointer from the team struct to the current thread. TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]); - KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to %p from Team #%d task team (parity=%d)\n", + KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n", __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team, ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); } @@ -2586,11 +2556,14 @@ //-------------------------------------------------------------------------------------------- // __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather -// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created +// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created. +// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0 +// optionally as the last argument. When wait is zero, master thread does not wait for +// unfinished_threads to reach 0. 
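Note on the rewritten __kmp_task_team_setup/__kmp_task_team_sync above: the team keeps two task-team slots indexed by each thread's th_task_state parity bit; the master prepares, or recycles in place, the other-parity slot for the next region while workers may still be draining the current one, and every thread flips its parity and picks up the new pointer in the sync step. A condensed, illustrative sketch with simplified structs (not the runtime's types, and without the atomics and flag machinery the real code uses):

    #include <cstdio>

    // Condensed illustration of the two-slot (parity) task-team scheme.
    struct task_team { int nproc; int unfinished_threads; int active; };
    struct team      { task_team *slots[2]; int nproc; };
    struct thread    { int task_state; task_team *task_team_ptr; };

    // Master side: make sure the current-parity slot exists, then prepare (or
    // recycle) the other-parity slot for the region after the barrier.
    static void task_team_setup(thread *master, team *tm) {
        int cur = master->task_state, other = 1 - cur;
        if (!tm->slots[cur])   tm->slots[cur]   = new task_team();
        if (!tm->slots[other]) tm->slots[other] = new task_team();
        tm->slots[other]->nproc = tm->nproc;               // reset for the next region
        tm->slots[other]->unfinished_threads = tm->nproc;
        tm->slots[other]->active = 1;
    }

    // Every thread after the barrier: flip parity and pick up the prepared slot.
    static void task_team_sync(thread *thr, team *tm) {
        thr->task_state = 1 - thr->task_state;
        thr->task_team_ptr = tm->slots[thr->task_state];
    }

    int main() {
        team tm = {{nullptr, nullptr}, 4};
        thread master = {0, nullptr};
        task_team_setup(&master, &tm);  // slot 1 is readied while slot 0 may still drain
        task_team_sync(&master, &tm);
        std::printf("parity=%d slot=%p\n", master.task_state, (void *)master.task_team_ptr);
        delete tm.slots[0];
        delete tm.slots[1];
        return 0;
    }

Recycling the idle slot in place (resetting tt_nproc, tt_found_tasks, tt_unfinished_threads, tt_active) is what lets the patch drop the old scheme of deactivating and reference-counting task teams at every barrier.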
void __kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team USE_ITT_BUILD_ARG(void * itt_sync_obj) - ) + , int wait) { kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state]; @@ -2598,18 +2571,18 @@ KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team ); if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) { - KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n", - __kmp_gtid_from_thread(this_thr), task_team)); - // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait - // here for tasks to complete. To avoid memory contention, only master thread checks termination condition. - kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj)); - - // Kill the old task team, so that the worker threads will stop referencing it while spinning. - // They will deallocate it when the reference count reaches zero. - // The master thread is not included in the ref count. - KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: setting active to false, setting local and team's pointer to NULL\n", + if (wait) { + KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n", + __kmp_gtid_from_thread(this_thr), task_team)); + // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait + // here for tasks to complete. To avoid memory contention, only master thread checks termination condition. + kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U); + flag.wait(this_thr, TRUE + USE_ITT_BUILD_ARG(itt_sync_obj)); + } + // Deactivate the old task team, so that the worker threads will stop referencing it while spinning. + KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: " + "setting active to false, setting local and team's pointer to NULL\n", __kmp_gtid_from_thread(this_thr), task_team)); #if OMP_41_ENABLED KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE ); @@ -2621,7 +2594,6 @@ KMP_MB(); TCW_PTR(this_thr->th.th_task_team, NULL); - team->t.t_task_team[this_thr->th.th_task_state] = NULL; } } Index: openmp/trunk/runtime/src/kmp_wait_release.h =================================================================== --- openmp/trunk/runtime/src/kmp_wait_release.h +++ openmp/trunk/runtime/src/kmp_wait_release.h @@ -178,12 +178,14 @@ if (__kmp_tasking_mode != tskm_immediate_exec) { task_team = this_thr->th.th_task_team; if (task_team != NULL) { - if (!TCR_SYNC_4(task_team->tt.tt_active)) { + if (TCR_SYNC_4(task_team->tt.tt_active)) { + if (KMP_TASKING_ENABLED(task_team)) + flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed + USE_ITT_BUILD_ARG(itt_sync_obj), 0); + } + else { KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); - __kmp_unref_task_team(task_team, this_thr); - } else if (KMP_TASKING_ENABLED(task_team)) { - flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed - USE_ITT_BUILD_ARG(itt_sync_obj), 0); + this_thr->th.th_task_team = NULL; } } // if } // if
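Note on the kmp_wait_release.h change above: the spin-wait now executes tasks only while the task team is still active; once the master deactivates it in __kmp_task_team_wait, a spinning worker simply clears its own th_task_team pointer instead of calling the removed __kmp_unref_task_team. A small illustrative sketch of that check order, with simplified stand-ins for the runtime's structures:

    #include <atomic>
    #include <cstdio>

    // Illustrative sketch of the reworked spin-wait check (not the runtime's flag
    // machinery): a spinning worker keeps helping with tasks only while the task
    // team is still active; once the master deactivates it, the worker just drops
    // its local pointer instead of decrementing a reference count.
    struct task_team { std::atomic<int> active; std::atomic<int> found_tasks; };
    struct worker    { task_team *tt; };

    static void spin_iteration(worker *w) {
        task_team *tt = w->tt;
        if (tt == nullptr) return;
        if (tt->active.load()) {
            if (tt->found_tasks.load())
                std::printf("execute queued or stolen tasks\n");  // flag->execute_tasks(...) in the runtime
        } else {
            w->tt = nullptr;  // detach; the master frees the task team later
        }
    }

    int main() {
        task_team tt;
        tt.active.store(1);
        tt.found_tasks.store(1);
        worker w = {&tt};
        spin_iteration(&w);   // task team active: worker helps execute tasks
        tt.active.store(0);   // master deactivates it in __kmp_task_team_wait
        spin_iteration(&w);   // worker detaches from the dead task team
        std::printf("worker task_team = %p\n", (void *)w.tt);
        return 0;
    }

This pairs with the new NULL checks added to __kmp_execute_tasks_template: a worker whose pointer has been cleared backs out of the stealing loops instead of touching a task team the master may have already recycled.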