diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -158,7 +158,7 @@
 #
 # Regular entry points
-    __kmp_wait_yield_4
+    __kmp_wait_4
     __kmp_fork_call
     __kmp_invoke_microtask
 %ifdef KMP_USE_MONITOR
diff --git a/openmp/runtime/src/exports_so.txt b/openmp/runtime/src/exports_so.txt
--- a/openmp/runtime/src/exports_so.txt
+++ b/openmp/runtime/src/exports_so.txt
@@ -83,7 +83,7 @@
     __kmp_reap_worker;
     __kmp_release_64;
     __kmp_wait_64;
-    __kmp_wait_yield_4;
+    __kmp_wait_4;

     # ittnotify symbols to be used by debugger
     __kmp_itt_fini_ittlib;
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -981,10 +981,6 @@
   (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
 #define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
 #endif
-#define KMP_YIELD_NOW() \
-  (KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) % \
-       (__kmp_yield_on_count + __kmp_yield_off_count) < \
-   (kmp_uint32)__kmp_yield_on_count)
 #endif // KMP_USE_MONITOR

 #define KMP_MIN_STATSCOLS 40
@@ -999,14 +995,6 @@
 #define KMP_MAX_CHUNK (INT_MAX - 1)
 #define KMP_DEFAULT_CHUNK 1

-#define KMP_MIN_INIT_WAIT 1
-#define KMP_MAX_INIT_WAIT (INT_MAX / 2)
-#define KMP_DEFAULT_INIT_WAIT 2048U
-
-#define KMP_MIN_NEXT_WAIT 1
-#define KMP_MAX_NEXT_WAIT (INT_MAX / 2)
-#define KMP_DEFAULT_NEXT_WAIT 1024U
-
 #define KMP_DFLT_DISP_NUM_BUFF 7
 #define KMP_MAX_ORDERED 8
@@ -1090,7 +1078,7 @@
 extern void __kmp_x86_pause(void);
 #elif KMP_MIC
 // Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed
-// regression after removal of extra PAUSE from KMP_YIELD_SPIN(). Changing
+// regression after removal of extra PAUSE from spin loops. Changing
 // the delay from 100 to 300 showed even better performance than double PAUSE
 // on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.
 static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }
@@ -1115,31 +1103,54 @@
 #define KMP_INIT_YIELD(count) \
   { (count) = __kmp_yield_init; }

+#define KMP_OVERSUBSCRIBED \
+  (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))
+
+#define KMP_TRY_YIELD \
+  ((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))
+
+#define KMP_TRY_YIELD_OVERSUB \
+  ((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))
+
 #define KMP_YIELD(cond) \
   { \
     KMP_CPU_PAUSE(); \
-    __kmp_yield((cond)); \
+    if ((cond) && (KMP_TRY_YIELD)) \
+      __kmp_yield(); \
+  }
+
+#define KMP_YIELD_OVERSUB() \
+  { \
+    KMP_CPU_PAUSE(); \
+    if ((KMP_TRY_YIELD_OVERSUB)) \
+      __kmp_yield(); \
   }

 // Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
 // there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
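
As a reference, the gating introduced above reduces to three small predicates. The following standalone sketch (illustrative only, not part of the patch) mirrors what KMP_OVERSUBSCRIBED, KMP_TRY_YIELD, and KMP_TRY_YIELD_OVERSUB evaluate to; plain ints stand in for the __kmp_* globals and their TCR_4() relaxed reads:

// Sketch only: models the macro logic with functions.
#include <cstdio>

static int use_yield = 1;  // 0 = never, 1 = always allowed, 2 = oversub only
static int nth = 8;        // live threads (__kmp_nth)
static int avail_proc = 4; // procs available to the process (__kmp_avail_proc)
static int xproc = 4;      // fallback when avail_proc is 0 (__kmp_xproc)

static bool oversubscribed() { // KMP_OVERSUBSCRIBED
  return nth > (avail_proc ? avail_proc : xproc);
}
static bool try_yield() { // KMP_TRY_YIELD
  return use_yield == 1 || (use_yield == 2 && oversubscribed());
}
static bool try_yield_oversub() { // KMP_TRY_YIELD_OVERSUB
  return (use_yield == 1 || use_yield == 2) && oversubscribed();
}

int main() {
  for (use_yield = 0; use_yield <= 2; ++use_yield)
    for (nth = 2; nth <= 8; nth += 6) // undersubscribed, then oversubscribed
      std::printf("use_yield=%d nth=%d -> TRY_YIELD=%d TRY_YIELD_OVERSUB=%d\n",
                  use_yield, nth, try_yield(), try_yield_oversub());
}

In mode 1, KMP_TRY_YIELD is unconditionally true, so KMP_YIELD(cond) keeps its old behavior; mode 2 restricts both macros to the oversubscribed case, which is what KMP_LIBRARY=turnaround now selects (see the kmp_runtime.cpp hunk further down).
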
-
-#define KMP_YIELD_WHEN(cond, count) \
+#define KMP_YIELD_SPIN(count) \
   { \
     KMP_CPU_PAUSE(); \
-    (count) -= 2; \
-    if (!(count)) { \
-      __kmp_yield(cond); \
-      (count) = __kmp_yield_next; \
+    if (KMP_TRY_YIELD) { \
+      (count) -= 2; \
+      if (!(count)) { \
+        __kmp_yield(); \
+        (count) = __kmp_yield_next; \
+      } \
     } \
   }
-#define KMP_YIELD_SPIN(count) \
+
+#define KMP_YIELD_OVERSUB_ELSE_SPIN(count) \
   { \
     KMP_CPU_PAUSE(); \
-    (count) -= 2; \
-    if (!(count)) { \
-      __kmp_yield(1); \
-      (count) = __kmp_yield_next; \
+    if ((KMP_TRY_YIELD_OVERSUB)) \
+      __kmp_yield(); \
+    else if (__kmp_use_yield == 1) { \
+      (count) -= 2; \
+      if (!(count)) { \
+        __kmp_yield(); \
+        (count) = __kmp_yield_next; \
+      } \
     } \
   }
@@ -2945,10 +2956,6 @@
 extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
 extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */

-/* used for yielding spin-waits */
-extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
-extern unsigned int __kmp_next_wait; /* subsequent number of spin-tests */
-
 extern enum library_type __kmp_library;

 extern enum sched_type __kmp_sched; /* default runtime scheduling */
@@ -2977,16 +2984,11 @@
 extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
 #endif

+extern kmp_int32 __kmp_use_yield;
+extern kmp_int32 __kmp_use_yield_exp_set;
 extern kmp_uint32 __kmp_yield_init;
 extern kmp_uint32 __kmp_yield_next;

-#if KMP_USE_MONITOR
-extern kmp_uint32 __kmp_yielding_on;
-#endif
-extern kmp_uint32 __kmp_yield_cycle;
-extern kmp_int32 __kmp_yield_on_count;
-extern kmp_int32 __kmp_yield_off_count;
-
 /* ------------------------------------------------------------------------- */
 extern int __kmp_allThreadsSpecified;
@@ -3309,7 +3311,7 @@
                              int num_threads);
 #endif

-extern void __kmp_yield(int cond);
+extern void __kmp_yield();

 extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
                                    enum sched_type schedule, kmp_int32 lb,
@@ -3374,13 +3376,11 @@
 extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
 extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
 extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
-extern kmp_uint32 __kmp_wait_yield_4(kmp_uint32 volatile *spinner,
-                                     kmp_uint32 checker,
-                                     kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
-                                     void *obj);
-extern void __kmp_wait_yield_4_ptr(void *spinner, kmp_uint32 checker,
-                                   kmp_uint32 (*pred)(void *, kmp_uint32),
-                                   void *obj);
+extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,
+                               kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
+                               void *obj);
+extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
+                             kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);

 class kmp_flag_32;
 class kmp_flag_64;
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -683,7 +683,7 @@
 //     }
 // and adding the yield here is good for at least a 10x speedup
 // when running >2 threads per core (on the NAS LU benchmark).
-    __kmp_yield(TRUE);
+    __kmp_yield();
 #endif
 #else
 #error Unknown or unsupported architecture
@@ -993,24 +993,18 @@
     kmp_uint32 spins; \
     KMP_FSYNC_PREPARE(l); \
     KMP_INIT_YIELD(spins); \
-    if (TCR_4(__kmp_nth) > \
-        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
-      KMP_YIELD(TRUE); \
-    } else { \
-      KMP_YIELD_SPIN(spins); \
-    } \
     kmp_backoff_t backoff = __kmp_spin_backoff_params; \
-    while ( \
-        KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
-        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
-      __kmp_spin_backoff(&backoff); \
+    do { \
       if (TCR_4(__kmp_nth) > \
          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
        KMP_YIELD(TRUE); \
      } else { \
        KMP_YIELD_SPIN(spins); \
      } \
-    } \
+      __kmp_spin_backoff(&backoff); \
+    } while ( \
+        KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
+        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
   } \
   KMP_FSYNC_ACQUIRED(l); \
 }
@@ -1096,8 +1090,7 @@
               KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
     } \
     KMP_MB(); \
-    KMP_YIELD(TCR_4(__kmp_nth) > \
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \
+    KMP_YIELD_OVERSUB(); \
   }

 #endif // KMP_USE_FUTEX
@@ -3976,8 +3969,8 @@
   // __kmp_dispatch_num_buffers)
   if (idx != sh_buf->doacross_buf_idx) {
     // Shared buffer is occupied, wait for it to be free
-    __kmp_wait_yield_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
-                       __kmp_eq_4, NULL);
+    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
+                 __kmp_eq_4, NULL);
   }
 #if KMP_32_BIT_ARCH
   // Check if we are the first thread. After the CAS the first thread gets 0,
diff --git a/openmp/runtime/src/kmp_dispatch.h b/openmp/runtime/src/kmp_dispatch.h
--- a/openmp/runtime/src/kmp_dispatch.h
+++ b/openmp/runtime/src/kmp_dispatch.h
@@ -269,7 +269,7 @@
 }

 /*
-  Spin wait loop that first does pause, then yield.
+  Spin wait loop that pauses between checks.
   Waits until function returns non-zero when called with *spinner and check.
   Does NOT put threads to sleep.
   Arguments:
@@ -282,15 +282,14 @@
     is used to report locks consistently. For example, if lock is acquired
     immediately, its address is reported to ittnotify via
     KMP_FSYNC_ACQUIRED(). However, if the lock cannot be acquired immediately
-    and lock routine calls to KMP_WAIT_YIELD(), the latter should report the
+    and lock routine calls to KMP_WAIT(), the latter should report the
     same address, not an address of low-level spinner.
 #endif // USE_ITT_BUILD
   TODO: make inline function (move to header file for icl)
 */
 template <typename UT>
-static UT __kmp_wait_yield(volatile UT *spinner, UT checker,
-                           kmp_uint32 (*pred)(UT, UT)
-                               USE_ITT_BUILD_ARG(void *obj)) {
+static UT __kmp_wait(volatile UT *spinner, UT checker,
+                     kmp_uint32 (*pred)(UT, UT) USE_ITT_BUILD_ARG(void *obj)) {
   // note: we may not belong to a team at this point
   volatile UT *spin = spinner;
   UT check = checker;
@@ -308,12 +307,8 @@
        It causes problems with infinite recursion because of exit lock */
     /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
         __kmp_abort_thread(); */
-
-    // if we are oversubscribed,
-    // or have waited a bit (and KMP_LIBRARY=throughput, then yield
-    // pause is in the following code
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins);
+    // If oversubscribed, or have waited a bit, then yield.
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
   return r;
@@ -379,8 +374,8 @@
     __kmp_str_free(&buff);
   }
 #endif
-  __kmp_wait_yield(&sh->u.s.ordered_iteration, lower,
-                   __kmp_ge USE_ITT_BUILD_ARG(NULL));
+  __kmp_wait(&sh->u.s.ordered_iteration, lower,
+             __kmp_ge USE_ITT_BUILD_ARG(NULL));
   KMP_MB(); /* is this necessary? */
 #ifdef KMP_DEBUG
   {
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -858,9 +858,9 @@
     KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d "
                    "sh->buffer_index:%d\n",
                    gtid, my_buffer_index, sh->buffer_index));
-    __kmp_wait_yield(&sh->buffer_index, my_buffer_index,
-                     __kmp_eq USE_ITT_BUILD_ARG(NULL));
-    // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and
+    __kmp_wait(&sh->buffer_index, my_buffer_index,
+               __kmp_eq USE_ITT_BUILD_ARG(NULL));
+    // Note: KMP_WAIT() cannot be used there: buffer index and
     // my_buffer_index are *always* 32-bit integers.
     KMP_MB(); /* is this necessary? */
     KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d "
@@ -1004,8 +1004,8 @@
   }
 #endif

-  __kmp_wait_yield(&sh->u.s.ordered_iteration, lower,
-                   __kmp_ge USE_ITT_BUILD_ARG(NULL));
+  __kmp_wait(&sh->u.s.ordered_iteration, lower,
+             __kmp_ge USE_ITT_BUILD_ARG(NULL));

   KMP_MB(); /* is this necessary? */
 #ifdef KMP_DEBUG
   {
@@ -1073,8 +1073,8 @@
   }
 #endif

-  __kmp_wait_yield(&sh->u.s.ordered_iteration, lower,
-                   __kmp_ge USE_ITT_BUILD_ARG(NULL));
+  __kmp_wait(&sh->u.s.ordered_iteration, lower,
+             __kmp_ge USE_ITT_BUILD_ARG(NULL));

   KMP_MB(); /* is this necessary? */

   KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting "
@@ -2489,10 +2489,10 @@
 }

 kmp_uint32
-__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
-                   kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
-                   void *obj // Higher-level synchronization object, or NULL.
-                   ) {
+__kmp_wait_4(volatile kmp_uint32 *spinner, kmp_uint32 checker,
+             kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
+             void *obj // Higher-level synchronization object, or NULL.
+             ) {
   // note: we may not belong to a team at this point
   volatile kmp_uint32 *spin = spinner;
   kmp_uint32 check = checker;
@@ -2509,20 +2509,16 @@
        split. It causes problems with infinite recursion because of exit lock */
     /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort)
         __kmp_abort_thread(); */
-
-    /* if we have waited a bit, or are oversubscribed, yield */
-    /* pause is in the following code */
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
   return r;
 }

-void __kmp_wait_yield_4_ptr(
-    void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32),
-    void *obj // Higher-level synchronization object, or NULL.
-    ) {
+void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,
+                      kmp_uint32 (*pred)(void *, kmp_uint32),
+                      void *obj // Higher-level synchronization object, or NULL.
+                      ) {
   // note: we may not belong to a team at this point
   void *spin = spinner;
   kmp_uint32 check = checker;
@@ -2534,10 +2530,9 @@
   // main wait spin loop
   while (!f(spin, check)) {
     KMP_FSYNC_SPIN_PREPARE(obj);
     /* if we have waited a bit, or are oversubscribed, yield */
     /* pause is in the following code */
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins);
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
   }
   KMP_FSYNC_SPIN_ACQUIRED(obj);
 }
diff --git a/openmp/runtime/src/kmp_dispatch_hier.h b/openmp/runtime/src/kmp_dispatch_hier.h
--- a/openmp/runtime/src/kmp_dispatch_hier.h
+++ b/openmp/runtime/src/kmp_dispatch_hier.h
@@ -263,8 +263,8 @@
                   next_wait_value));
     char v = (current_wait_value ? 0x1 : 0x0);
     (RCAST(volatile char *, &(bdata->val[current_index])))[id] = v;
-    __kmp_wait_yield(&(bdata->val[current_index]), current_wait_value,
-                     __kmp_eq USE_ITT_BUILD_ARG(NULL));
+    __kmp_wait(&(bdata->val[current_index]), current_wait_value,
+               __kmp_eq USE_ITT_BUILD_ARG(NULL));
     tdata->wait_val[current_index] = next_wait_value;
     tdata->index = next_index;
   }
@@ -310,8 +310,8 @@
                   next_wait_value));
     val = RCAST(volatile kmp_int64 *, &(bdata->val[current_index]));
     KMP_TEST_THEN_INC64(val);
-    __kmp_wait_yield(&(bdata->val[current_index]), current_wait_value,
-                     __kmp_ge USE_ITT_BUILD_ARG(NULL));
+    __kmp_wait(&(bdata->val[current_index]), current_wait_value,
+               __kmp_ge USE_ITT_BUILD_ARG(NULL));
     tdata->wait_val[current_index] = next_wait_value;
     tdata->index = next_index;
   }
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -62,11 +62,6 @@
 std::atomic<int> __kmp_team_counter = ATOMIC_VAR_INIT(0);
 std::atomic<int> __kmp_task_counter = ATOMIC_VAR_INIT(0);

-unsigned int __kmp_init_wait =
-    KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */
-unsigned int __kmp_next_wait =
-    KMP_DEFAULT_NEXT_WAIT; /* subsequent number of spin-tests */
-
 size_t __kmp_stksize = KMP_DEFAULT_STKSIZE;
 #if KMP_USE_MONITOR
 size_t __kmp_monitor_stksize = 0; // auto adjust
@@ -395,22 +390,17 @@
 int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */
 int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */

+// From KMP_USE_YIELD:
+// 0 = never yield;
+// 1 = always yield (default);
+// 2 = yield only if oversubscribed
+kmp_int32 __kmp_use_yield = 1;
+// This will be 1 if the KMP_USE_YIELD environment variable was set explicitly
+kmp_int32 __kmp_use_yield_exp_set = 0;
+
 kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;
 kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;

-#if KMP_USE_MONITOR
-kmp_uint32 __kmp_yielding_on = 1;
-#endif
-#if KMP_OS_CNK
-kmp_uint32 __kmp_yield_cycle = 0;
-#else
-kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */
-#endif
-kmp_int32 __kmp_yield_on_count =
-    10; /* By default, yielding is on for 10 monitor periods. */
-kmp_int32 __kmp_yield_off_count =
-    1; /* By default, yielding is off for 1 monitor periods. */
-
 /* ------------------------------------------------------ */
 /* STATE mostly synchronized with global lock */
 /* data written to rarely by masters, read often by workers */
diff --git a/openmp/runtime/src/kmp_itt.h b/openmp/runtime/src/kmp_itt.h
--- a/openmp/runtime/src/kmp_itt.h
+++ b/openmp/runtime/src/kmp_itt.h
@@ -219,7 +219,7 @@
    with a delay (and not called at all if waiting time is small). So, in spin
    loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT()
    (before spin loop), KMP_FSYNC_SPIN_PREPARE() (within the spin loop), and
-   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for example. */
+   KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT() for example. */

 #undef KMP_FSYNC_SPIN_INIT
 #define KMP_FSYNC_SPIN_INIT(obj, spin) \
diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -652,21 +652,11 @@
       kmp_uint32 spins; \
       KMP_FSYNC_PREPARE(lck); \
       KMP_INIT_YIELD(spins); \
-      if (TCR_4(__kmp_nth) > \
-          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
-        KMP_YIELD(TRUE); \
-      } else { \
-        KMP_YIELD_SPIN(spins); \
-      } \
-      while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \
-                                          &lck->tas.lk.poll, 0, gtid + 1)) { \
-        if (TCR_4(__kmp_nth) > \
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
-          KMP_YIELD(TRUE); \
-        } else { \
-          KMP_YIELD_SPIN(spins); \
-        } \
-      } \
+      do { \
+        KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
+      } while ( \
+          lck->tas.lk.poll != 0 || \
+          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
     } \
     KMP_FSYNC_ACQUIRED(lck); \
   } else { \
@@ -770,22 +760,11 @@
       kmp_uint32 spins; \
       KMP_FSYNC_PREPARE(lck); \
       KMP_INIT_YIELD(spins); \
-      if (TCR_4(__kmp_nth) > \
-          (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
-        KMP_YIELD(TRUE); \
-      } else { \
-        KMP_YIELD_SPIN(spins); \
-      } \
-      while ( \
+      do { \
+        KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \
+      } while ( \
           (lck->tas.lk.poll != 0) || \
-          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
-        if (TCR_4(__kmp_nth) > \
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
-          KMP_YIELD(TRUE); \
-        } else { \
-          KMP_YIELD_SPIN(spins); \
-        } \
-      } \
+          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \
     } \
     lck->tas.lk.depth_locked = 1; \
     *depth = KMP_LOCK_ACQUIRED_FIRST; \
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -100,23 +100,12 @@
   kmp_uint32 spins;
   KMP_FSYNC_PREPARE(lck);
   KMP_INIT_YIELD(spins);
-  if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
-    KMP_YIELD(TRUE);
-  } else {
-    KMP_YIELD_SPIN(spins);
-  }
-
   kmp_backoff_t backoff = __kmp_spin_backoff_params;
-  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
-         !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
+  do {
     __kmp_spin_backoff(&backoff);
-    if (TCR_4(__kmp_nth) >
-        (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
-      KMP_YIELD(TRUE);
-    } else {
-      KMP_YIELD_SPIN(spins);
-    }
-  }
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
+           !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
   KMP_FSYNC_ACQUIRED(lck);
   return KMP_LOCK_ACQUIRED_FIRST;
 }
@@ -169,8 +158,7 @@
   KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
   KMP_MB(); /* Flush all pending memory write invalidates. */

-  KMP_YIELD(TCR_4(__kmp_nth) >
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+  KMP_YIELD_OVERSUB();
   return KMP_LOCK_RELEASED;
 }
@@ -474,8 +462,7 @@
   KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
                   lck->lk.poll, gtid));

-  KMP_YIELD(TCR_4(__kmp_nth) >
-            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+  KMP_YIELD_OVERSUB();
   return KMP_LOCK_RELEASED;
 }
@@ -651,7 +638,7 @@
                             std::memory_order_acquire) == my_ticket) {
     return KMP_LOCK_ACQUIRED_FIRST;
   }
-  KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
+  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
   return KMP_LOCK_ACQUIRED_FIRST;
 }
@@ -1249,10 +1236,9 @@
              ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
               lck, gtid));

-      /* ToDo: May want to consider using __kmp_wait_sleep or something that
-         sleeps for throughput only here. */
       KMP_MB();
-      KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
+      // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
+      KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);

 #ifdef DEBUG_QUEUING_LOCKS
       TRACE_LOCK(gtid + 1, "acq spin");
@@ -1282,8 +1268,8 @@
     /* Yield if number of threads > number of logical processors */
     /* ToDo: Not sure why this should only be in oversubscription case,
        maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
-    KMP_YIELD(TCR_4(__kmp_nth) >
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
+    KMP_YIELD_OVERSUB();
+
 #ifdef DEBUG_QUEUING_LOCKS
     TRACE_LOCK(gtid + 1, "acq retry");
 #endif
@@ -1462,8 +1448,8 @@
       KMP_MB();
       /* make sure enqueuing thread has time to update next waiting thread
        * field */
-      *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0,
-                                  KMP_NEQ, NULL);
+      *head_id_p =
+          KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
 #ifdef DEBUG_QUEUING_LOCKS
       TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
 #endif
@@ -2131,7 +2117,7 @@
   // lock from now on.
   while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
     KMP_INC_STAT(lck, lemmingYields);
-    __kmp_yield(TRUE);
+    KMP_YIELD(TRUE);
   }

   if (__kmp_test_adaptive_lock_only(lck, gtid))
@@ -2259,23 +2245,14 @@
   // polling area has been reconfigured. Unless it is reconfigured, the
   // reloads stay in L1 cache and are cheap.
   //
-  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!!
-  //
-  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
+  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
+  // The current implementation of KMP_WAIT doesn't allow for mask
   // and poll to be re-read every spin iteration.
   kmp_uint32 spins;
-
   KMP_FSYNC_PREPARE(lck);
   KMP_INIT_YIELD(spins);
   while (polls[ticket & mask] < ticket) { // atomic load
-    // If we are oversubscribed,
-    // or have waited a bit (and KMP_LIBRARY=turnaround), then yield.
-    // CPU Pause is in the macros for yield.
-    //
-    KMP_YIELD(TCR_4(__kmp_nth) >
-              (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
-    KMP_YIELD_SPIN(spins);
-
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
     // Re-read the mask and the poll pointer from the lock structure.
     //
     // Make certain that "mask" is read before "polls" !!!
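
The acquire paths above (kmp_csupport.cpp, kmp_lock.h, kmp_lock.cpp) now all share one shape: pause/yield first, then retest the atomic, in a do/while instead of a duplicated pre-loop check. A standalone approximation of that shape, assuming KMP_USE_YIELD=1, with backoff omitted and std::this_thread::yield() standing in for __kmp_yield() (illustrative only):

#include <atomic>
#include <thread>

std::atomic<int> poll{0}; // 0 == free, like lck->tas.lk.poll

static bool oversubscribed() { return false; } // stand-in: KMP_OVERSUBSCRIBED

// Models KMP_YIELD_OVERSUB_ELSE_SPIN for KMP_USE_YIELD=1: yield at once when
// oversubscribed, otherwise yield only after the spin counter runs down.
static void yield_oversub_else_spin(unsigned &count) {
  // KMP_CPU_PAUSE() would go first here (architecture pause/delay hint)
  if (oversubscribed()) {
    std::this_thread::yield();
  } else if ((count -= 2) == 0) {
    std::this_thread::yield();
    count = 1024; // __kmp_yield_next
  }
}

int acquire(int gtid) {
  int expected = 0;
  if (poll.load(std::memory_order_relaxed) == 0 &&
      poll.compare_exchange_strong(expected, gtid + 1,
                                   std::memory_order_acquire))
    return 0; // fast path, no spinning
  unsigned spins = 2048; // KMP_INIT_YIELD(spins); __kmp_yield_init
  do { // pause/yield happens before the first retest, as in the patch
    yield_oversub_else_spin(spins);
    expected = 0; // compare_exchange overwrites 'expected' on failure
  } while (poll.load(std::memory_order_relaxed) != 0 ||
           !poll.compare_exchange_strong(expected, gtid + 1,
                                         std::memory_order_acquire));
  return 0;
}

int main() { return acquire(/*gtid=*/0); }

The do/while form removes the old duplication (the same oversubscription if/else appeared both before and inside the while loop) without changing the order in which the lock word is retested.
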
@@ -2807,8 +2784,9 @@
     }
     if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
       // Wait until lock becomes free
-      while (!__kmp_is_unlocked_queuing_lock(lck))
-        __kmp_yield(TRUE);
+      while (!__kmp_is_unlocked_queuing_lock(lck)) {
+        KMP_YIELD(TRUE);
+      }
     } else if (!(status & _XABORT_RETRY))
       break;
   } while (retries--);
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -304,7 +304,7 @@

 #define KMP_CACHE_PREFETCH(ADDR) /* nothing */

-// Define attribute that indicates that the fall through from the previous 
+// Define attribute that indicates that the fall through from the previous
 // case label is intentional and should not be diagnosed by a compiler
 //   Code from libcxx/include/__config
 // Use a function like macro to imply that it must be followed by a semicolon
@@ -882,8 +882,8 @@
 #define VOLATILE_CAST(x) (x)
 #endif

-#define KMP_WAIT_YIELD __kmp_wait_yield_4
-#define KMP_WAIT_YIELD_PTR __kmp_wait_yield_4_ptr
+#define KMP_WAIT __kmp_wait_4
+#define KMP_WAIT_PTR __kmp_wait_4_ptr
 #define KMP_EQ __kmp_eq_4
 #define KMP_NEQ __kmp_neq_4
 #define KMP_LT __kmp_lt_4
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -327,7 +327,7 @@
     static int done = FALSE;

     while (!done) {
-      KMP_YIELD(1);
+      KMP_YIELD(TRUE);
     }
   }
@@ -672,24 +672,6 @@
 #endif /* KMP_OS_WINDOWS */
 #endif /* KMP_DYNAMIC_LIB */

-/* Change the library type to "status" and return the old type */
-/* called from within initialization routines where __kmp_initz_lock is held */
-int __kmp_change_library(int status) {
-  int old_status;
-
-  old_status = __kmp_yield_init &
-               1; // check whether KMP_LIBRARY=throughput (even init count)
-
-  if (status) {
-    __kmp_yield_init |= 1; // throughput => turnaround (odd init count)
-  } else {
-    __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
-  }
-
-  return old_status; // return previous setting of whether
-                     // KMP_LIBRARY=throughput
-}
-
 /* __kmp_parallel_deo -- Wait until it's our turn. */
 void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
   int gtid = *gtid_ref;
@@ -708,8 +690,8 @@
 #ifdef BUILD_PARALLEL_ORDERED
   if (!team->t.t_serialized) {
     KMP_MB();
-    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
-                   KMP_EQ, NULL);
+    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
+             NULL);
     KMP_MB();
   }
 #endif /* BUILD_PARALLEL_ORDERED */
@@ -7735,13 +7717,14 @@
   switch (__kmp_library) {
   case library_serial: {
     KMP_INFORM(LibraryIsSerial);
-    (void)__kmp_change_library(TRUE);
   } break;
   case library_turnaround:
-    (void)__kmp_change_library(TRUE);
+    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
+      __kmp_use_yield = 2; // only yield when oversubscribed
     break;
   case library_throughput:
-    (void)__kmp_change_library(FALSE);
+    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+      __kmp_dflt_blocktime = 200;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
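
With __kmp_change_library() gone, the wait policy no longer flips the parity of __kmp_yield_init; it maps onto two independent knobs. A compact model of the new mapping (helper and parameter names invented for the sketch; the real logic is split between the switch above in __kmp_aux_set_library and the KMP_LIBRARY parser in kmp_settings.cpp below):

enum library_type { library_serial, library_turnaround, library_throughput };

struct wait_knobs {
  int use_yield;    // __kmp_use_yield: 0 never, 1 always, 2 when oversubscribed
  int blocktime_ms; // __kmp_dflt_blocktime
};

// Models the parser's blocktime default (KMP_BLOCKTIME unset => 0 for
// throughput/multiuser) plus __kmp_aux_set_library's yield tweak.
wait_knobs apply_wait_policy(library_type lib, bool use_yield_was_set,
                             bool blocktime_was_set, wait_knobs k) {
  switch (lib) {
  case library_serial:
    break; // informational message only; knobs untouched
  case library_turnaround:
    if (k.use_yield == 1 && !use_yield_was_set)
      k.use_yield = 2; // keep spinning, yield only when oversubscribed
    break;
  case library_throughput:
    if (!blocktime_was_set)
      k.blocktime_ms = 0; // park waiting threads immediately
    break;
  }
  return k;
}

int main() {
  wait_knobs k{1, 200};
  k = apply_wait_policy(library_turnaround, /*use_yield_was_set=*/false,
                        /*blocktime_was_set=*/false, k);
  return k.use_yield == 2 ? 0 : 1;
}

The remaining branch in __kmp_aux_set_library (blocktime clamped from KMP_MAX_BLOCKTIME down to 200 ms for throughput) is deliberately left out of the sketch; it only fires when blocktime was previously forced to "infinite".
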
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -629,6 +629,19 @@
 } // __kmp_stg_print_teams_thread_limit

 // -----------------------------------------------------------------------------
+// KMP_USE_YIELD
+static void __kmp_stg_parse_use_yield(char const *name, char const *value,
+                                      void *data) {
+  __kmp_stg_parse_int(name, value, 0, 2, &__kmp_use_yield);
+  __kmp_use_yield_exp_set = 1;
+} // __kmp_stg_parse_use_yield
+
+static void __kmp_stg_print_use_yield(kmp_str_buf_t *buffer, char const *name,
+                                      void *data) {
+  __kmp_stg_print_int(buffer, name, __kmp_use_yield);
+} // __kmp_stg_print_use_yield
+
+// -----------------------------------------------------------------------------
 // KMP_BLOCKTIME

 static void __kmp_stg_parse_blocktime(char const *name, char const *value,
@@ -745,18 +758,24 @@
     __kmp_library = library_serial;
   } else if (__kmp_str_match("throughput", 2, value)) { /* TH */
     __kmp_library = library_throughput;
+    if (blocktime_str == NULL) {
+      // KMP_BLOCKTIME not specified, so set default to 0.
+      __kmp_dflt_blocktime = 0;
+    }
   } else if (__kmp_str_match("turnaround", 2, value)) { /* TU */
     __kmp_library = library_turnaround;
   } else if (__kmp_str_match("dedicated", 1, value)) { /* D */
     __kmp_library = library_turnaround;
   } else if (__kmp_str_match("multiuser", 1, value)) { /* M */
     __kmp_library = library_throughput;
+    if (blocktime_str == NULL) {
+      // KMP_BLOCKTIME not specified, so set default to 0.
+      __kmp_dflt_blocktime = 0;
+    }
   } else {
     KMP_WARNING(StgInvalidValue, name, value);
   }
 }
-  __kmp_aux_set_library(__kmp_library);
-
 } // __kmp_stg_parse_wait_policy

 static void __kmp_stg_print_wait_policy(kmp_str_buf_t *buffer, char const *name,
@@ -3944,79 +3963,9 @@
   }
 } // __kmp_stg_print_par_range_env

-// -----------------------------------------------------------------------------
-// KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF
-
-static void __kmp_stg_parse_yield_cycle(char const *name, char const *value,
-                                        void *data) {
-  int flag = __kmp_yield_cycle;
-  __kmp_stg_parse_bool(name, value, &flag);
-  __kmp_yield_cycle = flag;
-} // __kmp_stg_parse_yield_cycle
-
-static void __kmp_stg_print_yield_cycle(kmp_str_buf_t *buffer, char const *name,
-                                        void *data) {
-  __kmp_stg_print_bool(buffer, name, __kmp_yield_cycle);
-} // __kmp_stg_print_yield_cycle
-
-static void __kmp_stg_parse_yield_on(char const *name, char const *value,
-                                     void *data) {
-  __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_on_count);
-} // __kmp_stg_parse_yield_on
-
-static void __kmp_stg_print_yield_on(kmp_str_buf_t *buffer, char const *name,
-                                     void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_yield_on_count);
-} // __kmp_stg_print_yield_on
-
-static void __kmp_stg_parse_yield_off(char const *name, char const *value,
-                                      void *data) {
-  __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_off_count);
-} // __kmp_stg_parse_yield_off
-
-static void __kmp_stg_print_yield_off(kmp_str_buf_t *buffer, char const *name,
-                                      void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_yield_off_count);
-} // __kmp_stg_print_yield_off
-
 #endif

 // -----------------------------------------------------------------------------
-// KMP_INIT_WAIT, KMP_NEXT_WAIT
-
-static void __kmp_stg_parse_init_wait(char const *name, char const *value,
-                                      void *data) {
-  int wait;
-  KMP_ASSERT((__kmp_init_wait & 1) == 0);
-  wait = __kmp_init_wait / 2;
-  __kmp_stg_parse_int(name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, &wait);
-  __kmp_init_wait = wait * 2;
-  KMP_ASSERT((__kmp_init_wait & 1) == 0);
-  __kmp_yield_init = __kmp_init_wait;
-} // __kmp_stg_parse_init_wait
-
-static void __kmp_stg_print_init_wait(kmp_str_buf_t *buffer, char const *name,
-                                      void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_init_wait);
-} // __kmp_stg_print_init_wait
-
-static void __kmp_stg_parse_next_wait(char const *name, char const *value,
-                                      void *data) {
-  int wait;
-  KMP_ASSERT((__kmp_next_wait & 1) == 0);
-  wait = __kmp_next_wait / 2;
-  __kmp_stg_parse_int(name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, &wait);
-  __kmp_next_wait = wait * 2;
-  KMP_ASSERT((__kmp_next_wait & 1) == 0);
-  __kmp_yield_next = __kmp_next_wait;
-} // __kmp_stg_parse_next_wait
-
-static void __kmp_stg_print_next_wait(kmp_str_buf_t *buffer, char const *name,
-                                      void *data) {
-  __kmp_stg_print_int(buffer, name, __kmp_next_wait);
-} //__kmp_stg_print_next_wait
-
-// -----------------------------------------------------------------------------
 // KMP_GTID_MODE

 static void __kmp_stg_parse_gtid_mode(char const *name, char const *value,
@@ -4726,6 +4675,8 @@
     {"KMP_ALL_THREADS", __kmp_stg_parse_device_thread_limit, NULL, NULL, 0, 0},
     {"KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime,
      NULL, 0, 0},
+    {"KMP_USE_YIELD", __kmp_stg_parse_use_yield, __kmp_stg_print_use_yield,
+     NULL, 0, 0},
     {"KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok,
      __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0},
     {"KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy,
      NULL, 0, 0},
@@ -4830,12 +4781,6 @@
     {"KMP_PAR_RANGE", __kmp_stg_parse_par_range_env,
      __kmp_stg_print_par_range_env, NULL, 0, 0},
-    {"KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle,
-     __kmp_stg_print_yield_cycle, NULL, 0, 0},
-    {"KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL,
-     0, 0},
-    {"KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off,
-     NULL, 0, 0},
 #endif // KMP_DEBUG

     {"KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc,
@@ -4927,10 +4872,6 @@
 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
     {"KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr,
      __kmp_stg_print_malloc_pool_incr, NULL, 0, 0},
-    {"KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait,
-     NULL, 0, 0},
-    {"KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait,
-     NULL, 0, 0},
     {"KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode,
      NULL, 0, 0},
     {"OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic,
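
The __kmp_use_yield_exp_set flag recorded by the parser above exists so that an explicit KMP_USE_YIELD always wins over the KMP_LIBRARY heuristic. A minimal, self-contained illustration of that precedence rule (function names are invented for the sketch):

#include <cassert>

static int use_yield = 1;     // __kmp_use_yield (default: yielding allowed)
static int use_yield_set = 0; // __kmp_use_yield_exp_set

// Models __kmp_stg_parse_use_yield: record both the value and the fact that
// the user set it explicitly.
static void parse_use_yield(int v) {
  use_yield = v;
  use_yield_set = 1;
}

// Models the library_turnaround branch of __kmp_aux_set_library.
static void set_library_turnaround() {
  if (use_yield == 1 && !use_yield_set)
    use_yield = 2; // only yield when oversubscribed
}

int main() {
  set_library_turnaround(); // no explicit KMP_USE_YIELD: heuristic applies
  assert(use_yield == 2);

  use_yield = 1;
  use_yield_set = 0;
  parse_use_yield(1);       // user explicitly asked to always yield...
  set_library_turnaround(); // ...so turnaround must not downgrade it
  assert(use_yield == 1);
}
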
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -2705,8 +2705,7 @@
       if (thread->th.th_task_team == NULL) {
         break;
       }
-      // Yield before executing next task
-      KMP_YIELD(__kmp_library == library_throughput);
+      KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
       // If execution of a stolen task results in more tasks being placed on our
       // run queue, reset use_own_tasks
       if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
@@ -3242,10 +3241,8 @@
       break;
     }

-    // If we are oversubscribed, or have waited a bit (and library mode is
-    // throughput), yield. Pause is in the following code.
-    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
+    // If oversubscribed, or have waited a bit, yield.
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
   }
 }
@@ -3410,7 +3407,7 @@
         __kmp_abort_thread();
       break;
     }
-    KMP_YIELD(TRUE); // GH: We always yield here
+    KMP_YIELD(TRUE);
   }
 #if USE_ITT_BUILD
   KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
diff --git a/openmp/runtime/src/kmp_taskq.cpp b/openmp/runtime/src/kmp_taskq.cpp
--- a/openmp/runtime/src/kmp_taskq.cpp
+++ b/openmp/runtime/src/kmp_taskq.cpp
@@ -51,7 +51,7 @@

     taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;

-    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
+    KMP_WAIT(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
     KMP_MB();
   }
 }
@@ -95,7 +95,7 @@
     taskq = thunk->th.th_shareds->sv_queue;

     if (taskq->tq_tasknum_serving <= my_token) {
-      KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
+      KMP_WAIT(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
       KMP_MB();
       taskq->tq_tasknum_serving = my_token + 1;
       KMP_MB();
@@ -1056,8 +1056,7 @@
   while (queue->tq_ref_count > 1) {
     __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);

-    KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
-                   NULL);
+    KMP_WAIT((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE, NULL);

     __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
     // Make sure data structures are in consistent state before querying them
@@ -1538,8 +1537,6 @@
   in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

   if (in_parallel) {
-    kmp_uint32 spins;
-
     /* this is just a safeguard to release the waiting threads if */
     /* the outermost taskq never queues a task */

@@ -1556,12 +1553,10 @@
     do {
       /* wait until something is available to dequeue */
-      KMP_INIT_YIELD(spins);
-
       while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
              (!__kmp_taskq_has_any_children(queue)) &&
              (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
-        KMP_YIELD_WHEN(TRUE, spins);
+        KMP_CPU_PAUSE();
       }

       /* check to see if we can execute tasks in the queue */
@@ -1628,7 +1623,6 @@
     /* WAIT until all tasks are finished and no child queues exist before
      * proceeding */
-    KMP_INIT_YIELD(spins);

     while (!__kmp_taskq_tasks_finished(queue) ||
            __kmp_taskq_has_any_children(queue)) {
@@ -1643,7 +1637,8 @@
                                      in_parallel);
       }

-      KMP_YIELD_WHEN(thunk == NULL, spins);
+      if (thunk == NULL)
+        KMP_CPU_PAUSE();

       __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
     }
@@ -1669,8 +1664,6 @@
     // Outermost Queue: steal work from descendants until all tasks are finished

-    KMP_INIT_YIELD(spins);
-
     while (!__kmp_taskq_tasks_finished(queue)) {
       thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);

@@ -1683,7 +1676,8 @@
         __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
       }

-      KMP_YIELD_WHEN(thunk == NULL, spins);
+      if (thunk == NULL)
+        KMP_CPU_PAUSE();
     }

     /* Need this barrier to prevent destruction of queue before threads have all
diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -150,8 +150,8 @@
 }
 #endif

-/* Spin wait loop that first does pause, then yield, then sleep. A thread that
-   calls __kmp_wait_*  must make certain that another thread calls __kmp_release
+/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
+   __kmp_wait_*  must make certain that another thread calls __kmp_release
    to wake it back up to prevent deadlocks!

    NOTE: We may not belong to a team at this point. */
@@ -270,8 +270,7 @@
   }
 #endif

-  // Setup for waiting
-  KMP_INIT_YIELD(spins);
+  KMP_INIT_YIELD(spins); // Setup for waiting

   if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
 #if OMP_50_ENABLED
@@ -368,14 +367,8 @@

     // If we are oversubscribed, or have waited a bit (and
     // KMP_LIBRARY=throughput), then yield
-    // TODO: Should it be number of cores instead of thread contexts? Like:
-    // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
-    // Need performance improvement data to make the change...
-    if (oversubscribed) {
-      KMP_YIELD(1);
-    } else {
-      KMP_YIELD_SPIN(spins);
-    }
+    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
+
     // Check if this thread was transferred from a team
     // to the thread pool (or vice-versa) while spinning.
     in_pool = !!TCR_4(this_thr->th.th_in_pool);
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -437,7 +437,7 @@
                   __kmp_msg_null);
     }
 #endif
-    __kmp_yield(TRUE);
+    KMP_YIELD(TRUE);
   }

 // /* Set thread stack info according to values returned by pthread_getattr_np().
@@ -580,8 +580,6 @@
   sigset_t new_set;
 #endif /* KMP_BLOCK_SIGNALS */
   struct timespec interval;
-  int yield_count;
-  int yield_cycles = 0;

   KMP_MB(); /* Flush all pending memory write invalidates. */

@@ -665,13 +663,6 @@

   KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));

-  if (__kmp_yield_cycle) {
-    __kmp_yielding_on = 0; /* Start out with yielding shut off */
-    yield_count = __kmp_yield_off_count;
-  } else {
-    __kmp_yielding_on = 1; /* Yielding is on permanently */
-  }
-
   while (!TCR_4(__kmp_global.g.g_done)) {
     struct timespec now;
     struct timeval tval;
@@ -707,22 +698,6 @@
     status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
     KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);

-    if (__kmp_yield_cycle) {
-      yield_cycles++;
-      if ((yield_cycles % yield_count) == 0) {
-        if (__kmp_yielding_on) {
-          __kmp_yielding_on = 0; /* Turn it off now */
-          yield_count = __kmp_yield_off_count;
-        } else {
-          __kmp_yielding_on = 1; /* Turn it on now */
-          yield_count = __kmp_yield_on_count;
-        }
-        yield_cycles = 0;
-      }
-    } else {
-      __kmp_yielding_on = 1;
-    }
-
     TCW_4(__kmp_global.g.g_time.dt.t_value,
           TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);

@@ -1011,8 +986,8 @@
   // Wait for the monitor thread is really started and set its *priority*.
   KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
                    sizeof(__kmp_global.g.g_time.dt.t_value));
-  __kmp_wait_yield_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value,
-                     -1, &__kmp_neq_4, NULL);
+  __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
+               &__kmp_neq_4, NULL);
 #endif // KMP_REAL_TIME_FIX

 #ifdef KMP_THREAD_ATTR
@@ -1688,18 +1663,7 @@
 }
 #endif // KMP_USE_MONITOR

-void __kmp_yield(int cond) {
-  if (!cond)
-    return;
-#if KMP_USE_MONITOR
-  if (!__kmp_yielding_on)
-    return;
-#else
-  if (__kmp_yield_cycle && !KMP_YIELD_NOW())
-    return;
-#endif
-  sched_yield();
-}
+void __kmp_yield() { sched_yield(); }

 void __kmp_gtid_set_specific(int gtid) {
   if (__kmp_init_gtid) {
diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp
--- a/openmp/runtime/src/z_Windows_NT_util.cpp
+++ b/openmp/runtime/src/z_Windows_NT_util.cpp
@@ -483,10 +483,7 @@
   __kmp_resume_template(target_gtid, flag);
 }

-void __kmp_yield(int cond) {
-  if (cond)
-    Sleep(0);
-}
+void __kmp_yield() { Sleep(0); }

 void __kmp_gtid_set_specific(int gtid) {
   if (__kmp_init_gtid) {
@@ -1245,8 +1242,8 @@
      Right solution seems to be waiting for *either* thread termination *or*
      ds_alive resetting. */
   {
-    // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize
-    // KMP_WAIT_YIELD to cover this usage also.
+    // TODO: This code is very similar to KMP_WAIT. Need to generalize
+    // KMP_WAIT to cover this usage also.
     void *obj = NULL;
     kmp_uint32 spins;
 #if USE_ITT_BUILD
@@ -1258,8 +1255,7 @@
       KMP_FSYNC_SPIN_PREPARE(obj);
 #endif /* USE_ITT_BUILD */
       __kmp_is_thread_alive(th, &exit_val);
-      KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
-      KMP_YIELD_SPIN(spins);
+      KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
     } while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive));
 #if USE_ITT_BUILD
     if (exit_val == STILL_ACTIVE) {
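
Finally, the renamed primitive keeps its old contract: spin until pred(*spinner, checker) holds, pausing and possibly yielding, but never sleeping, so the releasing side only has to store the awaited value. A reduced model of __kmp_wait_4 (a single-yield policy stands in for the full KMP_YIELD_OVERSUB_ELSE_SPIN; helper names are invented):

#include <atomic>
#include <thread>

using pred4_t = bool (*)(unsigned, unsigned);

static bool eq4(unsigned v, unsigned c) { return v == c; } // like __kmp_eq_4

// Spin until pred(*spinner, checker) is true; returns the last value read.
// Never blocks in the kernel: a releasing thread merely stores the value.
unsigned wait_4(const std::atomic<unsigned> *spinner, unsigned checker,
                pred4_t pred) {
  unsigned r, spins = 2048; // KMP_INIT_YIELD(spins)
  while (!pred(r = spinner->load(std::memory_order_acquire), checker)) {
    if ((spins -= 2) == 0) { // decrement of 2, as in the real macros
      std::this_thread::yield();
      spins = 1024; // __kmp_yield_next
    }
  }
  return r;
}

int main() {
  std::atomic<unsigned> flag{1};
  return wait_4(&flag, 1, eq4) == 1 ? 0 : 1; // satisfied immediately
}

This mirrors the call sites touched above: for example, __kmp_wait_4(&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL) in kmp_csupport.cpp simply spins until the shared doacross buffer rotates to this thread's index.
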