Index: runtime/src/kmp.h
===================================================================
--- runtime/src/kmp.h
+++ runtime/src/kmp.h
@@ -3064,6 +3064,7 @@
 extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker );
 extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker );
 extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj );
+extern void __kmp_wait_yield_4_ptr( void * spinner, kmp_uint32 checker, kmp_uint32 (* pred)( void *, kmp_uint32 ), void * obj );
 
 class kmp_flag_32;
 class kmp_flag_64;
Index: runtime/src/kmp_dispatch.cpp
===================================================================
--- runtime/src/kmp_dispatch.cpp
+++ runtime/src/kmp_dispatch.cpp
@@ -2570,6 +2570,32 @@
     return r;
 }
 
+void
+__kmp_wait_yield_4_ptr(void *spinner,
+                       kmp_uint32 checker,
+                       kmp_uint32 (*pred)( void *, kmp_uint32 ),
+                       void *obj    // Higher-level synchronization object, or NULL.
+                       )
+{
+    // note: we may not belong to a team at this point
+    register void *spin = spinner;
+    register kmp_uint32 check = checker;
+    register kmp_uint32 spins;
+    register kmp_uint32 (*f) ( void *, kmp_uint32 ) = pred;
+
+    KMP_FSYNC_SPIN_INIT( obj, spin );
+    KMP_INIT_YIELD( spins );
+    // main wait spin loop
+    while ( !f( spin, check ) ) {
+        KMP_FSYNC_SPIN_PREPARE( obj );
+        /* if we have waited a bit, or are oversubscribed, yield */
+        /* pause is in the following code */
+        KMP_YIELD( TCR_4( __kmp_nth ) > __kmp_avail_proc );
+        KMP_YIELD_SPIN( spins );
+    }
+    KMP_FSYNC_SPIN_ACQUIRED( obj );
+}
+
 } // extern "C"
 
 #ifdef KMP_GOMP_COMPAT
Index: runtime/src/kmp_lock.h
===================================================================
--- runtime/src/kmp_lock.h
+++ runtime/src/kmp_lock.h
@@ -23,6 +23,8 @@
 #include "kmp_debug.h"
 
 #ifdef __cplusplus
+#include <atomic>
+
 extern "C" {
 #endif // __cplusplus
 
@@ -233,17 +235,26 @@
 // Ticket locks.
 // ----------------------------------------------------------------------------
 
+#ifdef __cplusplus
+
 struct kmp_base_ticket_lock {
     // `initialized' must be the first entry in the lock data structure!
-    volatile union kmp_ticket_lock * initialized; // points to the lock union if in initialized state
-    ident_t const *     location;     // Source code location of omp_init_lock().
-    volatile kmp_uint32 next_ticket;  // ticket number to give to next thread which acquires
-    volatile kmp_uint32 now_serving;  // ticket number for thread which holds the lock
-    volatile kmp_int32  owner_id;     // (gtid+1) of owning thread, 0 if unlocked
-    kmp_int32           depth_locked; // depth locked, for nested locks only
-    kmp_lock_flags_t    flags;        // lock specifics, e.g. critical section lock
+    std::atomic<bool>     initialized;
+    volatile union kmp_ticket_lock *self; // points to the lock union
+    ident_t const *       location;     // Source code location of omp_init_lock().
+    std::atomic<unsigned> next_ticket;  // ticket number to give to next thread which acquires
+    std::atomic<unsigned> now_serving;  // ticket number for thread which holds the lock
+    std::atomic<int>      owner_id;     // (gtid+1) of owning thread, 0 if unlocked
+    std::atomic<int>      depth_locked; // depth locked, for nested locks only
+    kmp_lock_flags_t      flags;        // lock specifics, e.g. critical section lock
 };
+#else // __cplusplus
+
+struct kmp_base_ticket_lock;
+
+#endif // !__cplusplus
+
 typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
 
 union KMP_ALIGN_CACHE kmp_ticket_lock {
@@ -260,7 +271,13 @@
 //     kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
 // Note the macro argument. It is important to make var properly initialized.
 //
-#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { (kmp_ticket_lock_t *) & (lock), NULL, 0, 0, 0, -1 } }
+#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { ATOMIC_VAR_INIT(true), \
+                                                &(lock),               \
+                                                NULL,                  \
+                                                ATOMIC_VAR_INIT(0U),   \
+                                                ATOMIC_VAR_INIT(0U),   \
+                                                ATOMIC_VAR_INIT(0),    \
+                                                ATOMIC_VAR_INIT(-1) } }
 
 extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
 extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
Index: runtime/src/kmp_lock.cpp
===================================================================
--- runtime/src/kmp_lock.cpp
+++ runtime/src/kmp_lock.cpp
@@ -14,6 +14,7 @@
 
 #include <stddef.h>
+#include <atomic>
 
 #include "kmp.h"
 #include "kmp_itt.h"
@@ -719,47 +720,39 @@
 static kmp_int32
 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
 {
-    return TCR_4( lck->lk.owner_id ) - 1;
+    return std::atomic_load( &lck->lk.owner_id ) - 1;
 }
 
 static inline bool
 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
 {
-    return lck->lk.depth_locked != -1;
+    return std::atomic_load( &lck->lk.depth_locked ) != -1;
 }
 
 static kmp_uint32
-__kmp_bakery_check(kmp_uint32 value, kmp_uint32 checker)
+__kmp_bakery_check(void *value, kmp_uint32 checker)
 {
-    register kmp_uint32 pause;
-
-    if (value == checker) {
+    if ( std::atomic_load( (std::atomic<unsigned> *)value ) == checker ) {
         return TRUE;
     }
-    for (pause = checker - value; pause != 0; --pause);
     return FALSE;
 }
 
 __forceinline static int
 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
 {
-    kmp_uint32 my_ticket;
-    KMP_MB();
-
-    my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
+    kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_acquire );
 
 #ifdef USE_LOCK_PROFILE
-    if ( TCR_4( lck->lk.now_serving ) != my_ticket )
+    if ( std::atomic_load( &lck->lk.now_serving ) != my_ticket )
         __kmp_printf( "LOCK CONTENTION: %p\n", lck );
     /* else __kmp_printf( "." );*/
 #endif /* USE_LOCK_PROFILE */
 
-    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
-        KMP_FSYNC_ACQUIRED(lck);
+    if ( std::atomic_load( &lck->lk.now_serving ) == my_ticket ) {
         return KMP_LOCK_ACQUIRED_FIRST;
     }
-    KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
-    KMP_FSYNC_ACQUIRED(lck);
+    KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
     return KMP_LOCK_ACQUIRED_FIRST;
 }
 
@@ -773,7 +766,11 @@
 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
 {
     char const * const func = "omp_set_lock";
-    if ( lck->lk.initialized != lck ) {
+
+    if ( ! std::atomic_load( &lck->lk.initialized ) ) {
+        KMP_FATAL( LockIsUninitialized, func );
+    }
+    if ( lck->lk.self != lck ) {
         KMP_FATAL( LockIsUninitialized, func );
     }
     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
@@ -785,19 +782,19 @@
     __kmp_acquire_ticket_lock( lck, gtid );
 
-    lck->lk.owner_id = gtid + 1;
+    std::atomic_store( &lck->lk.owner_id, gtid + 1);
     return KMP_LOCK_ACQUIRED_FIRST;
 }
 
 int
 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
 {
-    kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
-    if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
+    kmp_uint32 my_ticket = std::atomic_load( &lck->lk.next_ticket );
+
+    if ( std::atomic_load( &lck->lk.now_serving ) == my_ticket ) {
         kmp_uint32 next_ticket = my_ticket + 1;
-        if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
-                                          my_ticket, next_ticket ) ) {
-            KMP_FSYNC_ACQUIRED( lck );
+        if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket,
+                 &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) {
            return TRUE;
        }
    }
@@ -808,7 +805,11 @@
 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
 {
     char const * const func = "omp_test_lock";
-    if ( lck->lk.initialized != lck ) {
+
+    if ( ! std::atomic_load( &lck->lk.initialized ) ) {
+        KMP_FATAL( LockIsUninitialized, func );
+    }
+    if ( lck->lk.self != lck ) {
         KMP_FATAL( LockIsUninitialized, func );
     }
     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
@@ -818,7 +819,7 @@
     int retval = __kmp_test_ticket_lock( lck, gtid );
 
     if ( retval ) {
-        lck->lk.owner_id = gtid + 1;
+        std::atomic_store( &lck->lk.owner_id, gtid + 1 );
     }
     return retval;
 }
@@ -826,16 +827,9 @@
 int
 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
 {
-    kmp_uint32  distance;
-
-    KMP_MB();       /* Flush all pending memory write invalidates.  */
-
-    KMP_FSYNC_RELEASING(lck);
-    distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
+    kmp_uint32 distance = std::atomic_load( &lck->lk.next_ticket ) - std::atomic_load( &lck->lk.now_serving );
 
-    KMP_ST_REL32( &(lck->lk.now_serving), lck->lk.now_serving + 1 );
-
-    KMP_MB();       /* Flush all pending memory write invalidates.  */
+    std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release );
 
     KMP_YIELD( distance > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
@@ -846,8 +840,11 @@
 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
 {
     char const * const func = "omp_unset_lock";
-    KMP_MB();  /* in case another processor initialized lock */
-    if ( lck->lk.initialized != lck ) {
+
+    if ( ! std::atomic_load( &lck->lk.initialized ) ) {
+        KMP_FATAL( LockIsUninitialized, func );
+    }
+    if ( lck->lk.self != lck ) {
         KMP_FATAL( LockIsUninitialized, func );
     }
     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
@@ -868,11 +865,12 @@
 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
 {
     lck->lk.location = NULL;
-    TCW_4( lck->lk.next_ticket, 0 );
-    TCW_4( lck->lk.now_serving, 0 );
-    lck->lk.owner_id = 0;      // no thread owns the lock.
-    lck->lk.depth_locked = -1; // -1 => not a nested lock.
-    lck->lk.initialized = (kmp_ticket_lock *)lck;
+    lck->lk.self = lck;
+    std::atomic_store( &lck->lk.next_ticket, 0U );
+    std::atomic_store( &lck->lk.now_serving, 0U );
+    std::atomic_store( &lck->lk.owner_id, 0 );       // no thread owns the lock.
+    std::atomic_store( &lck->lk.depth_locked, -1 );  // -1 => not a nested lock.
+    std::atomic_store( &lck->lk.initialized, true );
 }
 
 static void
@@ -884,19 +882,24 @@
 void
 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
 {
-    lck->lk.initialized = NULL;
-    lck->lk.location    = NULL;
-    lck->lk.next_ticket = 0;
-    lck->lk.now_serving = 0;
-    lck->lk.owner_id = 0;
-    lck->lk.depth_locked = -1;
+    std::atomic_store( &lck->lk.initialized, false );
+    lck->lk.self     = NULL;
+    lck->lk.location = NULL;
+    std::atomic_store( &lck->lk.next_ticket, 0U );
+    std::atomic_store( &lck->lk.now_serving, 0U );
+    std::atomic_store( &lck->lk.owner_id, 0 );
+    std::atomic_store( &lck->lk.depth_locked, -1 );
 }
 
 static void
 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
 {
     char const * const func = "omp_destroy_lock";
-    if ( lck->lk.initialized != lck ) {
+
+    if ( ! std::atomic_load( &lck->lk.initialized ) ) {
+        KMP_FATAL( LockIsUninitialized, func );
+    }
+    if ( lck->lk.self != lck ) {
         KMP_FATAL( LockIsUninitialized, func );
     }
     if ( __kmp_is_ticket_lock_nestable( lck ) ) {
@@ -919,15 +922,13 @@
     KMP_DEBUG_ASSERT( gtid >= 0 );
 
     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
-        lck->lk.depth_locked += 1;
+        std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_acquire );
         return KMP_LOCK_ACQUIRED_NEXT;
     }
     else {
         __kmp_acquire_ticket_lock_timed_template( lck, gtid );
-        KMP_MB();
-        lck->lk.depth_locked = 1;
-        KMP_MB();
-        lck->lk.owner_id = gtid + 1;
+        std::atomic_store( &lck->lk.depth_locked, 1 );
+        std::atomic_store( &lck->lk.owner_id, gtid + 1 );
         return KMP_LOCK_ACQUIRED_FIRST;
     }
 }
@@ -936,7 +937,11 @@
 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
 {
     char const * const func = "omp_set_nest_lock";
-    if ( lck->lk.initialized != lck ) {
+
+    if ( ! std::atomic_load( &lck->lk.initialized ) ) {
+        KMP_FATAL( LockIsUninitialized, func );
+    }
+    if ( lck->lk.self != lck ) {
         KMP_FATAL( LockIsUninitialized, func );
     }
     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
@@ -953,16 +958,15 @@
     KMP_DEBUG_ASSERT( gtid >= 0 );
 
     if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
-        retval = ++lck->lk.depth_locked;
+        retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_acquire ) + 1;
     }
     else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
         retval = 0;
     }
     else {
-        KMP_MB();
-        retval = lck->lk.depth_locked = 1;
-        KMP_MB();
-        lck->lk.owner_id = gtid + 1;
+        std::atomic_store( &lck->lk.depth_locked, 1 );
+        std::atomic_store( &lck->lk.owner_id, gtid + 1 );
+        retval = 1;
     }
     return retval;
 }
@@ -972,7 +976,11 @@
                                            kmp_int32 gtid )
 {
     char const * const func = "omp_test_nest_lock";
-    if ( lck->lk.initialized != lck ) {
+
+    if ( ! std::atomic_load( &lck->lk.initialized ) ) {
+        KMP_FATAL( LockIsUninitialized, func );
+    }
+    if ( lck->lk.self != lck ) {
         KMP_FATAL( LockIsUninitialized, func );
     }
     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
@@ -986,10 +994,8 @@
 {
     KMP_DEBUG_ASSERT( gtid >= 0 );
 
-    KMP_MB();
-    if ( --(lck->lk.depth_locked) == 0 ) {
-        KMP_MB();
-        lck->lk.owner_id = 0;
+    if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_release ) - 1 ) == 0 ) {
+        std::atomic_store( &lck->lk.owner_id, 0 );
         __kmp_release_ticket_lock( lck, gtid );
         return KMP_LOCK_RELEASED;
     }
@@ -1000,8 +1006,11 @@
 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
 {
     char const * const func = "omp_unset_nest_lock";
-    KMP_MB();  /* in case another processor initialized lock */
-    if ( lck->lk.initialized != lck ) {
+
+    if ( ! std::atomic_load( &lck->lk.initialized ) ) {
+        KMP_FATAL( LockIsUninitialized, func );
+    }
+    if ( lck->lk.self != lck ) {
         KMP_FATAL( LockIsUninitialized, func );
     }
     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
@@ -1020,7 +1029,7 @@
 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
 {
     __kmp_init_ticket_lock( lck );
-    lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
+    std::atomic_store( &lck->lk.depth_locked, 0 ); // >= 0 for nestable locks, -1 for simple locks
 }
 
 static void
@@ -1033,14 +1042,18 @@
 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
 {
     __kmp_destroy_ticket_lock( lck );
-    lck->lk.depth_locked = 0;
+    std::atomic_store( &lck->lk.depth_locked, 0 );
 }
 
 static void
 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
 {
     char const * const func = "omp_destroy_nest_lock";
-    if ( lck->lk.initialized != lck ) {
+
+    if ( ! std::atomic_load( &lck->lk.initialized ) ) {
+        KMP_FATAL( LockIsUninitialized, func );
+    }
+    if ( lck->lk.self != lck ) {
         KMP_FATAL( LockIsUninitialized, func );
     }
     if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
@@ -1060,7 +1073,7 @@
 static int
 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
 {
-    return lck == lck->lk.initialized;
+    return std::atomic_load( &lck->lk.initialized ) && ( lck->lk.self == lck );
 }
 
 static const ident_t *
Index: runtime/src/kmp_os.h
===================================================================
--- runtime/src/kmp_os.h
+++ runtime/src/kmp_os.h
@@ -646,6 +646,7 @@
 #endif
 
 #define KMP_WAIT_YIELD     __kmp_wait_yield_4
+#define KMP_WAIT_YIELD_PTR __kmp_wait_yield_4_ptr
 #define KMP_EQ             __kmp_eq_4
 #define KMP_NEQ            __kmp_neq_4
 #define KMP_LT             __kmp_lt_4
Index: runtime/src/kmp_tasking.c
===================================================================
--- runtime/src/kmp_tasking.c
+++ runtime/src/kmp_tasking.c
@@ -1730,12 +1730,14 @@
 
     if ( !is_constrained ) {
         taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
+        KMP_ASSERT(taskdata);
         // Bump head pointer and Wrap.
         victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
     } else {
         // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
         kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;  // Wrap index.
         taskdata = victim_td -> td.td_deque[ tail ];
+        KMP_ASSERT(taskdata);
         // we need to check if the candidate obeys task scheduling constraint:
         // only child of current task can be scheduled
         kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
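
Note on the locking protocol encoded by the hunks above: a thread takes a ticket with an acquire fetch_add on next_ticket, waiters spin on now_serving (through KMP_WAIT_YIELD_PTR and __kmp_bakery_check), and the owner hands the lock off with a release increment of now_serving. The standalone sketch below illustrates the same acquire/release pairing with plain C++11 atomics. It is illustrative only: the class ticket_lock_sketch and its members are not runtime types, and std::this_thread::yield() merely stands in for KMP_YIELD/KMP_YIELD_SPIN.

// Illustrative sketch only; ticket_lock_sketch is not part of the runtime.
#include <atomic>
#include <thread>

class ticket_lock_sketch {
    std::atomic<unsigned> next_ticket{0U};  // ticket handed to the next acquirer
    std::atomic<unsigned> now_serving{0U};  // ticket currently allowed to proceed

public:
    void lock() {
        // Mirrors __kmp_acquire_ticket_lock_timed_template: take a ticket with
        // an acquire fetch_add, then spin until now_serving reaches it.
        unsigned my_ticket = next_ticket.fetch_add(1U, std::memory_order_acquire);
        while (now_serving.load(std::memory_order_acquire) != my_ticket) {
            std::this_thread::yield();  // stands in for KMP_YIELD / KMP_YIELD_SPIN
        }
    }

    void unlock() {
        // Mirrors __kmp_release_ticket_lock: hand off with a release increment.
        now_serving.fetch_add(1U, std::memory_order_release);
    }
};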