Index: runtime/src/include/40/omp.h.var =================================================================== --- runtime/src/include/40/omp.h.var +++ runtime/src/include/40/omp.h.var @@ -84,6 +84,20 @@ extern void __KAI_KMPC_CONVENTION omp_destroy_nest_lock (omp_nest_lock_t *); extern int __KAI_KMPC_CONVENTION omp_test_nest_lock (omp_nest_lock_t *); + /* lock hint type for dynamic user lock */ + typedef enum kmp_lock_hint_t { + kmp_lock_hint_none = 0, + kmp_lock_hint_uncontended, + kmp_lock_hint_contended, + kmp_lock_hint_nonspeculative, + kmp_lock_hint_speculative, + kmp_lock_hint_adaptive, + } kmp_lock_hint_t; + + /* hinted lock initializers */ + extern void __KAI_KMPC_CONVENTION kmp_init_lock_hinted(omp_lock_t *, kmp_lock_hint_t); + extern void __KAI_KMPC_CONVENTION kmp_init_nest_lock_hinted(omp_nest_lock_t *, kmp_lock_hint_t); + /* time API functions */ extern double __KAI_KMPC_CONVENTION omp_get_wtime (void); extern double __KAI_KMPC_CONVENTION omp_get_wtick (void); Index: runtime/src/include/40/omp_lib.h.var =================================================================== --- runtime/src/include/40/omp_lib.h.var +++ runtime/src/include/40/omp_lib.h.var @@ -28,6 +28,7 @@ integer, parameter :: kmp_pointer_kind = int_ptr_kind() integer, parameter :: kmp_size_t_kind = int_ptr_kind() integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() + integer, parameter :: kmp_lock_hint_kind = omp_integer_kind integer (kind=omp_integer_kind), parameter :: openmp_version = $OMP_VERSION integer (kind=omp_integer_kind), parameter :: kmp_version_major = $KMP_VERSION_MAJOR @@ -47,6 +48,13 @@ integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_close = 3 integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_spread = 4 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_none = 0 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_uncontended = 1 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_contended = 2 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_nonspeculative = 3 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_speculative = 4 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 5 + interface !
*** @@ -413,6 +421,18 @@ subroutine kmp_set_warnings_off() bind(c) end subroutine kmp_set_warnings_off + subroutine kmp_init_lock_hinted(lockvar, lockhint) bind(c) + import + integer (kind=omp_lock_kind) lockvar + integer (kind=kmp_lock_hint_kind), value :: lockhint + end subroutine kmp_init_lock_hinted + + subroutine kmp_init_nest_lock_hinted(lockvar, lockhint) bind(c) + import + integer (kind=omp_lock_kind) lockvar + integer (kind=kmp_lock_hint_kind), value :: lockhint + end subroutine kmp_init_nest_lock_hinted + end interface !DIR$ IF DEFINED (__INTEL_OFFLOAD) @@ -480,6 +500,8 @@ !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_free !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_on !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_warnings_off +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_init_lock_hinted +!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_init_nest_lock_hinted !DIR$ IF(__INTEL_COMPILER.GE.1400) !$omp declare target(omp_set_num_threads ) @@ -546,6 +568,8 @@ !$omp declare target(kmp_free ) !$omp declare target(kmp_set_warnings_on ) !$omp declare target(kmp_set_warnings_off ) +!$omp declare target(kmp_init_lock_hinted ) +!$omp declare target(kmp_init_nest_lock_hinted ) !DIR$ ENDIF !DIR$ ENDIF Index: runtime/src/include/40/omp_lib.f.var =================================================================== --- runtime/src/include/40/omp_lib.f.var +++ runtime/src/include/40/omp_lib.f.var @@ -31,6 +31,7 @@ integer, parameter :: kmp_size_t_kind = int_ptr_kind() integer, parameter :: kmp_affinity_mask_kind = int_ptr_kind() integer, parameter :: kmp_cancel_kind = omp_integer_kind + integer, parameter :: kmp_lock_hint_kind = omp_integer_kind end module omp_lib_kinds @@ -60,6 +61,13 @@ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_none = 0 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_uncontended = 1 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_contended = 2 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_nonspeculative = 3 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_speculative = 4 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 5 + interface ! 
*** @@ -436,6 +444,19 @@ integer (kind=kmp_cancel_kind) cancelkind logical (kind=omp_logical_kind) kmp_get_cancellation_status end function kmp_get_cancellation_status + + subroutine kmp_init_lock_hinted(lockvar, lockhint) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + integer (kind=kmp_lock_hint_kind) lockhint + end subroutine kmp_init_lock_hinted + + subroutine kmp_init_nest_lock_hinted(lockvar, lockhint) + use omp_lib_kinds + integer (kind=omp_nest_lock_kind) lockvar + integer (kind=kmp_lock_hint_kind) lockhint + end subroutine kmp_init_nest_lock_hinted + end interface !dec$ if defined(_WIN32) @@ -521,6 +542,9 @@ !dec$ attributes alias:'KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status +!dec$ attributes alias:'KMP_INIT_LOCK_HINTED'::kmp_init_lock_hinted +!dec$ attributes alias:'KMP_INIT_NEST_LOCK_HINTED'::kmp_init_nest_lock_hinted + !dec$ else !*** @@ -597,6 +621,9 @@ !dec$ attributes alias:'_KMP_GET_CANCELLATION_STATUS' :: kmp_get_cancellation_status +!dec$ attributes alias:'_KMP_INIT_LOCK_HINTED'::kmp_init_lock_hinted +!dec$ attributes alias:'_KMP_INIT_NEST_LOCK_HINTED'::kmp_init_nest_lock_hinted + !dec$ endif !dec$ endif @@ -675,6 +702,9 @@ !dec$ attributes alias:'kmp_set_warnings_off_'::kmp_set_warnings_off !dec$ attributes alias:'kmp_get_cancellation_status_'::kmp_get_cancellation_status +!dec$ attributes alias:'kmp_init_lock_hinted_'::kmp_init_lock_hinted +!dec$ attributes alias:'kmp_init_nest_lock_hinted_'::kmp_init_nest_lock_hinted + !dec$ endif !dec$ if defined(__APPLE__) @@ -751,6 +781,9 @@ !dec$ attributes alias:'_kmp_get_cancellation_status_'::kmp_get_cancellation_status +!dec$ attributes alias:'_kmp_init_lock_hinted_'::kmp_init_lock_hinted +!dec$ attributes alias:'_kmp_init_nest_lock_hinted_'::kmp_init_nest_lock_hinted + !dec$ endif end module omp_lib Index: runtime/src/include/40/omp_lib.f90.var =================================================================== --- runtime/src/include/40/omp_lib.f90.var +++ runtime/src/include/40/omp_lib.f90.var @@ -27,6 +27,7 @@ integer, parameter :: kmp_size_t_kind = c_size_t integer, parameter :: kmp_affinity_mask_kind = c_intptr_t integer, parameter :: kmp_cancel_kind = omp_integer_kind + integer, parameter :: kmp_lock_hint_kind = omp_integer_kind end module omp_lib_kinds @@ -58,6 +59,13 @@ integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_sections = 3 integer (kind=kmp_cancel_kind), parameter :: kmp_cancel_taskgroup = 4 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_none = 0 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_uncontended = 1 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_contended = 2 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_nonspeculative = 3 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_speculative = 4 + integer (kind=kmp_lock_hint_kind), parameter :: kmp_lock_hint_adaptive = 5 + interface ! 
*** @@ -438,6 +446,18 @@ logical (kind=omp_logical_kind) kmp_get_cancellation_status end function kmp_get_cancellation_status + subroutine kmp_init_lock_hinted(lockvar, lockhint) bind(c) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + integer (kind=kmp_lock_hint_kind), value :: lockhint + end subroutine kmp_init_lock_hinted + + subroutine kmp_init_nest_lock_hinted(lockvar, lockhint) bind(c) + use omp_lib_kinds + integer (kind=omp_lock_kind) lockvar + integer (kind=kmp_lock_hint_kind), value :: lockhint + end subroutine kmp_init_nest_lock_hinted + end interface end module omp_lib Index: runtime/src/kmp_csupport.c =================================================================== --- runtime/src/kmp_csupport.c +++ runtime/src/kmp_csupport.c @@ -667,10 +667,17 @@ status = 1; if ( __kmp_env_consistency_check ) { +#if KMP_USE_DYNAMIC_LOCK + if (status) + __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 ); + else + __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 ); +#else if (status) __kmp_push_sync( global_tid, ct_master, loc, NULL ); else __kmp_check_sync( global_tid, ct_master, loc, NULL ); +#endif } return status; @@ -764,6 +771,144 @@ __kmp_parallel_dxo( & gtid, & cid, loc ); } +#if KMP_USE_DYNAMIC_LOCK + +static __forceinline kmp_indirect_lock_t * +__kmp_get_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_dyna_lockseq_t seq) +{ + // Code from __kmp_get_critical_section_ptr + // This function returns an indirect lock object instead of a user lock. + kmp_indirect_lock_t **lck, *ret; + lck = (kmp_indirect_lock_t **)crit; + ret = (kmp_indirect_lock_t *)TCR_PTR(*lck); + if (ret == NULL) { + void *idx; + kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq); + kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); + ret = ilk; + DYNA_I_LOCK_FUNC(ilk, init)(ilk->lock); + DYNA_SET_I_LOCK_LOCATION(ilk, loc); + DYNA_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); + KA_TRACE(20, ("__kmp_get_indirect_csptr: initialized indirect lock #%d\n", tag)); +#if USE_ITT_BUILD + __kmp_itt_critical_creating(ilk->lock, loc); +#endif + int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk); + if (status == 0) { +#if USE_ITT_BUILD + __kmp_itt_critical_destroyed(ilk->lock); +#endif + // Postponing destroy, to avoid costly dispatch here. + //DYNA_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); + ret = (kmp_indirect_lock_t *)TCR_PTR(*lck); + KMP_DEBUG_ASSERT(ret != NULL); + } + } + return ret; +} + +// Fast-path acquire tas lock +#define DYNA_ACQUIRE_TAS_LOCK(lock, gtid) { \ + kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ + if (l->lk.poll != DYNA_LOCK_FREE(tas) || \ + ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE(l); \ + KMP_INIT_YIELD(spins); \ + if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ + KMP_YIELD(TRUE); \ + } else { \ + KMP_YIELD_SPIN(spins); \ + } \ + while (l->lk.poll != DYNA_LOCK_FREE(tas) || \ + ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas))) { \ + if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { \ + KMP_YIELD(TRUE); \ + } else { \ + KMP_YIELD_SPIN(spins); \ + } \ + } \ + } \ + KMP_FSYNC_ACQUIRED(l); \ +} + +// Fast-path test tas lock +#define DYNA_TEST_TAS_LOCK(lock, gtid, rc) { \ + kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ + rc = l->lk.poll == DYNA_LOCK_FREE(tas) && \ + KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas)); \ +} + +// Fast-path release tas lock +#define DYNA_RELEASE_TAS_LOCK(lock, gtid) { \ + TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, DYNA_LOCK_FREE(tas)); \ + KMP_MB(); \ +} + +#if DYNA_HAS_FUTEX + +# include <sys/syscall.h> +# include <unistd.h> +# ifndef FUTEX_WAIT +# define FUTEX_WAIT 0 +# endif +# ifndef FUTEX_WAKE +# define FUTEX_WAKE 1 +# endif + +// Fast-path acquire futex lock +#define DYNA_ACQUIRE_FUTEX_LOCK(lock, gtid) { \ + kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ + kmp_int32 gtid_code = (gtid+1) << 1; \ + KMP_MB(); \ + KMP_FSYNC_PREPARE(ftx); \ + kmp_int32 poll_val; \ + while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), \ + DYNA_LOCK_BUSY(gtid_code, futex))) != DYNA_LOCK_FREE(futex)) { \ + kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; \ + if (!cond) { \ + if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex))) { \ + continue; \ + } \ + poll_val |= DYNA_LOCK_BUSY(1, futex); \ + } \ + kmp_int32 rc; \ + if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \ + continue; \ + } \ + gtid_code |= 1; \ + } \ + KMP_FSYNC_ACQUIRED(ftx); \ +} + +// Fast-path test futex lock +#define DYNA_TEST_FUTEX_LOCK(lock, gtid, rc) { \ + kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ + if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1)) { \ + KMP_FSYNC_ACQUIRED(ftx); \ + rc = TRUE; \ + } else { \ + rc = FALSE; \ + } \ +} + +// Fast-path release futex lock +#define DYNA_RELEASE_FUTEX_LOCK(lock, gtid) { \ + kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ + KMP_MB(); \ + KMP_FSYNC_RELEASING(ftx); \ + kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), DYNA_LOCK_FREE(futex)); \ + if (DYNA_LOCK_STRIP(poll_val) & 1) { \ + syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0); \ + } \ + KMP_MB(); \ + KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \ +} + +#endif // DYNA_HAS_FUTEX + +#else // KMP_USE_DYNAMIC_LOCK + static kmp_user_lock_p __kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid ) { @@ -815,6 +960,8 @@ return lck; } +#endif // KMP_USE_DYNAMIC_LOCK + /*! @ingroup WORK_SHARING @param loc source location information. @@ -833,6 +980,47 @@ KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) ); +#if KMP_USE_DYNAMIC_LOCK + // Assumption: all direct locks fit in OMP_CRITICAL_SIZE. + // The global sequence __kmp_user_lock_seq is used unless compiler pushes a value. + if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) { + lck = (kmp_user_lock_p)crit; + // The thread that reaches here first needs to tag the lock word.
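// (A direct-lock tag is DYNA_GET_D_TAG(seq) = (seq << 1) | 1, i.e. always odd and nonzero,
// while an untouched critical name is all zeros and an indirect lock keeps an even value --
// a shifted table index or a pointer. The compare-and-store below therefore succeeds only
// for the first thread that sees the untagged word; later threads find it already tagged.)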
+ if (*((kmp_dyna_lock_t *)lck) == 0) { + KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq)); + } + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); + } +# if USE_ITT_BUILD + __kmp_itt_critical_acquiring(lck); +# endif +# if DYNA_USE_FAST_TAS + if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { + DYNA_ACQUIRE_TAS_LOCK(lck, global_tid); + } else +# elif DYNA_USE_FAST_FUTEX + if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { + DYNA_ACQUIRE_FUTEX_LOCK(lck, global_tid); + } else +# endif + { + DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid); + } + } else { + kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq); + lck = ilk->lock; + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); + } +# if USE_ITT_BUILD + __kmp_itt_critical_acquiring(lck); +# endif + DYNA_I_LOCK_FUNC(ilk, set)(lck, global_tid); + } + +#else // KMP_USE_DYNAMIC_LOCK + //TODO: add THR_OVHD_STATE KMP_CHECK_USER_LOCK_INIT(); @@ -864,9 +1052,10 @@ __kmp_itt_critical_acquiring( lck ); #endif /* USE_ITT_BUILD */ // Value of 'crit' should be good for using as a critical_id of the critical section directive. - __kmp_acquire_user_lock_with_checks( lck, global_tid ); +#endif // KMP_USE_DYNAMIC_LOCK + #if USE_ITT_BUILD __kmp_itt_critical_acquired( lck ); #endif /* USE_ITT_BUILD */ @@ -890,6 +1079,43 @@ KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid )); +#if KMP_USE_DYNAMIC_LOCK + if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) { + lck = (kmp_user_lock_p)crit; + KMP_ASSERT(lck != NULL); + if (__kmp_env_consistency_check) { + __kmp_pop_sync(global_tid, ct_critical, loc); + } +# if USE_ITT_BUILD + __kmp_itt_critical_releasing( lck ); +# endif +# if DYNA_USE_FAST_TAS + if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { + DYNA_RELEASE_TAS_LOCK(lck, global_tid); + } else +# elif DYNA_USE_FAST_FUTEX + if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { + DYNA_RELEASE_FUTEX_LOCK(lck, global_tid); + } else +# endif + { + DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); + } + } else { + kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); + KMP_ASSERT(ilk != NULL); + lck = ilk->lock; + if (__kmp_env_consistency_check) { + __kmp_pop_sync(global_tid, ct_critical, loc); + } +# if USE_ITT_BUILD + __kmp_itt_critical_releasing( lck ); +# endif + DYNA_I_LOCK_FUNC(ilk, unset)(lck, global_tid); + } + +#else // KMP_USE_DYNAMIC_LOCK + if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { lck = (kmp_user_lock_p)crit; @@ -913,9 +1139,10 @@ __kmp_itt_critical_releasing( lck ); #endif /* USE_ITT_BUILD */ // Value of 'crit' should be good for using as a critical_id of the critical section directive. 
- __kmp_release_user_lock_with_checks( lck, global_tid ); +#endif // KMP_USE_DYNAMIC_LOCK + KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid )); } @@ -1319,6 +1546,27 @@ /* initialize the lock */ void __kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_lock"); + } + if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) { + DYNA_INIT_D_LOCK(user_lock, __kmp_user_lock_seq); +# if USE_ITT_BUILD + __kmp_itt_lock_creating((kmp_user_lock_p)user_lock, NULL); +# endif + } else { + DYNA_INIT_I_LOCK(user_lock, __kmp_user_lock_seq); + kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock); + DYNA_SET_I_LOCK_LOCATION(ilk, loc); +# if USE_ITT_BUILD + __kmp_itt_lock_creating(ilk->lock, loc); +# endif + } + +#else // KMP_USE_DYNAMIC_LOCK + static char const * const func = "omp_init_lock"; kmp_user_lock_p lck; KMP_DEBUG_ASSERT( __kmp_init_serial ); @@ -1350,11 +1598,42 @@ #if USE_ITT_BUILD __kmp_itt_lock_creating( lck ); #endif /* USE_ITT_BUILD */ + +#endif // KMP_USE_DYNAMIC_LOCK } // __kmpc_init_lock /* initialize the lock */ void __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); + } + // Invoke init function after converting to nested version. + kmp_dyna_lockseq_t nested_seq; + switch (__kmp_user_lock_seq) { + case lockseq_tas: nested_seq = lockseq_nested_tas; break; +#if DYNA_HAS_FUTEX + case lockseq_futex: nested_seq = lockseq_nested_futex; break; +#endif + case lockseq_ticket: nested_seq = lockseq_nested_ticket; break; + case lockseq_queuing: nested_seq = lockseq_nested_queuing; break; + case lockseq_drdpa: nested_seq = lockseq_nested_drdpa; break; + default: nested_seq = lockseq_nested_queuing; break; + // Use nested queuing lock for lock kinds without "nested" implementation. + } + DYNA_INIT_I_LOCK(user_lock, nested_seq); + // All nested locks are indirect locks. 
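// (An indirect lock stores an even value in the user lock word -- a shifted table index or a
// pointer -- so DYNA_EXTRACT_D_TAG() yields 0 for it and dispatch through the direct tables
// falls through to the indirect entry functions at slot 0.)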
+ kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock); + DYNA_SET_I_LOCK_LOCATION(ilk, loc); +# if USE_ITT_BUILD + __kmp_itt_lock_creating(ilk->lock, loc); +# endif + +#else // KMP_USE_DYNAMIC_LOCK + static char const * const func = "omp_init_nest_lock"; kmp_user_lock_p lck; KMP_DEBUG_ASSERT( __kmp_init_serial ); @@ -1388,11 +1667,25 @@ #if USE_ITT_BUILD __kmp_itt_lock_creating( lck ); #endif /* USE_ITT_BUILD */ + +#endif // KMP_USE_DYNAMIC_LOCK } // __kmpc_init_nest_lock void __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK +# if USE_ITT_BUILD + kmp_user_lock_p lck; + if (DYNA_EXTRACT_D_TAG(user_lock) == 0) { + lck = ((kmp_indirect_lock_t *)DYNA_LOOKUP_I_LOCK(user_lock))->lock; + } else { + lck = (kmp_user_lock_p)user_lock; + } + __kmp_itt_lock_destroyed(lck); +# endif + DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); +#else kmp_user_lock_p lck; if ( ( __kmp_user_lock_kind == lk_tas ) @@ -1427,11 +1720,21 @@ else { __kmp_user_lock_free( user_lock, gtid, lck ); } +#endif // KMP_USE_DYNAMIC_LOCK } // __kmpc_destroy_lock /* destroy the lock */ void __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + +# if USE_ITT_BUILD + kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(user_lock); + __kmp_itt_lock_destroyed(ilk->lock); +# endif + DYNA_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); + +#else // KMP_USE_DYNAMIC_LOCK kmp_user_lock_p lck; @@ -1470,11 +1773,35 @@ else { __kmp_user_lock_free( user_lock, gtid, lck ); } +#endif // KMP_USE_DYNAMIC_LOCK } // __kmpc_destroy_nest_lock void __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { KMP_COUNT_BLOCK(OMP_set_lock); +#if KMP_USE_DYNAMIC_LOCK + int tag = DYNA_EXTRACT_D_TAG(user_lock); +# if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object. 
+# endif +# if DYNA_USE_FAST_TAS + if (tag == locktag_tas && !__kmp_env_consistency_check) { + DYNA_ACQUIRE_TAS_LOCK(user_lock, gtid); + } else +# elif DYNA_USE_FAST_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + DYNA_ACQUIRE_FUTEX_LOCK(user_lock, gtid); + } else +# endif + { + __kmp_direct_set_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } +# if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +# endif + +#else // KMP_USE_DYNAMIC_LOCK + kmp_user_lock_p lck; if ( ( __kmp_user_lock_kind == lk_tas ) @@ -1500,11 +1827,23 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquired( lck ); #endif /* USE_ITT_BUILD */ -} +#endif // KMP_USE_DYNAMIC_LOCK +} void __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + +# if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +# endif + DYNA_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); +# if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +#endif + +#else // KMP_USE_DYNAMIC_LOCK kmp_user_lock_p lck; if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) @@ -1531,11 +1870,33 @@ #if USE_ITT_BUILD __kmp_itt_lock_acquired( lck ); #endif /* USE_ITT_BUILD */ +#endif // KMP_USE_DYNAMIC_LOCK } void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + + int tag = DYNA_EXTRACT_D_TAG(user_lock); +# if USE_ITT_BUILD + __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); +# endif +# if DYNA_USE_FAST_TAS + if (tag == locktag_tas && !__kmp_env_consistency_check) { + DYNA_RELEASE_TAS_LOCK(user_lock, gtid); + } else +# elif DYNA_USE_FAST_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + DYNA_RELEASE_FUTEX_LOCK(user_lock, gtid); + } else +# endif + { + __kmp_direct_unset_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } + +#else // KMP_USE_DYNAMIC_LOCK + kmp_user_lock_p lck; /* Can't use serial interval since not block structured */ @@ -1570,12 +1931,23 @@ #endif /* USE_ITT_BUILD */ RELEASE_LOCK( lck, gtid ); + +#endif // KMP_USE_DYNAMIC_LOCK } /* release the lock */ void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + +# if USE_ITT_BUILD + __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); +# endif + DYNA_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); + +#else // KMP_USE_DYNAMIC_LOCK + kmp_user_lock_p lck; /* Can't use serial interval since not block structured */ @@ -1613,6 +1985,8 @@ #endif /* USE_ITT_BUILD */ RELEASE_NESTED_LOCK( lck, gtid ); + +#endif // KMP_USE_DYNAMIC_LOCK } /* try to acquire the lock */ @@ -1621,6 +1995,39 @@ { KMP_COUNT_BLOCK(OMP_test_lock); KMP_TIME_BLOCK(OMP_test_lock); + +#if KMP_USE_DYNAMIC_LOCK + int rc; + int tag = DYNA_EXTRACT_D_TAG(user_lock); +# if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +# endif +# if DYNA_USE_FAST_TAS + if (tag == locktag_tas && !__kmp_env_consistency_check) { + DYNA_TEST_TAS_LOCK(user_lock, gtid, rc); + } else +# elif DYNA_USE_FAST_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + DYNA_TEST_FUTEX_LOCK(user_lock, gtid, rc); + } else +# endif + { + rc = __kmp_direct_test_ops[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } + if (rc) { +# if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +# endif + return FTN_TRUE; + } else { +# if USE_ITT_BUILD + __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); +# endif + return FTN_FALSE; + } + +#else // 
KMP_USE_DYNAMIC_LOCK + kmp_user_lock_p lck; int rc; @@ -1653,12 +2060,31 @@ return ( rc ? FTN_TRUE : FTN_FALSE ); /* Can't use serial interval since not block structured */ + +#endif // KMP_USE_DYNAMIC_LOCK } /* try to acquire the lock */ int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) { +#if KMP_USE_DYNAMIC_LOCK + int rc; +# if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +# endif + rc = DYNA_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); +# if USE_ITT_BUILD + if (rc) { + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); + } else { + __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); + } +# endif + return rc; + +#else // KMP_USE_DYNAMIC_LOCK + kmp_user_lock_p lck; int rc; @@ -1692,6 +2118,8 @@ return rc; /* Can't use serial interval since not block structured */ + +#endif // KMP_USE_DYNAMIC_LOCK } @@ -1723,6 +2151,29 @@ // should we keep it visible in new reduce block? kmp_user_lock_p lck; +#if KMP_USE_DYNAMIC_LOCK + + if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) { + lck = (kmp_user_lock_p)crit; + if (*((kmp_dyna_lock_t *)lck) == 0) { + KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)lck, 0, DYNA_GET_D_TAG(__kmp_user_lock_seq)); + } + KMP_DEBUG_ASSERT(lck != NULL); + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); + } + DYNA_D_LOCK_FUNC(lck, set)((kmp_dyna_lock_t *)lck, global_tid); + } else { + kmp_indirect_lock_t *ilk = __kmp_get_indirect_csptr(crit, loc, global_tid, __kmp_user_lock_seq); + KMP_DEBUG_ASSERT(ilk != NULL); + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, ilk->lock, __kmp_user_lock_seq); + } + DYNA_I_LOCK_FUNC(ilk, set)(ilk->lock, global_tid); + } + +#else // KMP_USE_DYNAMIC_LOCK + // We know that the fast reduction code is only emitted by Intel compilers // with 32 byte critical sections. If there isn't enough space, then we // have to use a pointer. @@ -1738,6 +2189,8 @@ __kmp_push_sync( global_tid, ct_critical, loc, lck ); __kmp_acquire_user_lock_with_checks( lck, global_tid ); + +#endif // KMP_USE_DYNAMIC_LOCK } // used in a critical section reduce block @@ -1746,6 +2199,22 @@ kmp_user_lock_p lck; +#if KMP_USE_DYNAMIC_LOCK + + if (DYNA_IS_D_LOCK(__kmp_user_lock_seq)) { + lck = (kmp_user_lock_p)crit; + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_critical, loc); + DYNA_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); + } else { + kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_critical, loc); + DYNA_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); + } + +#else // KMP_USE_DYNAMIC_LOCK + // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical // sections. If there isn't enough space, then we have to use a pointer. 
if ( __kmp_base_user_lock_size > 32 ) { @@ -1760,6 +2229,7 @@ __kmp_release_user_lock_with_checks( lck, global_tid ); +#endif // KMP_USE_DYNAMIC_LOCK } // __kmp_end_critical_section_reduce_block @@ -1802,8 +2272,13 @@ __kmp_parallel_initialize(); // check correctness of reduce block nesting +#if KMP_USE_DYNAMIC_LOCK + if ( __kmp_env_consistency_check ) + __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 ); +#else if ( __kmp_env_consistency_check ) __kmp_push_sync( global_tid, ct_reduce, loc, NULL ); +#endif #if OMP_40_ENABLED th = __kmp_thread_from_gtid(global_tid); @@ -1991,8 +2466,13 @@ __kmp_parallel_initialize(); // check correctness of reduce block nesting +#if KMP_USE_DYNAMIC_LOCK + if ( __kmp_env_consistency_check ) + __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 ); +#else if ( __kmp_env_consistency_check ) __kmp_push_sync( global_tid, ct_reduce, loc, NULL ); +#endif packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck ); __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method ); Index: runtime/src/kmp_dispatch.cpp =================================================================== --- runtime/src/kmp_dispatch.cpp +++ runtime/src/kmp_dispatch.cpp @@ -355,7 +355,11 @@ th = __kmp_threads[*gtid_ref]; if ( th -> th.th_root -> r.r_active && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) { +#if KMP_USE_DYNAMIC_LOCK + __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 ); +#else __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL ); +#endif } } } @@ -377,7 +381,11 @@ pr = reinterpret_cast< dispatch_private_info_template< UT >* > ( th -> th.th_dispatch -> th_dispatch_pr_current ); if ( pr -> pushed_ws != ct_none ) { +#if KMP_USE_DYNAMIC_LOCK + __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 ); +#else __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL ); +#endif } } Index: runtime/src/kmp_error.h =================================================================== --- runtime/src/kmp_error.h +++ runtime/src/kmp_error.h @@ -31,10 +31,18 @@ void __kmp_push_parallel( int gtid, ident_t const * ident ); void __kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident ); +#if KMP_USE_DYNAMIC_LOCK +void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 ); +#else void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name ); +#endif void __kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident ); +#if KMP_USE_DYNAMIC_LOCK +void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 ); +#else void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name ); +#endif void __kmp_pop_parallel( int gtid, ident_t const * ident ); enum cons_type __kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident ); Index: runtime/src/kmp_error.c =================================================================== --- runtime/src/kmp_error.c +++ runtime/src/kmp_error.c @@ -287,7 +287,11 @@ } void +#if KMP_USE_DYNAMIC_LOCK +__kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq ) +#else __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) +#endif { struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; @@ -345,7 +349,11 @@ } } } else if ( ct == ct_critical ) { 
+#if KMP_USE_DYNAMIC_LOCK + if ( lck != NULL && __kmp_get_user_lock_owner( lck, seq ) == gtid ) { /* this same thread already has lock for this critical section */ +#else if ( lck != NULL && __kmp_get_user_lock_owner( lck ) == gtid ) { /* this same thread already has lock for this critical section */ +#endif int index = p->s_top; struct cons_data cons = { NULL, ct_critical, 0, NULL }; /* walk up construct stack and try to find critical with matching name */ @@ -380,14 +388,22 @@ } void +#if KMP_USE_DYNAMIC_LOCK +__kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck, kmp_uint32 seq ) +#else __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) +#endif { int tos; struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; KMP_ASSERT( gtid == __kmp_get_gtid() ); KE_TRACE( 10, ("__kmp_push_sync (gtid=%d)\n", gtid ) ); +#if KMP_USE_DYNAMIC_LOCK + __kmp_check_sync( gtid, ct, ident, lck, seq ); +#else __kmp_check_sync( gtid, ct, ident, lck ); +#endif KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) ); tos = ++ p->stack_top; p->stack_data[ tos ].type = ct; Index: runtime/src/kmp_ftn_entry.h =================================================================== --- runtime/src/kmp_ftn_entry.h +++ runtime/src/kmp_ftn_entry.h @@ -802,6 +802,28 @@ typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t; #endif /* KMP_STUB */ +#if KMP_USE_DYNAMIC_LOCK +void FTN_STDCALL +FTN_INIT_LOCK_HINTED( void **user_lock, int KMP_DEREF hint ) +{ + #ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; + #else + __kmp_init_lock_hinted( user_lock, KMP_DEREF hint ); + #endif +} + +void FTN_STDCALL +FTN_INIT_NEST_LOCK_HINTED( void **user_lock, int KMP_DEREF hint ) +{ + #ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; + #else + __kmp_init_nest_lock_hinted( user_lock, KMP_DEREF hint ); + #endif +} +#endif + /* initialize the lock */ void FTN_STDCALL xexpand(FTN_INIT_LOCK)( void **user_lock ) Index: runtime/src/kmp_ftn_os.h =================================================================== --- runtime/src/kmp_ftn_os.h +++ runtime/src/kmp_ftn_os.h @@ -79,6 +79,10 @@ #define FTN_GET_TEAM_NUM omp_get_team_num #endif #define FTN_INIT_LOCK omp_init_lock +#if KMP_USE_DYNAMIC_LOCK + #define FTN_INIT_LOCK_HINTED kmp_init_lock_hinted + #define FTN_INIT_NEST_LOCK_HINTED kmp_init_nest_lock_hinted +#endif #define FTN_DESTROY_LOCK omp_destroy_lock #define FTN_SET_LOCK omp_set_lock #define FTN_UNSET_LOCK omp_unset_lock @@ -171,6 +175,10 @@ #define FTN_GET_TEAM_NUM omp_get_team_num_ #endif #define FTN_INIT_LOCK omp_init_lock_ +#if KMP_USE_DYNAMIC_LOCK + #define FTN_INIT_LOCK_HINTED kmp_init_lock_hinted_ + #define FTN_INIT_NEST_LOCK_HINTED kmp_init_nest_lock_hinted_ +#endif #define FTN_DESTROY_LOCK omp_destroy_lock_ #define FTN_SET_LOCK omp_set_lock_ #define FTN_UNSET_LOCK omp_unset_lock_ @@ -264,6 +272,10 @@ #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM #endif #define FTN_INIT_LOCK OMP_INIT_LOCK +#if KMP_USE_DYNAMIC_LOCK + #define FTN_INIT_LOCK_HINTED KMP_INIT_LOCK_HINTED + #define FTN_INIT_NEST_LOCK_HINTED KMP_INIT_NEST_LOCK_HINTED +#endif #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK #define FTN_SET_LOCK OMP_SET_LOCK #define FTN_UNSET_LOCK OMP_UNSET_LOCK @@ -357,6 +369,10 @@ #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_ #endif #define FTN_INIT_LOCK OMP_INIT_LOCK_ +#if KMP_USE_DYNAMIC_LOCK + #define FTN_INIT_LOCK_HINTED KMP_INIT_LOCK_HINTED_ + #define FTN_INIT_NEST_LOCK_HINTED KMP_INIT_NEST_LOCK_HINTED_ +#endif #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_ 
#define FTN_SET_LOCK OMP_SET_LOCK_ #define FTN_UNSET_LOCK OMP_UNSET_LOCK_ Index: runtime/src/kmp_itt.h =================================================================== --- runtime/src/kmp_itt.h +++ runtime/src/kmp_itt.h @@ -84,7 +84,11 @@ __kmp_inline void __kmp_itt_task_finished( void * object ); // --- Lock reporting --- +#if KMP_USE_DYNAMIC_LOCK +__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * ); +#else __kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock ); +#endif __kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock ); __kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock ); __kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock ); @@ -92,7 +96,11 @@ __kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock ); // --- Critical reporting --- +#if KMP_USE_DYNAMIC_LOCK +__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * ); +#else __kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock ); +#endif __kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock ); __kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock ); __kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock ); Index: runtime/src/kmp_itt.inl =================================================================== --- runtime/src/kmp_itt.inl +++ runtime/src/kmp_itt.inl @@ -734,6 +734,21 @@ // ------------------------------------------------------------------------------------------------- +#if KMP_USE_DYNAMIC_LOCK +// Takes location information directly +__kmp_inline +void +___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) { +#if USE_ITT_NOTIFY + if ( __itt_sync_create_ptr ) { + char const * src = ( loc == NULL ? NULL : loc->psource ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create( lock, type, src, 0 ); + KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); + } +#endif +} +#else // KMP_USE_DYNAMIC_LOCK // Internal guts -- common code for locks and critical sections, do not call directly. __kmp_inline void @@ -750,6 +765,7 @@ }; // if #endif } // ___kmp_itt_lock_init +#endif // KMP_USE_DYNAMIC_LOCK // Internal guts -- common code for locks and critical sections, do not call directly. 
__kmp_inline @@ -765,29 +781,82 @@ // ------------------------------------------------------------------------------------------------- +#if KMP_USE_DYNAMIC_LOCK +void +__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) { + ___kmp_itt_lock_init( lock, "OMP Lock", loc ); +} +#else void __kmp_itt_lock_creating( kmp_user_lock_p lock ) { ___kmp_itt_lock_init( lock, "OMP Lock" ); } // __kmp_itt_lock_creating +#endif void __kmp_itt_lock_acquiring( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + // postpone lock object access + if ( __itt_sync_prepare_ptr ) { + if ( DYNA_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); + __itt_sync_prepare( ilk->lock ); + } else { + __itt_sync_prepare( lock ); + } + } +#else __itt_sync_prepare( lock ); +#endif } // __kmp_itt_lock_acquiring void __kmp_itt_lock_acquired( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + // postpone lock object access + if ( __itt_sync_acquired_ptr ) { + if ( DYNA_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); + __itt_sync_acquired( ilk->lock ); + } else { + __itt_sync_acquired( lock ); + } + } +#else __itt_sync_acquired( lock ); +#endif } // __kmp_itt_lock_acquired void __kmp_itt_lock_releasing( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + if ( __itt_sync_releasing_ptr ) { + if ( DYNA_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); + __itt_sync_releasing( ilk->lock ); + } else { + __itt_sync_releasing( lock ); + } + } +#else __itt_sync_releasing( lock ); +#endif } // __kmp_itt_lock_releasing void __kmp_itt_lock_cancelled( kmp_user_lock_p lock ) { +#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY + if ( __itt_sync_cancel_ptr ) { + if ( DYNA_EXTRACT_D_TAG(lock) == 0 ) { + kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock); + __itt_sync_cancel( ilk->lock ); + } else { + __itt_sync_cancel( lock ); + } + } +#else __itt_sync_cancel( lock ); +#endif } // __kmp_itt_lock_cancelled void @@ -802,11 +871,17 @@ Critical sections are treated exactly as locks (but have different object type). ------------------------------------------------------------------------------------------------ */ - +#if KMP_USE_DYNAMIC_LOCK +void +__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) { + ___kmp_itt_lock_init( lock, "OMP Critical", loc); +} +#else void __kmp_itt_critical_creating( kmp_user_lock_p lock ) { ___kmp_itt_lock_init( lock, "OMP Critical" ); } // __kmp_itt_critical_creating +#endif void __kmp_itt_critical_acquiring( kmp_user_lock_p lock ) { Index: runtime/src/kmp_lock.h =================================================================== --- runtime/src/kmp_lock.h +++ runtime/src/kmp_lock.h @@ -619,6 +619,8 @@ typedef union kmp_user_lock *kmp_user_lock_p; +#if ! KMP_USE_DYNAMIC_LOCK + extern size_t __kmp_base_user_lock_size; extern size_t __kmp_user_lock_size; @@ -1015,9 +1017,220 @@ } \ } +#endif // KMP_USE_DYNAMIC_LOCK + #undef KMP_PAD #undef KMP_GTID_DNE +#if KMP_USE_DYNAMIC_LOCK + +#define DYNA_HAS_FUTEX (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) +#define DYNA_HAS_HLE (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC) +#define DYNA_USE_FAST_FUTEX 0 && DYNA_HAS_FUTEX +#define DYNA_USE_FAST_TAS 1 && DYNA_HAS_FUTEX + +// List of lock definitions; all nested locks are indirect locks. +// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE. +// All nested locks are indirect lock types. 
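// As an illustration of how these X-macro lists are consumed: with futex and HLE available,
// FOREACH_D_LOCK(m, a) expands to m(tas, a) m(futex, a) m(hle, a), so a helper such as
//   #define expand_tag(l, a) locktag_##l = DYNA_GET_D_TAG(lockseq_##l),
// stamps out one enumerator (or one function, or one jump-table entry) per lock kind.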
+#if DYNA_HAS_FUTEX +# if DYNA_HAS_HLE +# define FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) +# define DYNA_LAST_D_LOCK_SEQ lockseq_hle +# else +# define FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) +# define DYNA_LAST_D_LOCK_SEQ lockseq_futex +# endif // DYNA_HAS_HLE +# if KMP_USE_ADAPTIVE_LOCKS +# define FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) \ + m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# else +# define FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ + m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# endif // KMP_USE_ADAPTIVE_LOCKS +#else +# if DYNA_HAS_HLE +# define FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) +# define DYNA_LAST_D_LOCK_SEQ lockseq_hle +# else +# define FOREACH_D_LOCK(m, a) m(tas, a) +# define DYNA_LAST_D_LOCK_SEQ lockseq_tas +# endif // DYNA_HAS_HLE +# if KMP_USE_ADAPTIVE_LOCKS +# define FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) \ + m(nested_tas, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# else +# define FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ + m(nested_tas, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +# endif // KMP_USE_ADAPTIVE_LOCKS +#endif // DYNA_HAS_FUTEX + +// Information used in dynamic dispatch +#define DYNA_LOCK_VALUE_SHIFT 8 +#define DYNA_LOCK_TYPE_MASK ((1<<DYNA_LOCK_VALUE_SHIFT)-1) +#define DYNA_NUM_D_LOCKS DYNA_LAST_D_LOCK_SEQ +#define DYNA_NUM_I_LOCKS (locktag_nested_drdpa+1) + +// Base type for dynamic locks. +typedef kmp_uint32 kmp_dyna_lock_t; + +// Lock sequences enumerating all lock kinds; lockseq_indirect occupies slot 0. +typedef enum { + lockseq_indirect = 0, +#define expand_seq(l,a) lockseq_##l, + FOREACH_D_LOCK(expand_seq, 0) + FOREACH_I_LOCK(expand_seq, 0) +#undef expand_seq +} kmp_dyna_lockseq_t; + +// Enumerates indirect lock tags. +typedef enum { +#define expand_tag(l,a) locktag_##l, + FOREACH_I_LOCK(expand_tag, 0) +#undef expand_tag +} kmp_indirect_locktag_t; + +// Utility macros that extract information from lock sequences. +#define DYNA_IS_D_LOCK(seq) (seq >= lockseq_tas && seq <= DYNA_LAST_D_LOCK_SEQ) +#define DYNA_IS_I_LOCK(seq) (seq >= lockseq_ticket && seq <= lockseq_nested_drdpa) +#define DYNA_GET_I_TAG(seq) (kmp_indirect_locktag_t)(seq - lockseq_ticket) +#define DYNA_GET_D_TAG(seq) (seq<<1 | 1) + +// Enumerates direct lock tags starting from indirect tag. +typedef enum { +#define expand_tag(l,a) locktag_##l = DYNA_GET_D_TAG(lockseq_##l), + FOREACH_D_LOCK(expand_tag, 0) +#undef expand_tag +} kmp_direct_locktag_t; + +// Indirect lock type +typedef struct { + kmp_user_lock_p lock; + kmp_indirect_locktag_t type; +} kmp_indirect_lock_t; + +// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking. +extern void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); +extern void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *); +extern void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32); +extern void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32); + +// Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking. +extern void (*__kmp_indirect_init_ops[])(kmp_user_lock_p); +extern void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p); +extern void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32); +extern void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32); +extern int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32); + +// Extracts direct lock tag from a user lock pointer +#define DYNA_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & DYNA_LOCK_TYPE_MASK & -(*((kmp_dyna_lock_t *)(l)) & 1)) + +// Extracts indirect lock index from a user lock pointer +#define DYNA_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1) + +// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
+#define DYNA_D_LOCK_FUNC(l, op) __kmp_direct_##op##_ops[DYNA_EXTRACT_D_TAG(l)] + +// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type). +#define DYNA_I_LOCK_FUNC(l, op) __kmp_indirect_##op##_ops[((kmp_indirect_lock_t *)(l))->type] + +// Initializes a direct lock with the given lock pointer and lock sequence. +#define DYNA_INIT_D_LOCK(l, seq) __kmp_direct_init_ops[DYNA_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq) + +// Initializes an indirect lock with the given lock pointer and lock sequence. +#define DYNA_INIT_I_LOCK(l, seq) __kmp_direct_init_ops[0]((kmp_dyna_lock_t *)(l), seq) + +// Returns "free" lock value for the given lock type. +#define DYNA_LOCK_FREE(type) (locktag_##type) + +// Returns "busy" lock value for the given lock type. +#define DYNA_LOCK_BUSY(v, type) ((v)<<DYNA_LOCK_VALUE_SHIFT | locktag_##type) + +// Returns lock value after removing (shifting out) the lock tag. +#define DYNA_LOCK_STRIP(v) ((v)>>DYNA_LOCK_VALUE_SHIFT) + +// Updates __kmp_user_lock_seq with the given lock type. +#define DYNA_STORE_LOCK_SEQ(type) (__kmp_user_lock_seq = lockseq_##type) + +// Internal entries for hinted lock initializers. +extern void __kmp_init_lock_hinted(void **, int); +extern void __kmp_init_nest_lock_hinted(void **, int); + +// Initializes global states and data structures for managing dynamic user locks. +extern void __kmp_init_dynamic_user_locks(); + +// Allocates and returns an indirect lock with the given indirect lock tag. +extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t); + +// Cleans up global states and data structures for managing dynamic user locks. +extern void __kmp_cleanup_indirect_user_locks(); + +// Default user lock sequence when not using hinted locks. +extern kmp_dyna_lockseq_t __kmp_user_lock_seq; + +// Jump table for "set lock location", available only for indirect locks. +extern void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *); +#define DYNA_SET_I_LOCK_LOCATION(lck, loc) { \ + if (__kmp_indirect_set_location[(lck)->type] != NULL) \ + __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \ +} + +// Jump table for "set lock flags", available only for indirect locks. +extern void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t); +#define DYNA_SET_I_LOCK_FLAGS(lck, flag) { \ + if (__kmp_indirect_set_flags[(lck)->type] != NULL) \ + __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \ +} + +// Jump table for "get lock location", available only for indirect locks. +extern const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p); +#define DYNA_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \ + ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \ + : NULL ) + +// Jump table for "get lock flags", available only for indirect locks. +extern kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p); +#define DYNA_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \ + ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \ + : NULL ) + +// +// Lock table for indirect locks. +// +// Simple linear structure is used to keep pointers to allocated indirect locks. +extern kmp_indirect_lock_t **__kmp_indirect_lock_table; +// Current size of the lock table; it may increase but never shrink. +extern kmp_lock_index_t __kmp_indirect_lock_table_size; +// Next index to be used for a new indirect lock (= number of indirect locks allocated).
+extern kmp_lock_index_t __kmp_indirect_lock_table_next; +// Number of locks in a lock block, which is fixed to "1" now. +// TODO: No lock block implementation now. If we do support, we need to manage lock block data +// structure for each indirect lock type. +extern int __kmp_num_locks_in_block; + +// Fast lock table lookup without consistency checking +#define DYNA_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \ + ? __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(l)] \ + : *((kmp_indirect_lock_t **)l) ) + +// Used once in kmp_error.c +extern kmp_int32 +__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32); + +#else // KMP_USE_DYNAMIC_LOCK + +# define DYNA_LOCK_BUSY(v, type) (v) +# define DYNA_LOCK_FREE(type) 0 +# define DYNA_LOCK_STRIP(v) (v) +# define DYNA_STORE_LOCK_SEQ(seq) + +#endif // KMP_USE_DYNAMIC_LOCK + #ifdef __cplusplus } // extern "C" #endif // __cplusplus Index: runtime/src/kmp_lock.cpp =================================================================== --- runtime/src/kmp_lock.cpp +++ runtime/src/kmp_lock.cpp @@ -75,7 +75,7 @@ static kmp_int32 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck ) { - return TCR_4( lck->lk.poll ) - 1; + return DYNA_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1; } static inline bool @@ -96,8 +96,8 @@ /* else __kmp_printf( "." );*/ #endif /* USE_LOCK_PROFILE */ - if ( ( lck->lk.poll == 0 ) - && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) { + if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) ) + && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) { KMP_FSYNC_ACQUIRED(lck); return; } @@ -113,8 +113,8 @@ KMP_YIELD_SPIN( spins ); } - while ( ( lck->lk.poll != 0 ) || - ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) ) { + while ( ( lck->lk.poll != DYNA_LOCK_FREE(tas) ) || + ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) ) { // // FIXME - use exponential backoff here // @@ -152,8 +152,8 @@ int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) { - if ( ( lck->lk.poll == 0 ) - && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) { + if ( ( lck->lk.poll == DYNA_LOCK_FREE(tas) ) + && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(tas), DYNA_LOCK_BUSY(gtid+1, tas) ) ) { KMP_FSYNC_ACQUIRED( lck ); return TRUE; } @@ -177,8 +177,7 @@ KMP_MB(); /* Flush all pending memory write invalidates. */ KMP_FSYNC_RELEASING(lck); - KMP_ST_REL32( &(lck->lk.poll), 0 ); - + KMP_ST_REL32( &(lck->lk.poll), DYNA_LOCK_FREE(tas) ); KMP_MB(); /* Flush all pending memory write invalidates. */ KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? 
__kmp_avail_proc : @@ -207,7 +206,7 @@ void __kmp_init_tas_lock( kmp_tas_lock_t * lck ) { - TCW_4( lck->lk.poll, 0 ); + TCW_4( lck->lk.poll, DYNA_LOCK_FREE(tas) ); } static void @@ -370,7 +369,7 @@ static kmp_int32 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) { - return ( TCR_4( lck->lk.poll ) >> 1 ) - 1; + return DYNA_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1; } static inline bool @@ -398,9 +397,11 @@ lck, lck->lk.poll, gtid ) ); kmp_int32 poll_val; - while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), 0, - gtid_code ) ) != 0 ) { - kmp_int32 cond = poll_val & 1; + + while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), + DYNA_LOCK_BUSY(gtid_code, futex) ) ) != DYNA_LOCK_FREE(futex) ) { + + kmp_int32 cond = DYNA_LOCK_STRIP(poll_val) & 1; KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", lck, gtid, poll_val, cond ) ); @@ -417,13 +418,12 @@ // Try to set the lsb in the poll to indicate to the owner // thread that they need to wake this thread up. // - if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), - poll_val, poll_val | 1 ) ) { + if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | DYNA_LOCK_BUSY(1, futex) ) ) { KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", lck, lck->lk.poll, gtid ) ); continue; } - poll_val |= 1; + poll_val |= DYNA_LOCK_BUSY(1, futex); KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck, lck->lk.poll, gtid ) ); @@ -479,7 +479,7 @@ int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) { - if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) { + if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex), DYNA_LOCK_BUSY(gtid+1, futex) << 1 ) ) { KMP_FSYNC_ACQUIRED( lck ); return TRUE; } @@ -507,15 +507,15 @@ KMP_FSYNC_RELEASING(lck); - kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), 0 ); + kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), DYNA_LOCK_FREE(futex) ); KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", lck, gtid, poll_val ) ); - if ( poll_val & 1 ) { + if ( DYNA_LOCK_STRIP(poll_val) & 1 ) { KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", lck, gtid ) ); - syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 ); + syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, DYNA_LOCK_BUSY(1, futex), NULL, NULL, 0 ); } KMP_MB(); /* Flush all pending memory write invalidates. */ @@ -549,7 +549,7 @@ void __kmp_init_futex_lock( kmp_futex_lock_t * lck ) { - TCW_4( lck->lk.poll, 0 ); + TCW_4( lck->lk.poll, DYNA_LOCK_FREE(futex) ); } static void @@ -2933,6 +2933,576 @@ lck->lk.flags = flags; } +#if KMP_USE_DYNAMIC_LOCK + +// Definitions of lock hints. +# ifndef __OMP_H +typedef enum kmp_lock_hint_t { + kmp_lock_hint_none = 0, + kmp_lock_hint_uncontended, + kmp_lock_hint_contended, + kmp_lock_hint_nonspeculative, + kmp_lock_hint_speculative, + kmp_lock_hint_adaptive, +} kmp_lock_hint_t; +# endif + +// Direct lock initializers. It simply writes a tag to the low 8 bits of the lock word.
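// For example, the TAS expansion below is equivalent to
//   static void init_tas_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq)
//   { *lck = DYNA_LOCK_FREE(tas); }
// so the "free" value of a direct lock is simply its tag.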
+#define expand_init_lock(l, a) \ +static void init_##l##_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) { \ + *lck = DYNA_LOCK_FREE(l); \ + KA_TRACE(20, ("Initialized direct lock, tag = %x\n", *lck)); \ +} +FOREACH_D_LOCK(expand_init_lock, 0) +#undef expand_init_lock + +#if DYNA_HAS_HLE + +// HLE lock functions - imported from the testbed runtime. +#if KMP_MIC +# define machine_pause() _mm_delay_32(10) // TODO: find the right argument +#else +# define machine_pause() _mm_pause() +#endif +#define HLE_ACQUIRE ".byte 0xf2;" +#define HLE_RELEASE ".byte 0xf3;" + +static inline kmp_uint32 +swap4(kmp_uint32 volatile *p, kmp_uint32 v) +{ + __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" + : "+r"(v), "+m"(*p) + : + : "memory"); + return v; +} + +static void +__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) +{ + *lck = 0; +} + +static void +__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + // Use gtid for DYNA_LOCK_BUSY if necessary + if (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)) { + int delay = 1; + do { + while (*(kmp_uint32 volatile *)lck != DYNA_LOCK_FREE(hle)) { + for (int i = delay; i != 0; --i) + machine_pause(); + delay = ((delay << 1) | 1) & 7; + } + } while (swap4(lck, DYNA_LOCK_BUSY(1, hle)) != DYNA_LOCK_FREE(hle)); + } +} + +static void +__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks +} + +static void +__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + __asm__ volatile(HLE_RELEASE "movl %1,%0" + : "=m"(*lck) + : "r"(DYNA_LOCK_FREE(hle)) + : "memory"); +} + +static void +__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + __kmp_release_hle_lock(lck, gtid); // TODO: add checks +} + +static int +__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + return swap4(lck, DYNA_LOCK_BUSY(1, hle)) == DYNA_LOCK_FREE(hle); +} + +static int +__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) +{ + return __kmp_test_hle_lock(lck, gtid); // TODO: add checks +} + +#endif // DYNA_HAS_HLE + +// Entry functions for indirect locks (first element of direct_*_ops[]). +static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); +static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); +static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); +static void __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); +static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); +static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); +static void __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); +static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); + +// +// Jump tables for the indirect lock functions. +// Only fill in the odd entries, that avoids the need to shift out the low bit. +// +#define expand_func0(l, op) 0,op##_##l##_##lock, +void (*__kmp_direct_init_ops[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) + = { __kmp_init_indirect_lock, 0, FOREACH_D_LOCK(expand_func0, init) }; + +#define expand_func1(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_##lock, +void (*__kmp_direct_destroy_ops[])(kmp_dyna_lock_t *) + = { __kmp_destroy_indirect_lock, 0, FOREACH_D_LOCK(expand_func1, destroy) }; + +// Differentiates *lock and *lock_with_checks. 
+#define expand_func2(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, +#define expand_func2c(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, +static void (*direct_set_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) + = { { __kmp_set_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, acquire) }, + { __kmp_set_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, acquire) } }; +static void (*direct_unset_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) + = { { __kmp_unset_indirect_lock, 0, FOREACH_D_LOCK(expand_func2, release) }, + { __kmp_unset_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func2c, release) } }; + +#define expand_func3(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock, +#define expand_func3c(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, +static int (*direct_test_tab[][DYNA_NUM_D_LOCKS*2+2])(kmp_dyna_lock_t *, kmp_int32) + = { { __kmp_test_indirect_lock, 0, FOREACH_D_LOCK(expand_func3, test) }, + { __kmp_test_indirect_lock_with_checks, 0, FOREACH_D_LOCK(expand_func3c, test) } }; + +// Exposes only one set of jump tables (*lock or *lock_with_checks). +void (*(*__kmp_direct_set_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; +void (*(*__kmp_direct_unset_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; +int (*(*__kmp_direct_test_ops))(kmp_dyna_lock_t *, kmp_int32) = 0; + +// +// Jump tables for the indirect lock functions. +// +#define expand_func4(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, +void (*__kmp_indirect_init_ops[])(kmp_user_lock_p) + = { FOREACH_I_LOCK(expand_func4, init) }; +void (*__kmp_indirect_destroy_ops[])(kmp_user_lock_p) + = { FOREACH_I_LOCK(expand_func4, destroy) }; + +// Differentiates *lock and *lock_with_checks. +#define expand_func5(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, +#define expand_func5c(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, +static void (*indirect_set_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) + = { { FOREACH_I_LOCK(expand_func5, acquire) }, + { FOREACH_I_LOCK(expand_func5c, acquire) } }; +static void (*indirect_unset_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) + = { { FOREACH_I_LOCK(expand_func5, release) }, + { FOREACH_I_LOCK(expand_func5c, release) } }; + +#define expand_func6(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, +#define expand_func6c(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, +static int (*indirect_test_tab[][DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_int32) + = { { FOREACH_I_LOCK(expand_func6, test) }, + { FOREACH_I_LOCK(expand_func6c, test) } }; + +// Exposes only one set of jump tables (*lock or *lock_with_checks). +void (*(*__kmp_indirect_set_ops))(kmp_user_lock_p, kmp_int32) = 0; +void (*(*__kmp_indirect_unset_ops))(kmp_user_lock_p, kmp_int32) = 0; +int (*(*__kmp_indirect_test_ops))(kmp_user_lock_p, kmp_int32) = 0; + +// Lock index table. +kmp_indirect_lock_t **__kmp_indirect_lock_table; +kmp_lock_index_t __kmp_indirect_lock_table_size; +kmp_lock_index_t __kmp_indirect_lock_table_next; + +// Size of indirect locks. 
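// Note: the entries must stay in FOREACH_I_LOCK order (ticket, queuing, [adaptive], drdpa,
// then the nested variants), because the array is indexed by kmp_indirect_locktag_t; the
// nested flavors reuse the size of the corresponding base lock structure.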
+static kmp_uint32 __kmp_indirect_lock_size[DYNA_NUM_I_LOCKS] = { + sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), +#if KMP_USE_ADAPTIVE_LOCKS + sizeof(kmp_adaptive_lock_t), +#endif + sizeof(kmp_drdpa_lock_t), + sizeof(kmp_tas_lock_t), +#if DYNA_HAS_FUTEX + sizeof(kmp_futex_lock_t), +#endif + sizeof(kmp_ticket_lock_t), sizeof(kmp_queuing_lock_t), + sizeof(kmp_drdpa_lock_t) +}; + +// Jump tables for lock accessor/modifier. +void (*__kmp_indirect_set_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; +void (*__kmp_indirect_set_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; +const ident_t * (*__kmp_indirect_get_location[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; +kmp_lock_flags_t (*__kmp_indirect_get_flags[DYNA_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; + +// Use different lock pools for different lock types. +static kmp_indirect_lock_t * __kmp_indirect_lock_pool[DYNA_NUM_I_LOCKS] = { 0 }; + +// Inserts the given lock ptr to the lock table. +kmp_lock_index_t +__kmp_insert_indirect_lock(kmp_indirect_lock_t *lck) +{ + kmp_lock_index_t next = __kmp_indirect_lock_table_next; + // Check capacity and double the size if required + if (next >= __kmp_indirect_lock_table_size) { + kmp_lock_index_t i; + kmp_lock_index_t size = __kmp_indirect_lock_table_size; + kmp_indirect_lock_t **old_table = __kmp_indirect_lock_table; + __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(2*next*sizeof(kmp_indirect_lock_t *)); + memcpy(__kmp_indirect_lock_table, old_table, next*sizeof(kmp_indirect_lock_t *)); + __kmp_free(old_table); + __kmp_indirect_lock_table_size = 2*next; + } + // Insert lck to the table and return the index. + __kmp_indirect_lock_table[next] = lck; + __kmp_indirect_lock_table_next++; + return next; +} + +// User lock allocator for dynamically dispatched locks. +kmp_indirect_lock_t * +__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) +{ + kmp_indirect_lock_t *lck; + kmp_lock_index_t idx; + + __kmp_acquire_lock(&__kmp_global_lock, gtid); + + if (__kmp_indirect_lock_pool[tag] != NULL) { + lck = __kmp_indirect_lock_pool[tag]; + if (OMP_LOCK_T_SIZE < sizeof(void *)) + idx = lck->lock->pool.index; + __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; + } else { + lck = (kmp_indirect_lock_t *)__kmp_allocate(sizeof(kmp_indirect_lock_t)); + lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); + if (OMP_LOCK_T_SIZE < sizeof(void *)) + idx = __kmp_insert_indirect_lock(lck); + } + + __kmp_release_lock(&__kmp_global_lock, gtid); + + lck->type = tag; + + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. + } else { + *((kmp_indirect_lock_t **)user_lock) = lck; + } + + return lck; +} + +// User lock lookup for dynamically dispatched locks. 
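
/*
 * Illustrative sketch (not part of the patch): when the OpenMP lock object is
 * too small to hold a pointer, the allocator above stores (table index << 1)
 * in the user lock word, so indirect lock words are always even, while direct
 * lock words keep their tag in the low bits and stay odd. The low bit is then
 * enough to tell the two cases apart. Names and the exact bit layout here are
 * simplified assumptions for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t lock_word_t;

static int         word_is_direct(lock_word_t w)      { return (w & 1u) != 0; }
static lock_word_t word_from_index(uint32_t tbl_idx)  { return tbl_idx << 1; }
static uint32_t    index_from_word(lock_word_t w)     { return w >> 1; }

int main(void)
{
    lock_word_t ind = word_from_index(7); /* even: indirect lock, table slot 7 */
    lock_word_t dir = 0x5;                /* odd: direct lock, tag stored inline */
    printf("ind: direct=%d index=%u\n", word_is_direct(ind), index_from_word(ind));
    printf("dir: direct=%d\n", word_is_direct(dir));
    return 0;
}
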
+static __forceinline +kmp_indirect_lock_t * +__kmp_lookup_indirect_lock(void **user_lock, const char *func) +{ + if (__kmp_env_consistency_check) { + kmp_indirect_lock_t *lck = NULL; + if (user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, func); + } + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + kmp_lock_index_t idx = DYNA_EXTRACT_I_INDEX(user_lock); + if (idx < 0 || idx >= __kmp_indirect_lock_table_size) { + KMP_FATAL(LockIsUninitialized, func); + } + lck = __kmp_indirect_lock_table[idx]; + } else { + lck = *((kmp_indirect_lock_t **)user_lock); + } + if (lck == NULL) { + KMP_FATAL(LockIsUninitialized, func); + } + return lck; + } else { + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + return __kmp_indirect_lock_table[DYNA_EXTRACT_I_INDEX(user_lock)]; + } else { + return *((kmp_indirect_lock_t **)user_lock); + } + } +} + +static void +__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) +{ +#if KMP_USE_ADAPTIVE_LOCKS + if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { + KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); + seq = lockseq_queuing; + } +#endif + kmp_indirect_locktag_t tag = DYNA_GET_I_TAG(seq); + kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); + DYNA_I_LOCK_FUNC(l, init)(l->lock); + KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock, tag = %x\n", l->type)); +} + +static void +__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) +{ + kmp_uint32 gtid = __kmp_entry_gtid(); + kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); + DYNA_I_LOCK_FUNC(l, destroy)(l->lock); + kmp_indirect_locktag_t tag = l->type; + + __kmp_acquire_lock(&__kmp_global_lock, gtid); + + // Use the base lock's space to keep the pool chain. + l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + l->lock->pool.index = DYNA_EXTRACT_I_INDEX(lock); + } + __kmp_indirect_lock_pool[tag] = l; + + __kmp_release_lock(&__kmp_global_lock, gtid); +} + +static void +__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); + DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); +} + +static void +__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); + DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); +} + +static int +__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = DYNA_LOOKUP_I_LOCK(lock); + return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); +} + +static void +__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); + DYNA_I_LOCK_FUNC(l, set)(l->lock, gtid); +} + +static void +__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); + DYNA_I_LOCK_FUNC(l, unset)(l->lock, gtid); +} + +static int +__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) +{ + kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); + return DYNA_I_LOCK_FUNC(l, test)(l->lock, gtid); +} + +kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; + +// Initialize a hinted lock. 
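
/*
 * Illustrative sketch (not part of the patch): destroyed indirect locks above
 * are not freed immediately; they are pushed onto a per-type pool and reused by
 * the next allocation of the same type. A minimal version of that free-list
 * scheme, with hypothetical names and without the global lock the runtime takes
 * around these operations, could look like this.
 */
#include <stdlib.h>

enum { TAG_TICKET, TAG_QUEUING, N_TAGS };

typedef struct pooled_lock {
    struct pooled_lock *next;   /* free-list link while the lock sits in a pool */
    int tag;                    /* which per-type pool this lock belongs to */
    /* ... actual lock storage would follow ... */
} pooled_lock_t;

static pooled_lock_t *lock_pool[N_TAGS];

static pooled_lock_t *pool_alloc_lock(int tag)
{
    pooled_lock_t *l = lock_pool[tag];
    if (l != NULL) {
        lock_pool[tag] = l->next;          /* reuse a previously destroyed lock */
    } else {
        l = (pooled_lock_t *)malloc(sizeof(*l));
        if (l == NULL)
            return NULL;                   /* allocation failure */
    }
    l->tag = tag;
    return l;
}

static void pool_destroy_lock(pooled_lock_t *l)
{
    l->next = lock_pool[l->tag];           /* push onto the matching pool */
    lock_pool[l->tag] = l;
}

int main(void)
{
    pooled_lock_t *a = pool_alloc_lock(TAG_TICKET);
    pool_destroy_lock(a);                        /* returns to the ticket pool */
    pooled_lock_t *b = pool_alloc_lock(TAG_TICKET); /* reuses the same storage */
    pool_destroy_lock(b);
    return 0;
}
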
+void
+__kmp_init_lock_hinted(void **lock, int hint)
+{
+    kmp_dyna_lockseq_t seq;
+    switch (hint) {
+        case kmp_lock_hint_uncontended:
+            seq = lockseq_tas;
+            break;
+        case kmp_lock_hint_speculative:
+#if DYNA_HAS_HLE
+            seq = lockseq_hle;
+#else
+            seq = lockseq_tas;
+#endif
+            break;
+        case kmp_lock_hint_adaptive:
+#if KMP_USE_ADAPTIVE_LOCKS
+            seq = lockseq_adaptive;
+#else
+            seq = lockseq_queuing;
+#endif
+            break;
+        // Defaults to queuing locks.
+        case kmp_lock_hint_contended:
+        case kmp_lock_hint_nonspeculative:
+        default:
+            seq = lockseq_queuing;
+            break;
+    }
+    if (DYNA_IS_D_LOCK(seq)) {
+        DYNA_INIT_D_LOCK(lock, seq);
+#if USE_ITT_BUILD
+        __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
+#endif
+    } else {
+        DYNA_INIT_I_LOCK(lock, seq);
+#if USE_ITT_BUILD
+        kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
+        __kmp_itt_lock_creating(ilk->lock, NULL);
+#endif
+    }
+}
+
+// This is used only in kmp_error.c when consistency checking is on.
+kmp_int32
+__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq)
+{
+    switch (seq) {
+        case lockseq_tas:
+        case lockseq_nested_tas:
+            return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
+#if DYNA_HAS_FUTEX
+        case lockseq_futex:
+        case lockseq_nested_futex:
+            return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
+#endif
+        case lockseq_ticket:
+        case lockseq_nested_ticket:
+            return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
+        case lockseq_queuing:
+        case lockseq_nested_queuing:
+#if KMP_USE_ADAPTIVE_LOCKS
+        case lockseq_adaptive:
+#endif
+            return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
+        case lockseq_drdpa:
+        case lockseq_nested_drdpa:
+            return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
+        default:
+            return 0;
+    }
+}
+
+// The value initialized from KMP_LOCK_KIND needs to be translated to its
+// nested version.
+void
+__kmp_init_nest_lock_hinted(void **lock, int hint)
+{
+    kmp_dyna_lockseq_t seq;
+    switch (hint) {
+        case kmp_lock_hint_uncontended:
+            seq = lockseq_nested_tas;
+            break;
+        // Defaults to queuing locks.
+        case kmp_lock_hint_contended:
+        case kmp_lock_hint_nonspeculative:
+        default:
+            seq = lockseq_nested_queuing;
+            break;
+    }
+    DYNA_INIT_I_LOCK(lock, seq);
+#if USE_ITT_BUILD
+    kmp_indirect_lock_t *ilk = DYNA_LOOKUP_I_LOCK(lock);
+    __kmp_itt_lock_creating(ilk->lock, NULL);
+#endif
+}
+
+// Initializes the lock table for indirect locks.
+static void
+__kmp_init_indirect_lock_table()
+{
+    __kmp_indirect_lock_table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)*1024);
+    __kmp_indirect_lock_table_size = 1024;
+    __kmp_indirect_lock_table_next = 0;
+}
+
+#if KMP_USE_ADAPTIVE_LOCKS
+# define init_lock_func(table, expand) { \
+    table[locktag_ticket] = expand(ticket); \
+    table[locktag_queuing] = expand(queuing); \
+    table[locktag_adaptive] = expand(queuing); \
+    table[locktag_drdpa] = expand(drdpa); \
+    table[locktag_nested_ticket] = expand(ticket); \
+    table[locktag_nested_queuing] = expand(queuing); \
+    table[locktag_nested_drdpa] = expand(drdpa); \
+}
+#else
+# define init_lock_func(table, expand) { \
+    table[locktag_ticket] = expand(ticket); \
+    table[locktag_queuing] = expand(queuing); \
+    table[locktag_drdpa] = expand(drdpa); \
+    table[locktag_nested_ticket] = expand(ticket); \
+    table[locktag_nested_queuing] = expand(queuing); \
+    table[locktag_nested_drdpa] = expand(drdpa); \
+}
+#endif // KMP_USE_ADAPTIVE_LOCKS
+
+// Initializes data for dynamic user locks.
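
/*
 * Illustrative sketch (not part of the patch): intended use of the hinted
 * initializers from application code, assuming the kmp_init_lock_hinted entry
 * point declared in omp.h by this patch forwards to __kmp_init_lock_hinted
 * above. With kmp_lock_hint_speculative the runtime picks an HLE lock where
 * available and otherwise falls back to test-and-set, as the switch above shows.
 */
#include <omp.h>

void hinted_lock_example(void)
{
    omp_lock_t l;
    kmp_init_lock_hinted(&l, kmp_lock_hint_speculative);

    #pragma omp parallel num_threads(4)
    {
        omp_set_lock(&l);
        /* short critical section, a good fit for lock elision */
        omp_unset_lock(&l);
    }

    omp_destroy_lock(&l);
}
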
+void +__kmp_init_dynamic_user_locks() +{ + // Initialize jump table location + int offset = (__kmp_env_consistency_check)? 1: 0; + __kmp_direct_set_ops = direct_set_tab[offset]; + __kmp_direct_unset_ops = direct_unset_tab[offset]; + __kmp_direct_test_ops = direct_test_tab[offset]; + __kmp_indirect_set_ops = indirect_set_tab[offset]; + __kmp_indirect_unset_ops = indirect_unset_tab[offset]; + __kmp_indirect_test_ops = indirect_test_tab[offset]; + __kmp_init_indirect_lock_table(); + + // Initialize lock accessor/modifier + // Could have used designated initializer, but -TP /Qstd=c99 did not work with icl.exe. +#define expand_func(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location + init_lock_func(__kmp_indirect_set_location, expand_func); +#undef expand_func +#define expand_func(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags + init_lock_func(__kmp_indirect_set_flags, expand_func); +#undef expand_func +#define expand_func(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location + init_lock_func(__kmp_indirect_get_location, expand_func); +#undef expand_func +#define expand_func(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags + init_lock_func(__kmp_indirect_get_flags, expand_func); +#undef expand_func + + __kmp_init_user_locks = TRUE; +} + +// Clean up the lock table. +void +__kmp_cleanup_indirect_user_locks() +{ + kmp_lock_index_t i; + int k; + + // Clean up locks in the pools first (they were already destroyed before going into the pools). + for (k = 0; k < DYNA_NUM_I_LOCKS; ++k) { + kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; + while (l != NULL) { + kmp_indirect_lock_t *ll = l; + l = (kmp_indirect_lock_t *)l->lock->pool.next; + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + __kmp_indirect_lock_table[ll->lock->pool.index] = NULL; + } + __kmp_free(ll->lock); + __kmp_free(ll); + } + } + // Clean up the remaining undestroyed locks. + for (i = 0; i < __kmp_indirect_lock_table_next; i++) { + kmp_indirect_lock_t *l = __kmp_indirect_lock_table[i]; + if (l != NULL) { + // Locks not destroyed explicitly need to be destroyed here. + DYNA_I_LOCK_FUNC(l, destroy)(l->lock); + __kmp_free(l->lock); + __kmp_free(l); + } + } + // Free the table + __kmp_free(__kmp_indirect_lock_table); + + __kmp_init_user_locks = FALSE; +} + +enum kmp_lock_kind __kmp_user_lock_kind = lk_default; +int __kmp_num_locks_in_block = 1; // FIXME - tune this value + +#else // KMP_USE_DYNAMIC_LOCK + /* ------------------------------------------------------------------------ */ /* user locks * @@ -3539,3 +4109,4 @@ TCW_4(__kmp_init_user_locks, FALSE); } +#endif // KMP_USE_DYNAMIC_LOCK Index: runtime/src/kmp_omp.h =================================================================== --- runtime/src/kmp_omp.h +++ runtime/src/kmp_omp.h @@ -79,7 +79,9 @@ addr_and_size_t roots; // Pointer to __kmp_root. addr_and_size_t capacity; // Pointer to __kmp_threads_capacity. addr_and_size_t monitor; // Pointer to __kmp_monitor. +#if ! KMP_USE_DYNAMIC_LOCK addr_and_size_t lock_table; // Pointer to __kmp_lock_table. +#endif addr_and_size_t func_microtask; addr_and_size_t func_fork; addr_and_size_t func_fork_teams; @@ -159,11 +161,13 @@ offset_and_size_t lk_depth_locked; offset_and_size_t lk_lock_flags; +#if ! KMP_USE_DYNAMIC_LOCK /* lock_table_t */ kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. 
*/ offset_and_size_t lt_used; offset_and_size_t lt_allocated; offset_and_size_t lt_table; +#endif /* task_team_t */ kmp_int32 tt_sizeof_struct; Index: runtime/src/kmp_os.h =================================================================== --- runtime/src/kmp_os.h +++ runtime/src/kmp_os.h @@ -815,6 +815,11 @@ # define USE_CMPXCHG_FIX 1 #endif +// Enable dynamic user lock +#ifndef KMP_USE_DYNAMIC_LOCK +# define KMP_USE_DYNAMIC_LOCK 0 +#endif + // Warning levels enum kmp_warnings_level { kmp_warnings_off = 0, /* No warnings */ Index: runtime/src/kmp_runtime.c =================================================================== --- runtime/src/kmp_runtime.c +++ runtime/src/kmp_runtime.c @@ -716,7 +716,11 @@ if( __kmp_env_consistency_check ) { if( __kmp_threads[gtid]->th.th_root->r.r_active ) +#if KMP_USE_DYNAMIC_LOCK + __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 ); +#else __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL ); +#endif } #ifdef BUILD_PARALLEL_ORDERED if( !team->t.t_serialized ) { @@ -6735,7 +6739,11 @@ __kmp_root = NULL; __kmp_threads_capacity = 0; +#if KMP_USE_DYNAMIC_LOCK + __kmp_cleanup_indirect_user_locks(); +#else __kmp_cleanup_user_locks(); +#endif #if KMP_AFFINITY_SUPPORTED KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file ); Index: runtime/src/kmp_settings.c =================================================================== --- runtime/src/kmp_settings.c +++ runtime/src/kmp_settings.c @@ -3996,11 +3996,13 @@ || __kmp_str_match( "testand-set", 2, value ) || __kmp_str_match( "testandset", 2, value ) ) { __kmp_user_lock_kind = lk_tas; + DYNA_STORE_LOCK_SEQ(tas); } #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) else if ( __kmp_str_match( "futex", 1, value ) ) { if ( __kmp_futex_determine_capable() ) { __kmp_user_lock_kind = lk_futex; + DYNA_STORE_LOCK_SEQ(futex); } else { KMP_WARNING( FutexNotSupported, name, value ); @@ -4009,10 +4011,12 @@ #endif else if ( __kmp_str_match( "ticket", 2, value ) ) { __kmp_user_lock_kind = lk_ticket; + DYNA_STORE_LOCK_SEQ(ticket); } else if ( __kmp_str_match( "queuing", 1, value ) || __kmp_str_match( "queue", 1, value ) ) { __kmp_user_lock_kind = lk_queuing; + DYNA_STORE_LOCK_SEQ(queuing); } else if ( __kmp_str_match( "drdpa ticket", 1, value ) || __kmp_str_match( "drdpa_ticket", 1, value ) @@ -4020,17 +4024,25 @@ || __kmp_str_match( "drdpaticket", 1, value ) || __kmp_str_match( "drdpa", 1, value ) ) { __kmp_user_lock_kind = lk_drdpa; + DYNA_STORE_LOCK_SEQ(drdpa); } #if KMP_USE_ADAPTIVE_LOCKS else if ( __kmp_str_match( "adaptive", 1, value ) ) { if( __kmp_cpuinfo.rtm ) { // ??? Is cpuinfo available here? 
__kmp_user_lock_kind = lk_adaptive; + DYNA_STORE_LOCK_SEQ(adaptive); } else { KMP_WARNING( AdaptiveNotSupported, name, value ); __kmp_user_lock_kind = lk_queuing; + DYNA_STORE_LOCK_SEQ(queuing); } } #endif // KMP_USE_ADAPTIVE_LOCKS +#if KMP_USE_DYNAMIC_LOCK + else if ( __kmp_str_match("hle", 1, value) ) { + DYNA_STORE_LOCK_SEQ(hle); + } +#endif else { KMP_WARNING( StgInvalidValue, name, value ); } @@ -5057,16 +5069,24 @@ if ( __kmp_user_lock_kind == lk_default ) { __kmp_user_lock_kind = lk_queuing; } +#if KMP_USE_DYNAMIC_LOCK + __kmp_init_dynamic_user_locks(); +#else __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); +#endif } else { KMP_DEBUG_ASSERT( string != NULL); // kmp_set_defaults() was called KMP_DEBUG_ASSERT( __kmp_user_lock_kind != lk_default ); - __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); // Binds lock functions again to follow the transition between different // KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long // as we do not allow lock kind changes after making a call to any // user lock functions (true). +#if KMP_USE_DYNAMIC_LOCK + __kmp_init_dynamic_user_locks(); +#else + __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); +#endif } #if KMP_AFFINITY_SUPPORTED Index: runtime/src/kmp_taskq.c =================================================================== --- runtime/src/kmp_taskq.c +++ runtime/src/kmp_taskq.c @@ -48,7 +48,11 @@ kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq; if ( __kmp_env_consistency_check ) +#if KMP_USE_DYNAMIC_LOCK + __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 ); +#else __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL ); +#endif if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) { KMP_MB(); /* Flush all pending memory write invalidates. */ Index: runtime/src/z_Linux_util.c =================================================================== --- runtime/src/z_Linux_util.c +++ runtime/src/z_Linux_util.c @@ -1579,10 +1579,12 @@ __kmp_init_common = FALSE; TCW_4(__kmp_init_user_locks, FALSE); +#if ! KMP_USE_DYNAMIC_LOCK __kmp_user_lock_table.used = 1; __kmp_user_lock_table.allocated = 0; __kmp_user_lock_table.table = NULL; __kmp_lock_blocks = NULL; +#endif __kmp_all_nth = 0; TCW_4(__kmp_nth, 0);
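
/*
 * Illustrative sketch (not part of the patch): the hunks above gate every call
 * site on KMP_USE_DYNAMIC_LOCK, which kmp_os.h defaults to 0 so existing builds
 * keep the legacy lock layer unless the build defines it to 1 (for example via
 * -DKMP_USE_DYNAMIC_LOCK=1 -- an assumed, not documented, way to enable it).
 * The pattern in miniature, with hypothetical names:
 */
#include <stdio.h>

#ifndef FEATURE_DYNAMIC_LOCK
# define FEATURE_DYNAMIC_LOCK 0     /* default off, like KMP_USE_DYNAMIC_LOCK */
#endif

static void cleanup_user_locks(void)
{
#if FEATURE_DYNAMIC_LOCK
    printf("cleanup: dynamic (indirect) user locks\n");   /* new code path */
#else
    printf("cleanup: legacy user lock table\n");          /* legacy code path */
#endif
}

int main(void)
{
    cleanup_user_locks();
    return 0;
}
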