Index: runtime/src/kmp_affinity.h =================================================================== --- runtime/src/kmp_affinity.h +++ runtime/src/kmp_affinity.h @@ -618,10 +618,8 @@ }; static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) { - const Address *aa = - (const Address *)&(((AddrUnsPair *)CCAST(void *, a))->first); - const Address *bb = - (const Address *)&(((AddrUnsPair *)CCAST(void *, b))->first); + const Address *aa = &(((const AddrUnsPair *)a)->first); + const Address *bb = &(((const AddrUnsPair *)b)->first); unsigned depth = aa->depth; unsigned i; KMP_DEBUG_ASSERT(depth == bb->depth); Index: runtime/src/kmp_affinity.cpp =================================================================== --- runtime/src/kmp_affinity.cpp +++ runtime/src/kmp_affinity.cpp @@ -1691,8 +1691,8 @@ static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b) { unsigned i; - const unsigned *aa = *(RCAST(unsigned **, CCAST(void *, a))); - const unsigned *bb = *(RCAST(unsigned **, CCAST(void *, b))); + const unsigned *aa = *(unsigned *const *)a; + const unsigned *bb = *(unsigned *const *)b; for (i = maxIndex;; i--) { if (aa[i] < bb[i]) return -1; @@ -1732,7 +1732,7 @@ // FIXME - this will match "node_ " unsigned level; - if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { + if (KMP_SSCANF(buf, "node_%u id", &level) == 1) { if (nodeIdIndex + level >= maxIndex) { maxIndex = nodeIdIndex + level; } @@ -3786,10 +3786,8 @@ return; static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) { - const Address *aa = - (const Address *)&(((AddrUnsPair *)CCAST(void *, a))->first); - const Address *bb = - (const Address *)&(((AddrUnsPair *)CCAST(void *, b))->first); + const Address *aa = &(((const AddrUnsPair *)a)->first); + const Address *bb = &(((const AddrUnsPair *)b)->first); unsigned depth = aa->depth; unsigned i; KMP_DEBUG_ASSERT(depth == bb->depth); Index: runtime/src/kmp_alloc.cpp =================================================================== --- runtime/src/kmp_alloc.cpp +++ runtime/src/kmp_alloc.cpp @@ -299,7 +299,7 @@ { volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, - CCAST(void *, old_value), NULL)) { + CCAST(void *, old_value), nullptr)) { KMP_CPU_PAUSE(); old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); } @@ -1696,7 +1696,7 @@ // threads only) // pop the head of the sync free list, push NULL instead while (!KMP_COMPARE_AND_STORE_PTR( - &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL)) { + &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) { KMP_CPU_PAUSE(); ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); } Index: runtime/src/kmp_barrier.cpp =================================================================== --- runtime/src/kmp_barrier.cpp +++ runtime/src/kmp_barrier.cpp @@ -856,8 +856,8 @@ (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { - if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on - // my b_arrived flag + if (thr_bar->leaf_kids) { + // First, wait for leaf children to check-in on my b_arrived flag kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? 
thr_bar->b_arrived | thr_bar->leaf_state @@ -884,8 +884,7 @@ ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); } // clear leaf_state bits - KMP_TEST_THEN_AND64(CCAST(kmp_uint64 *, &thr_bar->b_arrived), - ~(thr_bar->leaf_state)); + KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state)); } // Next, wait for higher level children on each child's b_arrived flag for (kmp_uint32 d = 1; d < thr_bar->my_level; Index: runtime/src/kmp_csupport.cpp =================================================================== --- runtime/src/kmp_csupport.cpp +++ runtime/src/kmp_csupport.cpp @@ -869,7 +869,7 @@ #if USE_ITT_BUILD __kmp_itt_critical_creating(ilk->lock, loc); #endif - int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk); + int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk); if (status == 0) { #if USE_ITT_BUILD __kmp_itt_critical_destroyed(ilk->lock); @@ -3258,8 +3258,7 @@ iter_number >>= 5; // divided by 32 flag = 1 << shft; if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) - KMP_TEST_THEN_OR32( - CCAST(kmp_uint32 *, &pr_buf->th_doacross_flags[iter_number]), flag); + KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag); KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, (iter_number << 5) + shft)); } Index: runtime/src/kmp_dispatch.cpp =================================================================== --- runtime/src/kmp_dispatch.cpp +++ runtime/src/kmp_dispatch.cpp @@ -172,7 +172,7 @@ __forceinline kmp_int32 test_then_add(volatile kmp_int32 *p, kmp_int32 d) { kmp_int32 r; - r = KMP_TEST_THEN_ADD32(CCAST(kmp_int32 *, p), d); + r = KMP_TEST_THEN_ADD32(p, d); return r; } @@ -180,7 +180,7 @@ __forceinline kmp_int64 test_then_add(volatile kmp_int64 *p, kmp_int64 d) { kmp_int64 r; - r = KMP_TEST_THEN_ADD64(CCAST(kmp_int64 *, p), d); + r = KMP_TEST_THEN_ADD64(p, d); return r; } @@ -190,14 +190,14 @@ template <> __forceinline kmp_int32 test_then_inc_acq(volatile kmp_int32 *p) { kmp_int32 r; - r = KMP_TEST_THEN_INC_ACQ32(CCAST(kmp_int32 *, p)); + r = KMP_TEST_THEN_INC_ACQ32(p); return r; } template <> __forceinline kmp_int64 test_then_inc_acq(volatile kmp_int64 *p) { kmp_int64 r; - r = KMP_TEST_THEN_INC_ACQ64(CCAST(kmp_int64 *, p)); + r = KMP_TEST_THEN_INC_ACQ64(p); return r; } @@ -207,14 +207,14 @@ template <> __forceinline kmp_int32 test_then_inc(volatile kmp_int32 *p) { kmp_int32 r; - r = KMP_TEST_THEN_INC32(CCAST(kmp_int32 *, p)); + r = KMP_TEST_THEN_INC32(p); return r; } template <> __forceinline kmp_int64 test_then_inc(volatile kmp_int64 *p) { kmp_int64 r; - r = KMP_TEST_THEN_INC64(CCAST(kmp_int64 *, p)); + r = KMP_TEST_THEN_INC64(p); return r; } @@ -1163,8 +1163,7 @@ th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr; th->th.th_dispatch->th_dispatch_sh_current = - RCAST(dispatch_shared_info_t *, - CCAST(dispatch_shared_info_template *, sh)); + CCAST(dispatch_shared_info_t *, (volatile dispatch_shared_info_t *)sh); #if USE_ITT_BUILD if (pr->ordered) { __kmp_itt_ordered_init(gtid); @@ -1981,7 +1980,7 @@ // use dynamic-style shcedule // atomically inrement iterations, get old value init = test_then_add( - RCAST(ST *, CCAST(UT *, &sh->u.s.iteration)), (ST)chunkspec); + RCAST(volatile ST *, &sh->u.s.iteration), (ST)chunkspec); remaining = trip - init; if (remaining <= 0) { status = 0; // all iterations got by other threads @@ -1998,7 +1997,7 @@ } // if limit = init + (UT)(remaining * *(double *)&pr->u.p.parm3); // divide by K*nproc - if (compare_and_swap(RCAST(ST *, CCAST(UT *, &sh->u.s.iteration)), + if 
(compare_and_swap(RCAST(volatile ST *, &sh->u.s.iteration), (ST)init, (ST)limit)) { // CAS was successful, chunk obtained status = 1; @@ -2060,7 +2059,7 @@ // use dynamic-style shcedule // atomically inrement iterations, get old value init = test_then_add( - RCAST(ST *, CCAST(UT *, &sh->u.s.iteration)), (ST)chunk); + RCAST(volatile ST *, &sh->u.s.iteration), (ST)chunk); remaining = trip - init; if (remaining <= 0) { status = 0; // all iterations got by other threads @@ -2082,7 +2081,7 @@ if (rem) // adjust so that span%chunk == 0 span += chunk - rem; limit = init + span; - if (compare_and_swap(RCAST(ST *, CCAST(UT *, &sh->u.s.iteration)), + if (compare_and_swap(RCAST(volatile ST *, &sh->u.s.iteration), (ST)init, (ST)limit)) { // CAS was successful, chunk obtained status = 1; Index: runtime/src/kmp_lock.cpp =================================================================== --- runtime/src/kmp_lock.cpp +++ runtime/src/kmp_lock.cpp @@ -1468,8 +1468,8 @@ /* try (h,h)->(-1,0) */ dequeued = KMP_COMPARE_AND_STORE_REL64( - RCAST(kmp_int64 *, CCAST(kmp_int32 *, tail_id_p)), - KMP_PACK_64(head, head), KMP_PACK_64(-1, 0)); + RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head), + KMP_PACK_64(-1, 0)); #ifdef DEBUG_QUEUING_LOCKS TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); #endif @@ -2289,8 +2289,8 @@ __forceinline static int __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { - kmp_uint64 ticket = KMP_TEST_THEN_INC64( - RCAST(kmp_int64 *, CCAST(kmp_uint64 *, &lck->lk.next_ticket))); + kmp_uint64 ticket = + KMP_TEST_THEN_INC64(RCAST(volatile kmp_int64 *, &lck->lk.next_ticket)); kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = lck->lk.polls; Index: runtime/src/kmp_os.h =================================================================== --- runtime/src/kmp_os.h +++ runtime/src/kmp_os.h @@ -184,10 +184,7 @@ #define KMP_INT_MIN ((kmp_int32)0x80000000) #ifdef __cplusplus -#define CAST_FLT_INT(a) \ - reinterpret_cast(const_cast(a)) -#define CAST_DBL_INT(a) \ - reinterpret_cast(const_cast(a)) +// macros to cast out qualifiers and to re-interpret types #define CCAST(type, var) const_cast(var) #define RCAST(type, var) reinterpret_cast(var) //------------------------------------------------------------------------- @@ -236,8 +233,6 @@ }; //------------------------------------------------------------------------- #else -#define CAST_FLT_INT(a) (kmp_int32 *)(a) -#define CAST_DBL_INT(a) (kmp_int64 *)(a) #define CCAST(type, var) (type)(var) #define RCAST(type, var) (type)(var) #endif // __cplusplus @@ -330,9 +325,6 @@ #define KMP_TEST_THEN_ADD32(p, v) \ InterlockedExchangeAdd((volatile long *)(p), (v)) -extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ InterlockedCompareExchange((volatile long *)(p), (long)(sv), (long)(cv)) @@ -347,12 +339,15 @@ } // Routines that we still need to implement in assembly. 
+extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); -extern kmp_int32 __kmp_test_then_or32(volatile kmp_int32 *p, kmp_int32 v); -extern kmp_int32 __kmp_test_then_and32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v); +extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v); extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); -extern kmp_int64 __kmp_test_then_or64(volatile kmp_int64 *p, kmp_int64 v); -extern kmp_int64 __kmp_test_then_and64(volatile kmp_int64 *p, kmp_int64 v); +extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v); +extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v); extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); @@ -377,32 +372,25 @@ extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); -#define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v)) -//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 -//) -#define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v)) -#define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v)) -//# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 -//) +//#define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32((p), 1) +//#define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32((p), 1) #define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL) #define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL) -//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 -//) -//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 -//) +//#define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32((p), 4) +//#define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32((p), 4) #define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL) #define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL) -//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 -//) -//# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 -//) +//#define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32((p), -1) +//#define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32((p), -1) #define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL) #define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL) -//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), -//(v) ) +//#define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32((p), (v)) +#define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v)) #define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v)) +#define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v)) +#define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v)) #define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v)) #define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v)) #define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v)) @@ -417,32 +405,35 @@ #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ __kmp_compare_and_store16((p), (cv), (sv)) #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ - __kmp_compare_and_store32((p), (cv), 
(sv)) + __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ - __kmp_compare_and_store32((p), (cv), (sv)) + __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ - __kmp_compare_and_store64((p), (cv), (sv)) + __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ - __kmp_compare_and_store64((p), (cv), (sv)) + __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) #if KMP_ARCH_X86 #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __kmp_compare_and_store32(RCAST(volatile kmp_int32 *, p), \ - RCAST(kmp_int32, cv), RCAST(kmp_int32, sv)) + __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) #else /* 64 bit pointers */ #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __kmp_compare_and_store64(RCAST(volatile kmp_int64 *, p), \ - RCAST(kmp_int64, cv), RCAST(kmp_int64, sv)) + __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) #endif /* KMP_ARCH_X86 */ #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ __kmp_compare_and_store_ret8((p), (cv), (sv)) #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ __kmp_compare_and_store_ret16((p), (cv), (sv)) -//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( -//(p), (cv), (sv) ) #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ - __kmp_compare_and_store_ret64((p), (cv), (sv)) + __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) #define KMP_XCHG_FIXED8(p, v) \ __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); @@ -453,30 +444,51 @@ #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); #elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64) -#define KMP_TEST_THEN_ADD8(p, v) __sync_fetch_and_add((kmp_int8 *)(p), (v)) /* cast p to correct type so that proper intrinsic will be used */ -#define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add((kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_OR8(p, v) __sync_fetch_and_or((kmp_int8 *)(p), (v)) -#define KMP_TEST_THEN_AND8(p, v) __sync_fetch_and_and((kmp_int8 *)(p), (v)) -#define KMP_TEST_THEN_INC_ACQ32(p) __sync_fetch_and_add((kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_INC64(p) __sync_fetch_and_add((kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_INC_ACQ64(p) __sync_fetch_and_add((kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_ADD4_32(p) __sync_fetch_and_add((kmp_int32 *)(p), 4) -#define KMP_TEST_THEN_ADD4_ACQ32(p) __sync_fetch_and_add((kmp_int32 *)(p), 4) -#define KMP_TEST_THEN_ADD4_64(p) __sync_fetch_and_add((kmp_int64 *)(p), 4LL) -#define KMP_TEST_THEN_ADD4_ACQ64(p) __sync_fetch_and_add((kmp_int64 *)(p), 4LL) -#define KMP_TEST_THEN_DEC32(p) __sync_fetch_and_sub((kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_DEC_ACQ32(p) __sync_fetch_and_sub((kmp_int32 *)(p), 1) -#define KMP_TEST_THEN_DEC64(p) __sync_fetch_and_sub((kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_DEC_ACQ64(p) __sync_fetch_and_sub((kmp_int64 *)(p), 1LL) -#define KMP_TEST_THEN_ADD32(p, v) __sync_fetch_and_add((kmp_int32 *)(p), (v)) -#define KMP_TEST_THEN_ADD64(p, v) __sync_fetch_and_add((kmp_int64 *)(p), (v)) - -#define KMP_TEST_THEN_OR32(p, v) __sync_fetch_and_or((kmp_uint32 *)(p), (v)) -#define KMP_TEST_THEN_AND32(p, v) __sync_fetch_and_and((kmp_uint32 *)(p), (v)) -#define KMP_TEST_THEN_OR64(p, v) 
__sync_fetch_and_or((kmp_uint64 *)(p), (v)) -#define KMP_TEST_THEN_AND64(p, v) __sync_fetch_and_and((kmp_uint64 *)(p), (v)) +#define KMP_TEST_THEN_INC32(p) \ + __sync_fetch_and_add((volatile kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_INC_ACQ32(p) \ + __sync_fetch_and_add((volatile kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_INC64(p) \ + __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_INC_ACQ64(p) \ + __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_ADD4_32(p) \ + __sync_fetch_and_add((volatile kmp_int32 *)(p), 4) +#define KMP_TEST_THEN_ADD4_ACQ32(p) \ + __sync_fetch_and_add((volatile kmp_int32 *)(p), 4) +#define KMP_TEST_THEN_ADD4_64(p) \ + __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL) +#define KMP_TEST_THEN_ADD4_ACQ64(p) \ + __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL) +#define KMP_TEST_THEN_DEC32(p) \ + __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_DEC_ACQ32(p) \ + __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_DEC64(p) \ + __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_DEC_ACQ64(p) \ + __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_ADD8(p, v) \ + __sync_fetch_and_add((volatile kmp_int8 *)(p), (kmp_int8)(v)) +#define KMP_TEST_THEN_ADD32(p, v) \ + __sync_fetch_and_add((volatile kmp_int32 *)(p), (kmp_int32)(v)) +#define KMP_TEST_THEN_ADD64(p, v) \ + __sync_fetch_and_add((volatile kmp_int64 *)(p), (kmp_int64)(v)) + +#define KMP_TEST_THEN_OR8(p, v) \ + __sync_fetch_and_or((volatile kmp_int8 *)(p), (kmp_int8)(v)) +#define KMP_TEST_THEN_AND8(p, v) \ + __sync_fetch_and_and((volatile kmp_int8 *)(p), (kmp_int8)(v)) +#define KMP_TEST_THEN_OR32(p, v) \ + __sync_fetch_and_or((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) +#define KMP_TEST_THEN_AND32(p, v) \ + __sync_fetch_and_and((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) +#define KMP_TEST_THEN_OR64(p, v) \ + __sync_fetch_and_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) +#define KMP_TEST_THEN_AND64(p, v) \ + __sync_fetch_and_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ @@ -528,27 +540,29 @@ #define KMP_XCHG_FIXED64(p, v) \ __sync_lock_test_and_set((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) -extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); -extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { - kmp_int32 tmp = __sync_lock_test_and_set(CAST_FLT_INT(p), *(kmp_int32 *)&v); + kmp_int32 tmp = + __sync_lock_test_and_set((volatile kmp_uint32 *)(p), *(kmp_uint32 *)&v); return *(kmp_real32 *)&tmp; } inline kmp_real64 KMP_XCHG_REAL64(volatile kmp_real64 *p, kmp_real64 v) { - kmp_int64 tmp = __sync_lock_test_and_set(CAST_DBL_INT(p), *(kmp_int64 *)&v); + kmp_int64 tmp = + __sync_lock_test_and_set((volatile kmp_uint64 *)(p), *(kmp_uint64 *)&v); return *(kmp_real64 *)&tmp; } #else +extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); -extern kmp_int32 __kmp_test_then_or32(volatile kmp_int32 *p, kmp_int32 v); -extern kmp_int32 
__kmp_test_then_and32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v); +extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v); extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); -extern kmp_int64 __kmp_test_then_or64(volatile kmp_int64 *p, kmp_int64 v); -extern kmp_int64 __kmp_test_then_and64(volatile kmp_int64 *p, kmp_int64 v); +extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v); +extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v); extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); @@ -572,56 +586,85 @@ extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); -#define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v)) extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); -#define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32((p), 1) -#define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v)) -#define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v)) -#define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32((p), 1) -#define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL) -#define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL) -#define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32((p), 4) -#define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32((p), 4) -#define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL) -#define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL) -#define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32((p), -1) -#define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32((p), -1) -#define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL) -#define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL) -#define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32((p), (v)) -#define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v)) - -#define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v)) -#define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v)) -#define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v)) -#define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64((p), (v)) +#define KMP_TEST_THEN_INC32(p) \ + __kmp_test_then_add32((volatile kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_INC_ACQ32(p) \ + __kmp_test_then_add32((volatile kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_INC64(p) \ + __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_INC_ACQ64(p) \ + __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_ADD4_32(p) \ + __kmp_test_then_add32((volatile kmp_int32 *)(p), 4) +#define KMP_TEST_THEN_ADD4_ACQ32(p) \ + __kmp_test_then_add32((volatile kmp_int32 *)(p), 4) +#define KMP_TEST_THEN_ADD4_64(p) \ + __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL) +#define KMP_TEST_THEN_ADD4_ACQ64(p) \ + __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL) +#define KMP_TEST_THEN_DEC32(p) \ + __kmp_test_then_add32((volatile kmp_int32 *)(p), -1) +#define KMP_TEST_THEN_DEC_ACQ32(p) \ + __kmp_test_then_add32((volatile kmp_int32 *)(p), -1) +#define KMP_TEST_THEN_DEC64(p) \ + __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL) +#define KMP_TEST_THEN_DEC_ACQ64(p) \ + __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL) +#define KMP_TEST_THEN_ADD8(p, v) \ + 
__kmp_test_then_add8((volatile kmp_int8 *)(p), (kmp_int8)(v)) +#define KMP_TEST_THEN_ADD32(p, v) \ + __kmp_test_then_add32((volatile kmp_int32 *)(p), (kmp_int32)(v)) +#define KMP_TEST_THEN_ADD64(p, v) \ + __kmp_test_then_add64((volatile kmp_int64 *)(p), (kmp_int64)(v)) + +#define KMP_TEST_THEN_OR8(p, v) \ + __kmp_test_then_or8((volatile kmp_int8 *)(p), (kmp_int8)(v)) +#define KMP_TEST_THEN_AND8(p, v) \ + __kmp_test_then_and8((volatile kmp_int8 *)(p), (kmp_int8)(v)) +#define KMP_TEST_THEN_OR32(p, v) \ + __kmp_test_then_or32((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) +#define KMP_TEST_THEN_AND32(p, v) \ + __kmp_test_then_and32((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) +#define KMP_TEST_THEN_OR64(p, v) \ + __kmp_test_then_or64((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) +#define KMP_TEST_THEN_AND64(p, v) \ + __kmp_test_then_and64((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ - __kmp_compare_and_store8((p), (cv), (sv)) + __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \ + (kmp_int8)(sv)) #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ - __kmp_compare_and_store8((p), (cv), (sv)) + __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \ + (kmp_int8)(sv)) #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ - __kmp_compare_and_store16((p), (cv), (sv)) + __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \ + (kmp_int16)(sv)) #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ - __kmp_compare_and_store16((p), (cv), (sv)) + __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \ + (kmp_int16)(sv)) #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ - __kmp_compare_and_store32((p), (cv), (sv)) + __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ - __kmp_compare_and_store32((p), (cv), (sv)) + __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ - __kmp_compare_and_store64((p), (cv), (sv)) + __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ - __kmp_compare_and_store64((p), (cv), (sv)) + __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) #if KMP_ARCH_X86 #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __kmp_compare_and_store32(RCAST(volatile kmp_int32 *, p), \ - RCAST(kmp_int32, cv), RCAST(kmp_int32, sv)) + __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) #else /* 64 bit pointers */ #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ - __kmp_compare_and_store64(RCAST(volatile kmp_int64 *, p), \ - RCAST(kmp_int64, cv), RCAST(kmp_int64, sv)) + __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) #endif /* KMP_ARCH_X86 */ #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ @@ -629,9 +672,11 @@ #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ __kmp_compare_and_store_ret16((p), (cv), (sv)) #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ - __kmp_compare_and_store_ret32((p), (cv), (sv)) + __kmp_compare_and_store_ret32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ - __kmp_compare_and_store_ret64((p), (cv), (sv)) + __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) #define KMP_XCHG_FIXED8(p, v) \ 
__kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); @@ -684,8 +729,6 @@ #define KMP_LD_ACQ64(A) (*(A)) #endif -#define TCR_1(a) (a) -#define TCW_1(a, b) (a) = (b) /* ------------------------------------------------------------------------ */ // FIXME - maybe this should this be // @@ -698,6 +741,8 @@ // I'm fairly certain this is the correct thing to do, but I'm afraid // of performance regressions. +#define TCR_1(a) (a) +#define TCW_1(a, b) (a) = (b) #define TCR_4(a) (a) #define TCW_4(a, b) (a) = (b) #define TCI_4(a) (++(a)) Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -5703,7 +5703,7 @@ // so there are no harmful side effects. if (thread->th.th_active_in_pool) { thread->th.th_active_in_pool = FALSE; - KMP_TEST_THEN_DEC32(CCAST(kmp_int32 *, &__kmp_thread_pool_active_nth)); + KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth); KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); } Index: runtime/src/kmp_settings.cpp =================================================================== --- runtime/src/kmp_settings.cpp +++ runtime/src/kmp_settings.cpp @@ -4585,8 +4585,8 @@ } // __kmp_stg_find static int __kmp_stg_cmp(void const *_a, void const *_b) { - kmp_setting_t *a = RCAST(kmp_setting_t *, CCAST(void *, _a)); - kmp_setting_t *b = RCAST(kmp_setting_t *, CCAST(void *, _b)); + const kmp_setting_t *a = RCAST(const kmp_setting_t *, _a); + const kmp_setting_t *b = RCAST(const kmp_setting_t *, _b); // Process KMP_AFFINITY last. // It needs to come after OMP_PLACES and GOMP_CPU_AFFINITY. Index: runtime/src/kmp_tasking.cpp =================================================================== --- runtime/src/kmp_tasking.cpp +++ runtime/src/kmp_tasking.cpp @@ -579,9 +579,8 @@ #endif KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); - kmp_int32 children = KMP_TEST_THEN_DEC32(CCAST( - kmp_int32 *, &taskdata->td_allocated_child_tasks)) - - 1; + kmp_int32 children = + KMP_TEST_THEN_DEC32(&taskdata->td_allocated_child_tasks) - 1; KMP_DEBUG_ASSERT(children >= 0); // Now, go up the ancestor tree to see if any ancestors can now be freed. 
@@ -603,9 +602,7 @@ return; // Predecrement simulated by "- 1" calculation - children = KMP_TEST_THEN_DEC32( - CCAST(kmp_int32 *, &taskdata->td_allocated_child_tasks)) - - 1; + children = KMP_TEST_THEN_DEC32(&taskdata->td_allocated_child_tasks) - 1; KMP_DEBUG_ASSERT(children >= 0); } @@ -684,8 +681,7 @@ if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) { // Predecrement simulated by "- 1" calculation children = - KMP_TEST_THEN_DEC32(CCAST( - kmp_int32 *, &taskdata->td_parent->td_incomplete_child_tasks)) - + KMP_TEST_THEN_DEC32(&taskdata->td_parent->td_incomplete_child_tasks) - 1; KMP_DEBUG_ASSERT(children >= 0); #if OMP_40_ENABLED @@ -1110,8 +1106,7 @@ if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) #endif { - KMP_TEST_THEN_INC32( - CCAST(kmp_int32 *, &parent_task->td_incomplete_child_tasks)); + KMP_TEST_THEN_INC32(&parent_task->td_incomplete_child_tasks); #if OMP_40_ENABLED if (parent_task->td_taskgroup) KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count)); @@ -1119,8 +1114,7 @@ // Only need to keep track of allocated child tasks for explicit tasks since // implicit not deallocated if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) { - KMP_TEST_THEN_INC32( - CCAST(kmp_int32 *, &taskdata->td_parent->td_allocated_child_tasks)); + KMP_TEST_THEN_INC32(&taskdata->td_parent->td_allocated_child_tasks); } } @@ -2057,7 +2051,7 @@ // master victim) might be prematurely released from the barrier!!! kmp_int32 count; - count = KMP_TEST_THEN_INC32(CCAST(kmp_int32 *, unfinished_threads)); + count = KMP_TEST_THEN_INC32(unfinished_threads); KA_TRACE( 20, @@ -2269,7 +2263,7 @@ if (!*thread_finished) { kmp_int32 count; - count = KMP_TEST_THEN_DEC32(CCAST(kmp_int32 *, unfinished_threads)) - 1; + count = KMP_TEST_THEN_DEC32(unfinished_threads) - 1; KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec " "unfinished_threads to %d task_team=%p\n", gtid, count, task_team)); @@ -2964,7 +2958,7 @@ &flag USE_ITT_BUILD_ARG(NULL), 0)) { #if USE_ITT_BUILD // TODO: What about itt_sync_obj?? 
- KMP_FSYNC_SPIN_PREPARE(CCAST(void *, RCAST(volatile void *, spin))); + KMP_FSYNC_SPIN_PREPARE(CCAST(kmp_uint32 *, spin)); #endif /* USE_ITT_BUILD */ if (TCR_4(__kmp_global.g.g_done)) { @@ -2975,7 +2969,7 @@ KMP_YIELD(TRUE); // GH: We always yield here } #if USE_ITT_BUILD - KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, RCAST(volatile void *, spin))); + KMP_FSYNC_SPIN_ACQUIRED(CCAST(kmp_uint32 *, spin)); #endif /* USE_ITT_BUILD */ } @@ -3099,9 +3093,7 @@ // Predecrement simulated by "- 1" calculation children = - KMP_TEST_THEN_DEC32( - CCAST(kmp_int32 *, &taskdata->td_parent->td_incomplete_child_tasks)) - - 1; + KMP_TEST_THEN_DEC32(&taskdata->td_parent->td_incomplete_child_tasks) - 1; KMP_DEBUG_ASSERT(children >= 0); // Remove the imaginary children @@ -3252,15 +3244,13 @@ // Only need to keep track of child task counts if team parallel and tasking // not serialized if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) { - KMP_TEST_THEN_INC32( - CCAST(kmp_int32 *, &parent_task->td_incomplete_child_tasks)); + KMP_TEST_THEN_INC32(&parent_task->td_incomplete_child_tasks); if (parent_task->td_taskgroup) KMP_TEST_THEN_INC32(&parent_task->td_taskgroup->count); // Only need to keep track of allocated child tasks for explicit tasks since // implicit not deallocated if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) - KMP_TEST_THEN_INC32( - CCAST(kmp_int32 *, &taskdata->td_parent->td_allocated_child_tasks)); + KMP_TEST_THEN_INC32(&taskdata->td_parent->td_allocated_child_tasks); } KA_TRACE(20, Index: runtime/src/kmp_taskq.cpp =================================================================== --- runtime/src/kmp_taskq.cpp +++ runtime/src/kmp_taskq.cpp @@ -1921,8 +1921,8 @@ if (in_parallel) { #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - KMP_TEST_THEN_OR32(CCAST(kmp_int32 *, &queue->tq_flags), - (kmp_int32)TQF_ALL_TASKS_QUEUED); + KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags), + TQF_ALL_TASKS_QUEUED); #else { __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); @@ -1952,8 +1952,8 @@ queue->tq_flags |= TQF_IS_LAST_TASK; } else { #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - KMP_TEST_THEN_OR32(CCAST(kmp_int32 *, &queue->tq_flags), - (kmp_int32)TQF_IS_LAST_TASK); + KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags), + TQF_IS_LAST_TASK); #else { __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); Index: runtime/src/kmp_wait_release.h =================================================================== --- runtime/src/kmp_wait_release.h +++ runtime/src/kmp_wait_release.h @@ -235,7 +235,7 @@ in_pool = !!TCR_4(this_thr->th.th_in_pool); if (in_pool != !!this_thr->th.th_active_in_pool) { if (in_pool) { // Recently transferred from team to pool - KMP_TEST_THEN_INC32(CCAST(kmp_int32 *, &__kmp_thread_pool_active_nth)); + KMP_TEST_THEN_INC32(&__kmp_thread_pool_active_nth); this_thr->th.th_active_in_pool = TRUE; /* Here, we cannot assert that: KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= @@ -245,7 +245,7 @@ inc/dec'd asynchronously by the workers. The two can get out of sync for brief periods of time. 
*/ } else { // Recently transferred from pool to team - KMP_TEST_THEN_DEC32(CCAST(kmp_int32 *, &__kmp_thread_pool_active_nth)); + KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth); KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); this_thr->th.th_active_in_pool = FALSE; } @@ -374,13 +374,13 @@ static const flag_type t = flag32; static inline flag_t tcr(flag_t f) { return TCR_4(f); } static inline flag_t test_then_add4(volatile flag_t *f) { - return KMP_TEST_THEN_ADD4_32(RCAST(kmp_int32 *, CCAST(flag_t *, f))); + return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f)); } static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_OR32(CCAST(flag_t *, f), v); + return KMP_TEST_THEN_OR32(f, v); } static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_AND32(CCAST(flag_t *, f), v); + return KMP_TEST_THEN_AND32(f, v); } }; @@ -389,13 +389,13 @@ static const flag_type t = flag64; static inline flag_t tcr(flag_t f) { return TCR_8(f); } static inline flag_t test_then_add4(volatile flag_t *f) { - return KMP_TEST_THEN_ADD4_64(RCAST(kmp_int64 *, CCAST(flag_t *, f))); + return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f)); } static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_OR64(CCAST(flag_t *, f), v); + return KMP_TEST_THEN_OR64(f, v); } static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { - return KMP_TEST_THEN_AND64(CCAST(flag_t *, f), v); + return KMP_TEST_THEN_AND64(f, v); } }; @@ -562,7 +562,7 @@ itt_sync_obj; /**< ITT object that must be passed to new flag location. */ #endif unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) { - return RCAST(unsigned char *, CCAST(kmp_uint64 *, loc))[offset]; + return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset]; } public: @@ -626,16 +626,14 @@ } else { kmp_uint64 mask = 0; byteref(&mask, offset) = 1; - KMP_TEST_THEN_OR64(CCAST(kmp_uint64 *, get()), mask); + KMP_TEST_THEN_OR64(get(), mask); } } kmp_uint64 set_sleeping() { - return KMP_TEST_THEN_OR64(CCAST(kmp_uint64 *, get()), - KMP_BARRIER_SLEEP_STATE); + return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE); } kmp_uint64 unset_sleeping() { - return KMP_TEST_THEN_AND64(CCAST(kmp_uint64 *, get()), - ~KMP_BARRIER_SLEEP_STATE); + return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE); } bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; Index: runtime/src/thirdparty/ittnotify/ittnotify_config.h =================================================================== --- runtime/src/thirdparty/ittnotify/ittnotify_config.h +++ runtime/src/thirdparty/ittnotify/ittnotify_config.h @@ -300,8 +300,8 @@ { long result; __asm__ __volatile__("lock\nxadd %0,%1" - : "=r"(result),"=m"(*(int*)ptr) - : "0"(addend), "m"(*(int*)ptr) + : "=r"(result),"=m"(*(volatile int*)ptr) + : "0"(addend), "m"(*(volatile int*)ptr) : "memory"); return result; } Index: runtime/src/z_Linux_util.cpp =================================================================== --- runtime/src/z_Linux_util.cpp +++ runtime/src/z_Linux_util.cpp @@ -336,8 +336,8 @@ return old_value; } -kmp_int32 __kmp_test_then_or32(volatile kmp_int32 *p, kmp_int32 d) { - kmp_int32 old_value, new_value; +kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) { + kmp_uint32 old_value, new_value; old_value = TCR_4(*p); new_value = old_value | d; @@ -350,8 +350,8 @@ return old_value; } -kmp_int32 __kmp_test_then_and32(volatile kmp_int32 *p, kmp_int32 d) { - kmp_int32 
old_value, new_value; +kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) { + kmp_uint32 old_value, new_value; old_value = TCR_4(*p); new_value = old_value & d; @@ -394,8 +394,8 @@ } #endif /* KMP_ARCH_X86 */ -kmp_int64 __kmp_test_then_or64(volatile kmp_int64 *p, kmp_int64 d) { - kmp_int64 old_value, new_value; +kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) { + kmp_uint64 old_value, new_value; old_value = TCR_8(*p); new_value = old_value | d; @@ -407,8 +407,8 @@ return old_value; } -kmp_int64 __kmp_test_then_and64(volatile kmp_int64 *p, kmp_int64 d) { - kmp_int64 old_value, new_value; +kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) { + kmp_uint64 old_value, new_value; old_value = TCR_8(*p); new_value = old_value & d; @@ -1460,8 +1460,7 @@ th->th.th_active = FALSE; if (th->th.th_active_in_pool) { th->th.th_active_in_pool = FALSE; - KMP_TEST_THEN_DEC32( - CCAST(kmp_int32 *, &__kmp_thread_pool_active_nth)); + KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth); KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); } deactivated = TRUE; @@ -1517,7 +1516,7 @@ if (deactivated) { th->th.th_active = TRUE; if (TCR_4(th->th.th_in_pool)) { - KMP_TEST_THEN_INC32(CCAST(kmp_int32 *, &__kmp_thread_pool_active_nth)); + KMP_TEST_THEN_INC32(&__kmp_thread_pool_active_nth); th->th.th_active_in_pool = TRUE; } } Index: runtime/src/z_Windows_NT-586_util.cpp =================================================================== --- runtime/src/z_Windows_NT-586_util.cpp +++ runtime/src/z_Windows_NT-586_util.cpp @@ -47,13 +47,14 @@ return old_value; } -kmp_int32 __kmp_test_then_or32(volatile kmp_int32 *p, kmp_int32 d) { - kmp_int32 old_value, new_value; +kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) { + kmp_uint32 old_value, new_value; old_value = TCR_4(*p); new_value = old_value | d; - while (!__kmp_compare_and_store32(p, old_value, new_value)) { + while (!__kmp_compare_and_store32((volatile kmp_int32 *)p, old_value, + new_value)) { KMP_CPU_PAUSE(); old_value = TCR_4(*p); new_value = old_value | d; @@ -61,13 +62,14 @@ return old_value; } -kmp_int32 __kmp_test_then_and32(volatile kmp_int32 *p, kmp_int32 d) { - kmp_int32 old_value, new_value; +kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) { + kmp_uint32 old_value, new_value; old_value = TCR_4(*p); new_value = old_value & d; - while (!__kmp_compare_and_store32(p, old_value, new_value)) { + while (!__kmp_compare_and_store32((volatile kmp_int32 *)p, old_value, + new_value)) { KMP_CPU_PAUSE(); old_value = TCR_4(*p); new_value = old_value & d; @@ -103,12 +105,13 @@ } #endif /* KMP_ARCH_X86 */ -kmp_int64 __kmp_test_then_or64(volatile kmp_int64 *p, kmp_int64 d) { - kmp_int64 old_value, new_value; +kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) { + kmp_uint64 old_value, new_value; old_value = TCR_8(*p); new_value = old_value | d; - while (!__kmp_compare_and_store64(p, old_value, new_value)) { + while (!__kmp_compare_and_store64((volatile kmp_int64 *)p, old_value, + new_value)) { KMP_CPU_PAUSE(); old_value = TCR_8(*p); new_value = old_value | d; @@ -117,12 +120,13 @@ return old_value; } -kmp_int64 __kmp_test_then_and64(volatile kmp_int64 *p, kmp_int64 d) { - kmp_int64 old_value, new_value; +kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) { + kmp_uint64 old_value, new_value; old_value = TCR_8(*p); new_value = old_value & d; - while (!__kmp_compare_and_store64(p, old_value, new_value)) { + while 
(!__kmp_compare_and_store64((volatile kmp_int64 *)p, old_value, + new_value)) { KMP_CPU_PAUSE(); old_value = TCR_8(*p); new_value = old_value & d;
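
---

Note (not part of the patch): the recurring pattern above is to give the atomic wrappers volatile-qualified, correctly signed parameters (e.g. kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *, kmp_uint32)) so call sites no longer need CCAST/RCAST to strip qualifiers. A minimal standalone sketch of that pattern follows, assuming a GCC/Clang toolchain with the __sync builtins (which the patched code itself uses); fetch_then_or32 and the demo variable flags are hypothetical stand-ins, not names from the patch.

    // sketch.cpp -- illustrative only; mirrors the post-patch wrapper shape.
    #include <cstdint>
    #include <cstdio>

    // Volatile-qualified, unsigned parameter, analogous to the new
    // kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v).
    static inline uint32_t fetch_then_or32(volatile uint32_t *p, uint32_t v) {
      // The GCC/Clang __sync builtins accept volatile-qualified pointers
      // directly, so neither the wrapper nor its callers need const_cast
      // or reinterpret_cast.
      return __sync_fetch_and_or(p, v);
    }

    int main() {
      volatile uint32_t flags = 0x1u; // stands in for a field like tq_flags
      // Pre-patch call sites had to strip qualifiers to match a narrower
      // prototype, e.g. in kmp_taskq.cpp:
      //   KMP_TEST_THEN_OR32(CCAST(kmp_int32 *, &queue->tq_flags),
      //                      (kmp_int32)TQF_ALL_TASKS_QUEUED);
      // With a volatile-qualified parameter the cast disappears:
      uint32_t old = fetch_then_or32(&flags, 0x4u);
      std::printf("old=0x%x new=0x%x\n", (unsigned)old, (unsigned)flags);
      return 0;
    }

The design effect, as far as the patch shows, is that any remaining casts live once in the macro or function definitions (kmp_os.h, z_Linux_util.cpp, z_Windows_NT-586_util.cpp) rather than being repeated as CCAST/RCAST at every call site, and the OR/AND variants take unsigned operands since they operate on bit masks.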