Diff 358312

openmp/runtime/cmake/config-ix.cmake

	Show First 20 Lines • Show All 103 Lines • ▼ Show 20 Lines
	# Check for Unix shared memory			# Check for Unix shared memory
	check_symbol_exists(shm_open "sys/mman.h" LIBOMP_HAVE_SHM_OPEN_NO_LRT)			check_symbol_exists(shm_open "sys/mman.h" LIBOMP_HAVE_SHM_OPEN_NO_LRT)
	if (NOT LIBOMP_HAVE_SHM_OPEN_NO_LRT)			if (NOT LIBOMP_HAVE_SHM_OPEN_NO_LRT)
	set(CMAKE_REQUIRED_LIBRARIES -lrt)			set(CMAKE_REQUIRED_LIBRARIES -lrt)
	check_symbol_exists(shm_open "sys/mman.h" LIBOMP_HAVE_SHM_OPEN_WITH_LRT)			check_symbol_exists(shm_open "sys/mman.h" LIBOMP_HAVE_SHM_OPEN_WITH_LRT)
	set(CMAKE_REQUIRED_LIBRARIES)			set(CMAKE_REQUIRED_LIBRARIES)
	endif()			endif()

				# Check for aligned memory allocator functions.
				# _mm_malloc is probed with a small compile test; when xmmintrin.h is
				# available the test program is built with -DLIBOMP_HAVE_XMMINTRIN_H so
				# that it includes the header. The remaining allocators are probed by
				# symbol lookup in their usual headers.
				check_include_file(xmmintrin.h LIBOMP_HAVE_XMMINTRIN_H)
				# Save the current flags (quoted so the value is kept as one string).
				set(OLD_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
				if (LIBOMP_HAVE_XMMINTRIN_H)
				  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -DLIBOMP_HAVE_XMMINTRIN_H")
				endif()
				set(source_code "// check for _mm_malloc
				#ifdef LIBOMP_HAVE_XMMINTRIN_H
				#include <xmmintrin.h>
				#endif
				int main() { void *ptr = _mm_malloc(sizeof(int) * 1000, 64); _mm_free(ptr); return 0; }")
				check_cxx_source_compiles("${source_code}" LIBOMP_HAVE__MM_MALLOC)
				# Restore the flags so later checks are not affected by the extra define.
				set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
				check_symbol_exists(aligned_alloc "stdlib.h" LIBOMP_HAVE_ALIGNED_ALLOC)
				check_symbol_exists(posix_memalign "stdlib.h" LIBOMP_HAVE_POSIX_MEMALIGN)
				check_symbol_exists(_aligned_malloc "malloc.h" LIBOMP_HAVE__ALIGNED_MALLOC)

	# Check linker flags			# Check linker flags
	if(WIN32)			if(WIN32)
	libomp_check_linker_flag(/SAFESEH LIBOMP_HAVE_SAFESEH_FLAG)			libomp_check_linker_flag(/SAFESEH LIBOMP_HAVE_SAFESEH_FLAG)
	elseif(NOT APPLE)			elseif(NOT APPLE)
	libomp_check_linker_flag(-Wl,-x LIBOMP_HAVE_X_FLAG)			libomp_check_linker_flag(-Wl,-x LIBOMP_HAVE_X_FLAG)
	libomp_check_linker_flag(-Wl,--warn-shared-textrel LIBOMP_HAVE_WARN_SHARED_TEXTREL_FLAG)			libomp_check_linker_flag(-Wl,--warn-shared-textrel LIBOMP_HAVE_WARN_SHARED_TEXTREL_FLAG)
	libomp_check_linker_flag(-Wl,--as-needed LIBOMP_HAVE_AS_NEEDED_FLAG)			libomp_check_linker_flag(-Wl,--as-needed LIBOMP_HAVE_AS_NEEDED_FLAG)
	libomp_check_linker_flag("-Wl,--version-script=${LIBOMP_SRC_DIR}/exports_so.txt" LIBOMP_HAVE_VERSION_SCRIPT_FLAG)			libomp_check_linker_flag("-Wl,--version-script=${LIBOMP_SRC_DIR}/exports_so.txt" LIBOMP_HAVE_VERSION_SCRIPT_FLAG)
	▲ Show 20 Lines • Show All 223 Lines • Show Last 20 Lines

openmp/runtime/src/i18n/en_US.txt

	Show First 20 Lines • Show All 263 Lines • ▼ Show 20 Lines
	FatalSysError "Fatal system error detected."			FatalSysError "Fatal system error detected."
	OutOfHeapMemory "Out of heap memory."			OutOfHeapMemory "Out of heap memory."
	OBSOLETE "Clearing __KMP_REGISTERED_LIB env var failed."			OBSOLETE "Clearing __KMP_REGISTERED_LIB env var failed."
	OBSOLETE "Registering library with env var failed."			OBSOLETE "Registering library with env var failed."
	Using_int_Value "%1$s value \"%2$d\" will be used."			Using_int_Value "%1$s value \"%2$d\" will be used."
	Using_uint_Value "%1$s value \"%2$u\" will be used."			Using_uint_Value "%1$s value \"%2$u\" will be used."
	Using_uint64_Value "%1$s value \"%2$s\" will be used."			Using_uint64_Value "%1$s value \"%2$s\" will be used."
	Using_str_Value "%1$s value \"%2$s\" will be used."			Using_str_Value "%1$s value \"%2$s\" will be used."
				BarrierPatternOverride "Mixing other barrier patterns with dist is prohibited. Using dist for all barrier patterns."
	MaxValueUsing "%1$s maximum value \"%2$d\" will be used."			MaxValueUsing "%1$s maximum value \"%2$d\" will be used."
	MinValueUsing "%1$s minimum value \"%2$d\" will be used."			MinValueUsing "%1$s minimum value \"%2$d\" will be used."
	MemoryAllocFailed "Memory allocation failed."			MemoryAllocFailed "Memory allocation failed."
	FileNameTooLong "File name too long."			FileNameTooLong "File name too long."
	OBSOLETE "Lock table overflow."			OBSOLETE "Lock table overflow."
	ManyThreadsForTPDirective "Too many threads to use threadprivate directive."			ManyThreadsForTPDirective "Too many threads to use threadprivate directive."
	AffinityInvalidMask "%1$s: invalid mask."			AffinityInvalidMask "%1$s: invalid mask."
	WrongDefinition "Wrong definition."			WrongDefinition "Wrong definition."
	▲ Show 20 Lines • Show All 253 Lines • Show Last 20 Lines

openmp/runtime/src/kmp.h

Show First 20 Lines • Show All 109 Lines • ▼ Show 20 Lines

#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64		#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
#include <xmmintrin.h>		#include <xmmintrin.h>
#endif		#endif

#include "kmp_debug.h"		#include "kmp_debug.h"
#include "kmp_lock.h"		#include "kmp_lock.h"
#include "kmp_version.h"		#include "kmp_version.h"
		#include "kmp_barrier.h"
#if USE_DEBUGGER		#if USE_DEBUGGER
#include "kmp_debugger.h"		#include "kmp_debugger.h"
#endif		#endif
#include "kmp_i18n.h"		#include "kmp_i18n.h"

#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX \|\| KMP_OS_WINDOWS)		#define KMP_HANDLE_SIGNALS (KMP_OS_UNIX \|\| KMP_OS_WINDOWS)

#include "kmp_wrapper_malloc.h"		#include "kmp_wrapper_malloc.h"
▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines
typedef struct kmp_taskdata kmp_taskdata_t;		typedef struct kmp_taskdata kmp_taskdata_t;
typedef union kmp_task_team kmp_task_team_t;		typedef union kmp_task_team kmp_task_team_t;
typedef union kmp_team kmp_team_p;		typedef union kmp_team kmp_team_p;
typedef union kmp_info kmp_info_p;		typedef union kmp_info kmp_info_p;
typedef union kmp_root kmp_root_p;		typedef union kmp_root kmp_root_p;

template <bool C = false, bool S = true> class kmp_flag_32;		template <bool C = false, bool S = true> class kmp_flag_32;
template <bool C = false, bool S = true> class kmp_flag_64;		template <bool C = false, bool S = true> class kmp_flag_64;
		template <bool C = false, bool S = true> class kmp_atomic_flag_64;
class kmp_flag_oncore;		class kmp_flag_oncore;

#ifdef __cplusplus		#ifdef __cplusplus
extern "C" {		extern "C" {
#endif		#endif

/* ------------------------------------------------------------------------ */		/* ------------------------------------------------------------------------ */

▲ Show 20 Lines • Show All 1,600 Lines • ▼ Show 20 Lines	#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \
3 // Special state; tells worker to shift from parent to own b_go		3 // Special state; tells worker to shift from parent to own b_go
#define KMP_BARRIER_SWITCHING \		#define KMP_BARRIER_SWITCHING \
4 // Special state; worker resets appropriate flag on wake-up		4 // Special state; worker resets appropriate flag on wake-up

#define KMP_NOT_SAFE_TO_REAP \		#define KMP_NOT_SAFE_TO_REAP \
0 // Thread th_reap_state: not safe to reap (tasking)		0 // Thread th_reap_state: not safe to reap (tasking)
#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)		#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)

		// The flag_type describes the storage used for the flag.
		enum flag_type {
		  flag32, /**< atomic 32 bit flags */
		  flag64, /**< 64 bit flags */
		  atomic_flag64, /**< atomic 64 bit flags */
		  flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */
		  flag_unset
		};

enum barrier_type {		enum barrier_type {
bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction		bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
barriers if enabled) */		barriers if enabled) */
bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */		bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
#if KMP_FAST_REDUCTION_BARRIER		#if KMP_FAST_REDUCTION_BARRIER
bs_reduction_barrier, /* 2, All barriers that are used in reduction */		bs_reduction_barrier, /* 2, All barriers that are used in reduction */
#endif // KMP_FAST_REDUCTION_BARRIER		#endif // KMP_FAST_REDUCTION_BARRIER
bs_last_barrier /* Just a placeholder to mark the end */		bs_last_barrier /* Just a placeholder to mark the end */
};		};

// to work with reduction barriers just like with plain barriers		// to work with reduction barriers just like with plain barriers
#if !KMP_FAST_REDUCTION_BARRIER		#if !KMP_FAST_REDUCTION_BARRIER
#define bs_reduction_barrier bs_plain_barrier		#define bs_reduction_barrier bs_plain_barrier
#endif // KMP_FAST_REDUCTION_BARRIER		#endif // KMP_FAST_REDUCTION_BARRIER

typedef enum kmp_bar_pat { /* Barrier communication patterns */		typedef enum kmp_bar_pat { /* Barrier communication patterns */
bp_linear_bar =		bp_linear_bar =
0, /* Single level (degenerate) tree */		0, /* Single level (degenerate) tree */
bp_tree_bar =		bp_tree_bar =
1, /* Balanced tree with branching factor 2^n */		1, /* Balanced tree with branching factor 2^n */
bp_hyper_bar = 2, /* Hypercube-embedded tree with min		bp_hyper_bar = 2, /* Hypercube-embedded tree with min
branching factor 2^n */		branching factor 2^n */
bp_hierarchical_bar = 3, /* Machine hierarchy tree */		bp_hierarchical_bar = 3, /* Machine hierarchy tree */
		bp_dist_bar = 4, /* Distributed barrier */
bp_last_bar /* Placeholder to mark the end */		bp_last_bar /* Placeholder to mark the end */
} kmp_bar_pat_e;		} kmp_bar_pat_e;

#define KMP_BARRIER_ICV_PUSH 1		#define KMP_BARRIER_ICV_PUSH 1

/* Record for holding the values of the internal controls stack records */		/* Record for holding the values of the internal controls stack records */
typedef struct kmp_internal_control {		typedef struct kmp_internal_control {
int serial_nesting_level; /* corresponds to the value of the		int serial_nesting_level; /* corresponds to the value of the
▲ Show 20 Lines • Show All 708 Lines • ▼ Show 20 Lines	#endif

/* The following are also read by the primary thread during reinit */		/* The following are also read by the primary thread during reinit */
struct common_table *th_pri_common;		struct common_table *th_pri_common;

volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */		volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
/* while awaiting queuing lock acquire */		/* while awaiting queuing lock acquire */

volatile void *th_sleep_loc; // this points at a kmp_flag<T>		volatile void *th_sleep_loc; // this points at a kmp_flag<T>
		flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc

ident_t *th_ident;		ident_t *th_ident;
unsigned th_x; // Random number generator data		unsigned th_x; // Random number generator data
unsigned th_a; // Random number generator data		unsigned th_a; // Random number generator data

/* Tasking-related data for the thread */		/* Tasking-related data for the thread */
kmp_task_team_t *th_task_team; // Task team struct		kmp_task_team_t *th_task_team; // Task team struct
kmp_taskdata_t *th_current_task; // Innermost Task being executed		kmp_taskdata_t *th_current_task; // Innermost Task being executed
kmp_uint8 th_task_state; // alternating 0/1 for task team identification		kmp_uint8 th_task_state; // alternating 0/1 for task team identification
kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state		kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
// at nested levels		// at nested levels
kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack		kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack		kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
kmp_uint32 th_reap_state; // Non-zero indicates thread is not		kmp_uint32 th_reap_state; // Non-zero indicates thread is not
// tasking, thus safe to reap		// tasking, thus safe to reap

/* More stuff for keeping track of active/sleeping threads (this part is		/* More stuff for keeping track of active/sleeping threads (this part is
written by the worker thread) */		written by the worker thread) */
kmp_uint8 th_active_in_pool; // included in count of #active threads in pool		kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
int th_active; // ! sleeping; 32 bits for TCR/TCW		int th_active; // ! sleeping; 32 bits for TCR/TCW
		std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team
		// 0 = not used in team; 1 = used in team;
		// 2 = transitioning to not used in team; 3 = transitioning to used in team
struct cons_header *th_cons; // used for consistency check		struct cons_header *th_cons; // used for consistency check
#if KMP_USE_HIER_SCHED		#if KMP_USE_HIER_SCHED
// used for hierarchical scheduling		// used for hierarchical scheduling
kmp_hier_private_bdata_t *th_hier_bar_data;		kmp_hier_private_bdata_t *th_hier_bar_data;
#endif		#endif

/* Add the synchronizing data which is cache aligned and padded. */		/* Add the synchronizing data which is cache aligned and padded. */
KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];		KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
▲ Show 20 Lines • Show All 163 Lines • ▼ Show 20 Lines	#endif
int t_master_active; // save on fork, restore on join		int t_master_active; // save on fork, restore on join
void *t_copypriv_data; // team specific pointer to copyprivate data array		void *t_copypriv_data; // team specific pointer to copyprivate data array
#if KMP_OS_WINDOWS		#if KMP_OS_WINDOWS
std::atomic<kmp_uint32> t_copyin_counter;		std::atomic<kmp_uint32> t_copyin_counter;
#endif		#endif
#if USE_ITT_BUILD		#if USE_ITT_BUILD
void *t_stack_id; // team specific stack stitching id (for ittnotify)		void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */		#endif /* USE_ITT_BUILD */
		distributedBarrier *b; // Distributed barrier data associated with team
		Lint: Pre-merge checks Inline Actions clang-tidy: error: unknown type name 'distributedBarrier' [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: unknown type name 'distributedBarrier' [clang-diagnostic-error] [[https…
} kmp_base_team_t;		} kmp_base_team_t;

union KMP_ALIGN_CACHE kmp_team {		union KMP_ALIGN_CACHE kmp_team {
kmp_base_team_t t;		kmp_base_team_t t;
double t_align; /* use worst case alignment */		double t_align; /* use worst case alignment */
char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];		char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
};		};

▲ Show 20 Lines • Show All 1,285 Lines • ▼ Show 20 Lines
#ifdef __cplusplus		#ifdef __cplusplus
}		}
#endif		#endif

template <bool C, bool S>		template <bool C, bool S>
extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);		extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>		template <bool C, bool S>
extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);		extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);
		template <bool C, bool S>
		extern void __kmp_atomic_suspend_64(int th_gtid,
		kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);		extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
#if KMP_HAVE_MWAIT \|\| KMP_HAVE_UMWAIT		#if KMP_HAVE_MWAIT \|\| KMP_HAVE_UMWAIT
template <bool C, bool S>		template <bool C, bool S>
extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);		extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);
template <bool C, bool S>		template <bool C, bool S>
extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);		extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);
		template <bool C, bool S>
		extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);		extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);
#endif		#endif
template <bool C, bool S>		template <bool C, bool S>
extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);		extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: function '__kmp_resume_32<false, true>' is within a recursive call chain [misc-no-recursion] not useful Lint: Pre-merge checks: clang-tidy: warning: function '__kmp_resume_32<false, true>' is within a recursive call chain…
template <bool C, bool S>		template <bool C, bool S>
extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);		extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);
		template <bool C, bool S>
		extern void __kmp_atomic_resume_64(int target_gtid,
		kmp_atomic_flag_64<C, S> *flag);
extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);		extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);

template <bool C, bool S>		template <bool C, bool S>
int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,		int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
kmp_flag_32<C, S> *flag, int final_spin,		kmp_flag_32<C, S> *flag, int final_spin,
int *thread_finished,		int *thread_finished,
#if USE_ITT_BUILD		#if USE_ITT_BUILD
void *itt_sync_obj,		void *itt_sync_obj,
#endif /* USE_ITT_BUILD */		#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);		kmp_int32 is_constrained);
template <bool C, bool S>		template <bool C, bool S>
int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,		int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
kmp_flag_64<C, S> *flag, int final_spin,		kmp_flag_64<C, S> *flag, int final_spin,
int *thread_finished,		int *thread_finished,
#if USE_ITT_BUILD		#if USE_ITT_BUILD
void *itt_sync_obj,		void *itt_sync_obj,
#endif /* USE_ITT_BUILD */		#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);		kmp_int32 is_constrained);
		template <bool C, bool S>
		int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
		kmp_atomic_flag_64<C, S> *flag,
		int final_spin, int *thread_finished,
		#if USE_ITT_BUILD
		void *itt_sync_obj,
		#endif /* USE_ITT_BUILD */
		kmp_int32 is_constrained);
int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,		int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
kmp_flag_oncore *flag, int final_spin,		kmp_flag_oncore *flag, int final_spin,
int *thread_finished,		int *thread_finished,
#if USE_ITT_BUILD		#if USE_ITT_BUILD
void *itt_sync_obj,		void *itt_sync_obj,
#endif /* USE_ITT_BUILD */		#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);		kmp_int32 is_constrained);

▲ Show 20 Lines • Show All 174 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_atomic.cpp

Show First 20 Lines • Show All 726 Lines • ▼ Show 20 Lines	#define OP_CRITICAL(OP, LCK_ID) \
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \		__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\		\
(*lhs) OP(rhs); \		(*lhs) OP(rhs); \
\		\
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);		__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \		#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \		__kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
(lhs) = (TYPE)((lhs)OP((TYPE)rhs)); \		(lhs) = (TYPE)((lhs)OP rhs); \
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);		__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------		// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,		// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.		// even though it is not required by the ISA.
//		//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,		// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common		// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines
// Operation on *lhs, rhs using "compare_and_store" routine		// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type		// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls		// BITS - size in bits, used to distinguish low level calls
// OP - operator		// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \		#define OP_CMPXCHG(TYPE, BITS, OP) \
{ \		{ \
TYPE old_value, new_value; \		TYPE old_value, new_value; \
old_value = (TYPE volatile )lhs; \		old_value = (TYPE volatile )lhs; \
new_value = (TYPE)(old_value OP((TYPE)rhs)); \		new_value = (TYPE)(old_value OP rhs); \
while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \		while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
(kmp_int##BITS )lhs, VOLATILE_CAST(kmp_int##BITS *) & old_value, \		(kmp_int##BITS )lhs, VOLATILE_CAST(kmp_int##BITS *) & old_value, \
VOLATILE_CAST(kmp_int##BITS ) & new_value)) { \		VOLATILE_CAST(kmp_int##BITS ) & new_value)) { \
KMP_DO_PAUSE; \		KMP_DO_PAUSE; \
\		\
old_value = (TYPE volatile )lhs; \		old_value = (TYPE volatile )lhs; \
new_value = (TYPE)(old_value OP((TYPE)rhs)); \		new_value = (TYPE)(old_value OP rhs); \
} \		} \
}		}

#if USE_CMPXCHG_FIX		#if USE_CMPXCHG_FIX
// 2007-06-25:		// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32		// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile		// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the		// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
▲ Show 20 Lines • Show All 2,884 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_barrier.h

This file was added.

				/*
				* kmp_barrier.h
				*/

				//===----------------------------------------------------------------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#ifndef KMP_BARRIER_H
				#define KMP_BARRIER_H

				#include "kmp.h"
				#include "kmp_i18n.h"

				#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
				#include <xmmintrin.h>
				#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
				#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
				#elif KMP_HAVE_ALIGNED_ALLOC
				#define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
				#define KMP_ALIGNED_FREE(ptr) free(ptr)
				#elif KMP_HAVE_POSIX_MEMALIGN
				// Allocate `size` bytes aligned to `alignment` via posix_memalign.
				// Returns the allocation, or nullptr when posix_memalign reports an error.
				static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
				  // Start from nullptr: on failure posix_memalign is not required to store
				  // anything through its first argument, so the pointer must never be read
				  // (or freed) while still uninitialised.
				  void *ptr = nullptr;
				  if (posix_memalign(&ptr, alignment, size) != 0)
				    return nullptr; // nothing was allocated, so there is nothing to free
				  return ptr;
				}
				#define KMP_ALIGNED_FREE(ptr) free(ptr)
				#elif KMP_HAVE__ALIGNED_MALLOC
				#include <malloc.h>
				#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
				#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
				#else
				#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
				#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
				#endif

				// Use four cache lines: MLC tends to prefetch the next or previous cache line
				// creating a possible fake conflict between cores, so this is the only way to
				// guarantee that no such prefetch can happen.
				#ifndef KMP_FOURLINE_ALIGN_CACHE
				#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
				#endif

				#define KMP_OPTIMIZE_FOR_REDUCTIONS 0

				class distributedBarrier {
				struct flags_s {
				kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
				};

				struct go_s {
				std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
				};

				struct iter_s {
				kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
				};

				struct sleep_s {
				std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
				};

				void init(size_t nthr);
				void resize(size_t nthr);
				void computeGo(size_t n);
				void computeVarsForN(size_t n);

				public:
				enum {
				MAX_ITERS = 3,
				MAX_GOS = 8,
				IDEAL_GOS = 4,
				IDEAL_CONTENTION = 16,
				};

				flags_s *flags[MAX_ITERS];
				go_s *go;
				iter_s *iter;
				sleep_s *sleep;

				size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
				size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
				// number of go signals each requiring one write per iteration
				size_t KMP_ALIGN_CACHE num_gos;
				// number of groups of gos
				size_t KMP_ALIGN_CACHE num_groups;
				// threads per go signal
				size_t KMP_ALIGN_CACHE threads_per_go;
				bool KMP_ALIGN_CACHE fix_threads_per_go;
				// threads per group
				size_t KMP_ALIGN_CACHE threads_per_group;
				// number of go signals in a group
				size_t KMP_ALIGN_CACHE gos_per_group;
				void *team_icvs;

				distributedBarrier() = delete;
				~distributedBarrier() = delete;

				// Used instead of constructor to create aligned data
				static distributedBarrier *allocate(int nThreads) {
				distributedBarrier d = (distributedBarrier )KMP_ALIGNED_ALLOCATE(
				sizeof(distributedBarrier), 4 * CACHE_LINE);
				if (!d) {
				KMP_FATAL(MemoryAllocFailed);
				}
				d->num_threads = 0;
				d->max_threads = 0;
				for (int i = 0; i < MAX_ITERS; ++i)
				d->flags[i] = NULL;
				d->go = NULL;
				d->iter = NULL;
				d->sleep = NULL;
				d->team_icvs = NULL;
				d->fix_threads_per_go = false;
				// calculate gos and groups ONCE on base size
				d->computeGo(nThreads);
				d->init(nThreads);
				return d;
				}

				static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

				void update_num_threads(size_t nthr) { init(nthr); }

				bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
				size_t get_num_threads() { return num_threads; }
				kmp_uint64 go_release();
				void go_reset();
				};

				#endif // KMP_BARRIER_H

openmp/runtime/src/kmp_barrier.cpp

/*		/*
* kmp_barrier.cpp		* kmp_barrier.cpp
*/		*/

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.		// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.		// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception		// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_wait_release.h"		#include "kmp_wait_release.h"
		#include "kmp_barrier.h"
#include "kmp_itt.h"		#include "kmp_itt.h"
#include "kmp_os.h"		#include "kmp_os.h"
#include "kmp_stats.h"		#include "kmp_stats.h"
#include "ompt-specific.h"		#include "ompt-specific.h"
		// for distributed barrier
		#include "kmp_affinity.h"

#if KMP_MIC		#if KMP_MIC
#include <immintrin.h>		#include <immintrin.h>
#define USE_NGO_STORES 1		#define USE_NGO_STORES 1
#endif // KMP_MIC		#endif // KMP_MIC

#if KMP_MIC && USE_NGO_STORES		#if KMP_MIC && USE_NGO_STORES
// ICV copying		// ICV copying
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))		#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)		#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)		#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")		#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else		#else
#define ngo_load(src) ((void)0)		#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))		#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)		#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)		#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */		#endif /* KMP_MIC && USE_NGO_STORES */

void __kmp_print_structure(void); // Forward declaration		void __kmp_print_structure(void); // Forward declaration

// ---------------------------- Barrier Algorithms ----------------------------		// ---------------------------- Barrier Algorithms ----------------------------
		// Distributed barrier

		// Compute how many threads poll each go flag and how the go flags are
		// arranged into groups, for a team of n threads.
		//
		// Writes threads_per_go, num_gos, num_groups, gos_per_group and
		// threads_per_group. When __kmp_topology is available, threads_per_go is
		// re-derived from the cores-per-socket ratio and fix_threads_per_go is set;
		// otherwise the current value of threads_per_go is reused and the go flags
		// are spread over groups using a divisor of 2.
		void distributedBarrier::computeVarsForN(size_t n) {
		  // Divisor used to spread the go flags over groups: the socket count when
		  // the topology is known, 2 otherwise.
		  int group_divisor = 2;

		  if (__kmp_topology) {
		    int socket_level = __kmp_topology->get_level(KMP_HW_SOCKET);
		    int core_level = __kmp_topology->get_level(KMP_HW_CORE);
		    int ncores_per_socket =
		        __kmp_topology->calculate_ratio(core_level, socket_level);
		    int nsockets = __kmp_topology->get_count(socket_level);

		    // Clamp whatever the topology reports to usable values.
		    if (nsockets <= 0)
		      nsockets = 1;
		    if (ncores_per_socket <= 0)
		      ncores_per_socket = 1;

		    threads_per_go = ncores_per_socket >> 1;
		    if (!fix_threads_per_go) {
		      // Minimize num_gos
		      if (threads_per_go > 4) {
		        if (KMP_OPTIMIZE_FOR_REDUCTIONS) {
		          threads_per_go = threads_per_go >> 1;
		        }
		        if (threads_per_go > 4 && nsockets == 1)
		          threads_per_go = threads_per_go >> 1;
		      }
		    }
		    fix_threads_per_go = true;
		    group_divisor = nsockets;
		  }

		  // threads_per_go and num_groups are both used as divisors below. Clamp
		  // them on every path (previously only the topology path did), so a zero
		  // value cannot cause a division by zero.
		  if (threads_per_go == 0)
		    threads_per_go = 1;

		  // num_gos = ceil(n / threads_per_go)
		  num_gos = n / threads_per_go;
		  if (n % threads_per_go)
		    num_gos++;

		  if (group_divisor == 1 || num_gos == 1)
		    num_groups = 1;
		  else {
		    // num_groups = ceil(num_gos / group_divisor)
		    num_groups = num_gos / group_divisor;
		    if (num_gos % group_divisor)
		      num_groups++;
		  }
		  if (num_groups <= 0)
		    num_groups = 1;

		  // gos_per_group = ceil(num_gos / num_groups)
		  gos_per_group = num_gos / num_groups;
		  if (num_gos % num_groups)
		    gos_per_group++;
		  threads_per_group = threads_per_go * gos_per_group;
		}

		// Pick an initial num_gos / threads_per_go pair for n threads, then let
		// computeVarsForN(n) derive the remaining layout values.
		void distributedBarrier::computeGo(size_t n) {
		  // Smallest go-flag count (at least 1) for which
		  // IDEAL_CONTENTION * num_gos reaches n.
		  num_gos = 1;
		  while (!(IDEAL_CONTENTION * num_gos >= n))
		    num_gos++;

		  // threads_per_go = ceil(n / num_gos)
		  threads_per_go = n / num_gos;
		  if (n % num_gos)
		    threads_per_go++;

		  // Too many go flags: put one more thread on each flag and recompute the
		  // flag count until it no longer exceeds MAX_GOS.
		  while (num_gos > MAX_GOS) {
		    threads_per_go++;
		    const bool has_remainder = (n % threads_per_go) != 0;
		    num_gos = n / threads_per_go;
		    if (has_remainder)
		      num_gos++;
		  }

		  computeVarsForN(n);
		}

		// Grow the barrier arrays (flags[*], go, iter, sleep) when the new number of
		// threads exceeds max_threads, which is the current size of all the arrays.
		// The new capacity is twice the requested thread count. Arrays that already
		// exist are reallocated; arrays that are still null are freshly allocated.
		//
		// NOTE(review): the results of KMP_INTERNAL_REALLOC / KMP_INTERNAL_MALLOC are
		// not checked here; confirm how allocation failure is expected to be handled.
		void distributedBarrier::resize(size_t nthr) {
		  KMP_DEBUG_ASSERT(nthr > max_threads);

		  // expand to requested size * 2
		  max_threads = nthr * 2;

		  // allocate arrays to new max threads
		  for (int i = 0; i < MAX_ITERS; ++i) {
		    if (flags[i])
		      flags[i] = (flags_s *)KMP_INTERNAL_REALLOC(flags[i],
		                                                 max_threads * sizeof(flags_s));
		    else
		      flags[i] = (flags_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(flags_s));
		  }

		  if (go)
		    go = (go_s *)KMP_INTERNAL_REALLOC(go, max_threads * sizeof(go_s));
		  else
		    go = (go_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(go_s));

		  if (iter)
		    iter = (iter_s *)KMP_INTERNAL_REALLOC(iter, max_threads * sizeof(iter_s));
		  else
		    iter = (iter_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(iter_s));

		  if (sleep)
		    sleep =
		        (sleep_s *)KMP_INTERNAL_REALLOC(sleep, max_threads * sizeof(sleep_s));
		  else
		    sleep = (sleep_s *)KMP_INTERNAL_MALLOC(max_threads * sizeof(sleep_s));
		}

		// Store the release value into every go flag that threads might be waiting
		// on and return that value. The value is iter[0].iter + MAX_ITERS. When
		// blocktime is not infinite, the caller should follow this with a wake-up
		// call to each thread.
		kmp_uint64 distributedBarrier::go_release() {
		  const kmp_uint64 release_value =
		      iter[0].iter + distributedBarrier::MAX_ITERS;
		  size_t go_idx = 0;
		  while (go_idx < num_gos) {
		    go[go_idx].go.store(release_value);
		    ++go_idx;
		  }
		  return release_value;
		}

		void distributedBarrier::go_reset() {
		for (size_t j = 0; j < max_threads; ++j) {
		for (size_t i = 0; i < distributedBarrier::MAX_ITERS; ++i) {
		flags[i][j].stillNeed = 1;
		}
		go[j].go.store(0);
		iter[j].iter = 0;
		}
		}

		// Initialize or re-initialize the distributed barrier for nthr threads.
		// Grows the arrays through resize() when nthr exceeds the current capacity,
		// resets every slot, recomputes the go/group layout, records the thread
		// count, and allocates team_icvs on first use.
		void distributedBarrier::init(size_t nthr) {
		  // Capacity before any growth; slots at or beyond it are new.
		  const size_t prev_capacity = max_threads;
		  if (max_threads < nthr)
		    resize(nthr); // need more space in arrays

		  size_t slot = 0;
		  while (slot < max_threads) {
		    for (size_t it = 0; it < distributedBarrier::MAX_ITERS; ++it)
		      flags[it][slot].stillNeed = 1;
		    go[slot].go.store(0);
		    iter[slot].iter = 0;
		    // Only slots that did not exist before this call get their sleep flag
		    // cleared here.
		    if (slot >= prev_capacity)
		      sleep[slot].sleep = false;
		    ++slot;
		  }

		  // Recalculate num_gos, etc. based on new nthr
		  computeVarsForN(nthr);

		  num_threads = nthr;

		  if (team_icvs == NULL) {
		    team_icvs = __kmp_allocate(sizeof(kmp_internal_control_t));
		  }
		}

		// This function is used only when KMP_BLOCKTIME is not infinite.
		// static
		void __kmp_dist_barrier_wakeup(enum barrier_type bt, kmp_team_t *team,
		size_t start, size_t stop, size_t inc,
		size_t tid) {
		KMP_DEBUG_ASSERT(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME);
		if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
		return;

		kmp_info_t **other_threads = team->t.t_threads;
		for (size_t thr = start; thr < stop; thr += inc) {
		KMP_DEBUG_ASSERT(other_threads[thr]);
		int gtid = other_threads[thr]->th.th_info.ds.ds_gtid;
		// Wake up worker regardless of if it appears to be sleeping or not
		__kmp_atomic_resume_64(gtid, (kmp_atomic_flag_64<> *)NULL);
		}
		}

		static void __kmp_dist_barrier_gather(
		enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
		void (reduce)(void , void ) USE_ITT_BUILD_ARG(void itt_sync_obj)) {
		KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_gather);
		kmp_team_t *team;
		distributedBarrier *b;
		kmp_info_t **other_threads;
		kmp_uint64 my_current_iter, my_next_iter;
		kmp_uint32 nproc;
		bool group_leader;

		team = this_thr->th.th_team;
		nproc = this_thr->th.th_team_nproc;
		other_threads = team->t.t_threads;
		b = team->t.b;
		my_current_iter = b->iter[tid].iter;
		my_next_iter = (my_current_iter + 1) % distributedBarrier::MAX_ITERS;
		group_leader = ((tid % b->threads_per_group) == 0);

		KA_TRACE(20,
		("__kmp_dist_barrier_gather: T#%d(%d:%d) enter; barrier type %d\n",
		gtid, team->t.t_id, tid, bt));

		#if USE_ITT_BUILD && USE_ITT_NOTIFY
		// Barrier imbalance - save arrive time to the thread
		if (__kmp_forkjoin_frames_mode == 3 \|\| __kmp_forkjoin_frames_mode == 2) {
		this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
		__itt_get_timestamp();
		}
		#endif

		if (group_leader) {
		// Start from the thread after the group leader
		size_t group_start = tid + 1;
		size_t group_end = tid + b->threads_per_group;
		size_t threads_pending = 0;

		if (group_end > nproc)
		group_end = nproc;
		do { // wait for threads in my group
		threads_pending = 0;
		// Check all the flags every time to avoid branch misspredict
		for (size_t thr = group_start; thr < group_end; thr++) {
		// Each thread uses a different cache line
		threads_pending += b->flags[my_current_iter][thr].stillNeed;
		}
		// Execute tasks here
		if (__kmp_tasking_mode != tskm_immediate_exec) {
		kmp_task_team_t *task_team = this_thr->th.th_task_team;
		if (task_team != NULL) {
		if (TCR_SYNC_4(task_team->tt.tt_active)) {
		if (KMP_TASKING_ENABLED(task_team)) {
		int tasks_completed = FALSE;
		__kmp_atomic_execute_tasks_64(
		this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
		&tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
		} else
		this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
		}
		} else {
		this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
		} // if
		}
		if (TCR_4(__kmp_global.g.g_done)) {
		if (__kmp_global.g.g_abort)
		__kmp_abort_thread();
		break;
		} else if (__kmp_tasking_mode != tskm_immediate_exec &&
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: do not use 'else' after 'break' [llvm-else-after-return] not useful Lint: Pre-merge checks: clang-tidy: warning: do not use 'else' after 'break' [llvm-else-after-return] [[https://github.
		this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
		this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
		}
		} while (threads_pending > 0);

		if (reduce) { // Perform reduction if needed
		OMPT_REDUCTION_DECL(this_thr, gtid);
		OMPT_REDUCTION_BEGIN;
		// Group leader reduces all threads in group
		for (size_t thr = group_start; thr < group_end; thr++) {
		(*reduce)(this_thr->th.th_local.reduce_data,
		other_threads[thr]->th.th_local.reduce_data);
		}
		OMPT_REDUCTION_END;
		}

		// Set flag for next iteration
		b->flags[my_next_iter][tid].stillNeed = 1;
		// Each thread uses a different cache line; resets stillNeed to 0 to
		// indicate it has reached the barrier
		b->flags[my_current_iter][tid].stillNeed = 0;

		do { // wait for all group leaders
		threads_pending = 0;
		for (size_t thr = 0; thr < nproc; thr += b->threads_per_group) {
		threads_pending += b->flags[my_current_iter][thr].stillNeed;
		}
		// Execute tasks here
		if (__kmp_tasking_mode != tskm_immediate_exec) {
		kmp_task_team_t *task_team = this_thr->th.th_task_team;
		if (task_team != NULL) {
		if (TCR_SYNC_4(task_team->tt.tt_active)) {
		if (KMP_TASKING_ENABLED(task_team)) {
		int tasks_completed = FALSE;
		__kmp_atomic_execute_tasks_64(
		this_thr, gtid, (kmp_atomic_flag_64<> *)NULL, FALSE,
		&tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
		} else
		this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
		}
		} else {
		this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
		} // if
		}
		if (TCR_4(__kmp_global.g.g_done)) {
		if (__kmp_global.g.g_abort)
		__kmp_abort_thread();
		break;
		} else if (__kmp_tasking_mode != tskm_immediate_exec &&
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: do not use 'else' after 'break' [llvm-else-after-return] not useful Lint: Pre-merge checks: clang-tidy: warning: do not use 'else' after 'break' [llvm-else-after-return] [[https://github.
		this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
		this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
		}
		} while (threads_pending > 0);

		if (reduce) { // Perform reduction if needed
		if (KMP_MASTER_TID(tid)) { // Master reduces over group leaders
		OMPT_REDUCTION_DECL(this_thr, gtid);
		OMPT_REDUCTION_BEGIN;
		for (size_t thr = b->threads_per_group; thr < nproc;
		thr += b->threads_per_group) {
		(*reduce)(this_thr->th.th_local.reduce_data,
		other_threads[thr]->th.th_local.reduce_data);
		}
		OMPT_REDUCTION_END;
		}
		}
		} else {
		// Set flag for next iteration
		b->flags[my_next_iter][tid].stillNeed = 1;
		// Each thread uses a different cache line; resets stillNeed to 0 to
		// indicate it has reached the barrier
		b->flags[my_current_iter][tid].stillNeed = 0;
		}

		KMP_MFENCE();

		KA_TRACE(20,
		("__kmp_dist_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
		gtid, team->t.t_id, tid, bt));
		}

		// Release phase of the distributed barrier.
		//
		// Primary thread (KMP_MASTER_TID(tid)): stores the next go value into the
		// go flag of every group leader, then into the remaining go flags of its
		// own group, and, when blocktime is not infinite, wakes the group leaders
		// and the threads of its own group.
		//
		// Any other thread: loops until th_used_in_team is 1 and its go flag holds
		// the expected value. While th_used_in_team is neither 1 nor 3 it waits on
		// th_used_in_team itself; a value of 3 is moved to 1 by compare-and-store.
		// Once released, a group leader stores the go value into the remaining go
		// flags of its group and, when blocktime is not infinite, wakes the threads
		// of its group.
		//
		// For the fork/join barrier the function returns early at several points
		// when the runtime's g_done flag is set.
		//
		//   bt             barrier type
		//   this_thr       calling thread
		//   gtid, tid      global id and team-local id (tid is re-read for workers)
		//   propagate_icvs when non-zero and KMP_BARRIER_ICV_PUSH is enabled, ICVs
		//                  are copied as part of the release
		//   itt_sync_obj   ITT sync object (present only in ITT builds)
		static void __kmp_dist_barrier_release(
		    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
		    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
		  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_dist_release);
		  kmp_team_t *team;
		  distributedBarrier *b;
		  kmp_bstate_t *thr_bar;
		  kmp_uint64 my_current_iter, next_go;
		  size_t my_go_index;
		  bool group_leader;

		  KA_TRACE(20, ("__kmp_dist_barrier_release: T#%d(%d) enter; barrier type %d\n",
		                gtid, tid, bt));

		  thr_bar = &this_thr->th.th_bar[bt].bb;

		  if (!KMP_MASTER_TID(tid)) {
		    // workers and non-master group leaders need to check their presence in team
		    do {
		      if (this_thr->th.th_used_in_team.load() != 1 &&
		          this_thr->th.th_used_in_team.load() != 3) {
		        // Thread is not in use in a team. Wait on location in tid's thread
		        // struct. The 0 value tells anyone looking that this thread is spinning
		        // or sleeping until this location becomes 3 again; 3 is the transition
		        // state to get to 1 which is waiting on go and being in the team
		        kmp_flag_32<false, false> my_flag(&(this_thr->th.th_used_in_team), 3);
		        if (KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 2,
		                                        0) ||
		            this_thr->th.th_used_in_team.load() == 0) {
		          my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
		        }
		#if USE_ITT_BUILD && USE_ITT_NOTIFY
		        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
		          // In fork barrier where we could not get the object reliably
		          itt_sync_obj =
		              __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
		          // Cancel wait on previous parallel region...
		          __kmp_itt_task_starting(itt_sync_obj);

		          if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
		            return;

		          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
		          if (itt_sync_obj != NULL)
		            // Call prepare as early as possible for "new" barrier
		            __kmp_itt_task_finished(itt_sync_obj);
		        } else
		#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
		            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
		          return;
		      }
		      if (this_thr->th.th_used_in_team.load() != 1 &&
		          this_thr->th.th_used_in_team.load() != 3) // spurious wake-up?
		        continue;
		      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
		        return;

		      // At this point, the thread thinks it is in use in a team, or in
		      // transition to be used in a team, but it might have reached this barrier
		      // before it was marked unused by the team. Unused threads are awoken and
		      // shifted to wait on local thread struct elsewhere. It also might reach
		      // this point by being picked up for use by a different team. Either way,
		      // we need to update the tid.
		      tid = __kmp_tid_from_gtid(gtid);
		      team = this_thr->th.th_team;
		      KMP_DEBUG_ASSERT(tid >= 0);
		      KMP_DEBUG_ASSERT(team);
		      b = team->t.b;
		      my_current_iter = b->iter[tid].iter;
		      next_go = my_current_iter + distributedBarrier::MAX_ITERS;
		      my_go_index = tid / b->threads_per_go;
		      if (this_thr->th.th_used_in_team.load() == 3) {
		        KMP_COMPARE_AND_STORE_ACQ32(&(this_thr->th.th_used_in_team), 3, 1);
		      }
		      // Check if go flag is set
		      if (b->go[my_go_index].go.load() != next_go) {
		        // Wait on go flag on team
		        kmp_atomic_flag_64<false, true> my_flag(
		            &(b->go[my_go_index].go), next_go, &(b->sleep[tid].sleep));
		        my_flag.wait(this_thr, true USE_ITT_BUILD_ARG(itt_sync_obj));
		        KMP_DEBUG_ASSERT(my_current_iter == b->iter[tid].iter ||
		                         b->iter[tid].iter == 0);
		        KMP_DEBUG_ASSERT(b->sleep[tid].sleep == false);
		      }

		      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
		        return;
		      // At this point, the thread's go location was set. This means the primary
		      // thread is safely in the barrier, and so this thread's data is
		      // up-to-date, but we should check again that this thread is really in
		      // use in the team, as it could have been woken up for the purpose of
		      // changing team size, or reaping threads at shutdown.
		      if (this_thr->th.th_used_in_team.load() == 1)
		        break;
		    } while (1);

		    if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
		      return;

		    group_leader = ((tid % b->threads_per_group) == 0);
		    if (group_leader) {
		      // Tell all the threads in my group they can go!
		      for (size_t go_idx = my_go_index + 1;
		           go_idx < my_go_index + b->gos_per_group; go_idx++) {
		        b->go[go_idx].go.store(next_go);
		      }
		      // Fence added so that workers can see changes to go. sfence inadequate.
		      KMP_MFENCE();
		    }

		#if KMP_BARRIER_ICV_PUSH
		    if (propagate_icvs) { // copy ICVs to final dest
		      __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
		                               tid, FALSE);
		      copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
		                (kmp_internal_control_t *)team->t.b->team_icvs);
		      copy_icvs(&thr_bar->th_fixed_icvs,
		                &team->t.t_implicit_task_taskdata[tid].td_icvs);
		    }
		#endif
		    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && group_leader) {
		      // This thread is now awake and participating in the barrier;
		      // wake up the other threads in the group
		      size_t nproc = this_thr->th.th_team_nproc;
		      size_t group_end = tid + b->threads_per_group;
		      if (nproc < group_end)
		        group_end = nproc;
		      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
		    }
		  } else { // Primary thread
		    team = this_thr->th.th_team;
		    b = team->t.b;
		    my_current_iter = b->iter[tid].iter;
		    next_go = my_current_iter + distributedBarrier::MAX_ITERS;
		#if KMP_BARRIER_ICV_PUSH
		    if (propagate_icvs) {
		      // primary thread has ICVs in final destination; copy
		      copy_icvs(&thr_bar->th_fixed_icvs,
		                &team->t.t_implicit_task_taskdata[tid].td_icvs);
		    }
		#endif
		    // Tell all the group leaders they can go!
		    for (size_t go_idx = 0; go_idx < b->num_gos; go_idx += b->gos_per_group) {
		      b->go[go_idx].go.store(next_go);
		    }

		    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
		      // Wake-up the group leaders
		      size_t nproc = this_thr->th.th_team_nproc;
		      __kmp_dist_barrier_wakeup(bt, team, tid + b->threads_per_group, nproc,
		                                b->threads_per_group, tid);
		    }

		    // Tell all the threads in my group they can go!
		    for (size_t go_idx = 1; go_idx < b->gos_per_group; go_idx++) {
		      b->go[go_idx].go.store(next_go);
		    }

		    // Fence added so that workers can see changes to go. sfence inadequate.
		    KMP_MFENCE();

		    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
		      // Wake-up the other threads in my group
		      size_t nproc = this_thr->th.th_team_nproc;
		      size_t group_end = tid + b->threads_per_group;
		      if (nproc < group_end)
		        group_end = nproc;
		      __kmp_dist_barrier_wakeup(bt, team, tid + 1, group_end, 1, tid);
		    }
		  }
		  // Update to next iteration
		  KMP_ASSERT(my_current_iter == b->iter[tid].iter);
		  b->iter[tid].iter = (b->iter[tid].iter + 1) % distributedBarrier::MAX_ITERS;

		  KA_TRACE(
		      20, ("__kmp_dist_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
		           gtid, team->t.t_id, tid, bt));
		}

// Linear Barrier		// Linear Barrier
template <bool cancellable = false>		template <bool cancellable = false>
static bool __kmp_linear_barrier_gather_template(		static bool __kmp_linear_barrier_gather_template(
enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,		enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
void (reduce)(void , void ) USE_ITT_BUILD_ARG(void itt_sync_obj)) {		void (reduce)(void , void ) USE_ITT_BUILD_ARG(void itt_sync_obj)) {
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);		KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
kmp_team_t *team = this_thr->th.th_team;		kmp_team_t *team = this_thr->th.th_team;
▲ Show 20 Lines • Show All 1,300 Lines • ▼ Show 20 Lines	if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
// use 0 to only setup the current team if nthreads > 1		// use 0 to only setup the current team if nthreads > 1
__kmp_task_team_setup(this_thr, team, 0);		__kmp_task_team_setup(this_thr, team, 0);

if (cancellable) {		if (cancellable) {
cancelled = __kmp_linear_barrier_gather_cancellable(		cancelled = __kmp_linear_barrier_gather_cancellable(
bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));		bt, this_thr, gtid, tid, reduce USE_ITT_BUILD_ARG(itt_sync_obj));
} else {		} else {
switch (__kmp_barrier_gather_pattern[bt]) {		switch (__kmp_barrier_gather_pattern[bt]) {
		case bp_dist_bar: {
		__kmp_dist_barrier_gather(bt, this_thr, gtid, tid,
		reduce USE_ITT_BUILD_ARG(itt_sync_obj));
		break;
		}
case bp_hyper_bar: {		case bp_hyper_bar: {
// don't set branch bits to 0; use linear		// don't set branch bits to 0; use linear
KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);		KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]);
__kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,		__kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
reduce USE_ITT_BUILD_ARG(itt_sync_obj));		reduce USE_ITT_BUILD_ARG(itt_sync_obj));
break;		break;
}		}
case bp_hierarchical_bar: {		case bp_hierarchical_bar: {
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines
#endif /* USE_ITT_BUILD */		#endif /* USE_ITT_BUILD */
}		}
if ((status == 1 \|\| !is_split) && !cancelled) {		if ((status == 1 \|\| !is_split) && !cancelled) {
if (cancellable) {		if (cancellable) {
cancelled = __kmp_linear_barrier_release_cancellable(		cancelled = __kmp_linear_barrier_release_cancellable(
bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));		bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
} else {		} else {
switch (__kmp_barrier_release_pattern[bt]) {		switch (__kmp_barrier_release_pattern[bt]) {
		case bp_dist_bar: {
		KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
		__kmp_dist_barrier_release(bt, this_thr, gtid, tid,
		FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
		break;
		}
case bp_hyper_bar: {		case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);		KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
__kmp_hyper_barrier_release(bt, this_thr, gtid, tid,		__kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
FALSE USE_ITT_BUILD_ARG(itt_sync_obj));		FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
break;		break;
}		}
case bp_hierarchical_bar: {		case bp_hierarchical_bar: {
__kmp_hierarchical_barrier_release(		__kmp_hierarchical_barrier_release(
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines	void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
KMP_DEBUG_ASSERT(bt < bs_last_barrier);		KMP_DEBUG_ASSERT(bt < bs_last_barrier);
int tid = __kmp_tid_from_gtid(gtid);		int tid = __kmp_tid_from_gtid(gtid);
kmp_info_t *this_thr = __kmp_threads[gtid];		kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = this_thr->th.th_team;		kmp_team_t *team = this_thr->th.th_team;

if (!team->t.t_serialized) {		if (!team->t.t_serialized) {
if (KMP_MASTER_GTID(gtid)) {		if (KMP_MASTER_GTID(gtid)) {
switch (__kmp_barrier_release_pattern[bt]) {		switch (__kmp_barrier_release_pattern[bt]) {
		case bp_dist_bar: {
		__kmp_dist_barrier_release(bt, this_thr, gtid, tid,
		FALSE USE_ITT_BUILD_ARG(NULL));
		break;
		}
case bp_hyper_bar: {		case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);		KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
__kmp_hyper_barrier_release(bt, this_thr, gtid, tid,		__kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
FALSE USE_ITT_BUILD_ARG(NULL));		FALSE USE_ITT_BUILD_ARG(NULL));
break;		break;
}		}
case bp_hierarchical_bar: {		case bp_hierarchical_bar: {
__kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,		__kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines	if (!KMP_MASTER_TID(ds_tid))
this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);		this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
#endif		#endif
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;		this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
}		}
#endif		#endif

if (__kmp_tasking_mode == tskm_extra_barrier) {		if (__kmp_tasking_mode == tskm_extra_barrier) {
__kmp_tasking_barrier(team, this_thr, gtid);		__kmp_tasking_barrier(team, this_thr, gtid);
KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid,		KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
team_id, tid));		gtid, team_id, tid));
}		}
#ifdef KMP_DEBUG		#ifdef KMP_DEBUG
if (__kmp_tasking_mode != tskm_immediate_exec) {		if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "		KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
"%p, th_task_team = %p\n",		"%p, th_task_team = %p\n",
__kmp_gtid_from_thread(this_thr), team_id,		__kmp_gtid_from_thread(this_thr), team_id,
team->t.t_task_team[this_thr->th.th_task_state],		team->t.t_task_team[this_thr->th.th_task_state],
this_thr->th.th_task_team));		this_thr->th.th_task_team));
		if (this_thr->th.th_task_team)
KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==		KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
team->t.t_task_team[this_thr->th.th_task_state]);		team->t.t_task_team[this_thr->th.th_task_state]);
}		}
#endif /* KMP_DEBUG */		#endif /* KMP_DEBUG */

/* Copy the blocktime info to the thread, where __kmp_wait_template() can		/* Copy the blocktime info to the thread, where __kmp_wait_template() can
access it when the team struct is not guaranteed to exist. Doing these		access it when the team struct is not guaranteed to exist. Doing these
loads causes a cache miss slows down EPCC parallel by 2x. As a workaround,		loads causes a cache miss slows down EPCC parallel by 2x. As a workaround,
we do not perform the copy if blocktime=infinite, since the values are not		we do not perform the copy if blocktime=infinite, since the values are not
used by __kmp_wait_template() in that case. */		used by __kmp_wait_template() in that case. */
Show All 9 Lines	#endif
}		}

#if USE_ITT_BUILD		#if USE_ITT_BUILD
if (__itt_sync_create_ptr \|\| KMP_ITT_DEBUG)		if (__itt_sync_create_ptr \|\| KMP_ITT_DEBUG)
__kmp_itt_barrier_starting(gtid, itt_sync_obj);		__kmp_itt_barrier_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */		#endif /* USE_ITT_BUILD */

switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {		switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
		case bp_dist_bar: {
		__kmp_dist_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
		NULL USE_ITT_BUILD_ARG(itt_sync_obj));
		break;
		}
case bp_hyper_bar: {		case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);		KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
__kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,		__kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
NULL USE_ITT_BUILD_ARG(itt_sync_obj));		NULL USE_ITT_BUILD_ARG(itt_sync_obj));
break;		break;
}		}
case bp_hierarchical_bar: {		case bp_hierarchical_bar: {
__kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,		__kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
Show All 29 Lines	#if KMP_STATS_ENABLED
// idle.		// idle.
for (int i = 0; i < team->t.t_nproc; ++i) {		for (int i = 0; i < team->t.t_nproc; ++i) {
kmp_info_t *team_thread = team->t.t_threads[i];		kmp_info_t *team_thread = team->t.t_threads[i];
if (team_thread == this_thr)		if (team_thread == this_thr)
continue;		continue;
team_thread->th.th_stats->setIdleFlag();		team_thread->th.th_stats->setIdleFlag();
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&		if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
team_thread->th.th_sleep_loc != NULL)		team_thread->th.th_sleep_loc != NULL)
__kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),		__kmp_null_resume_wrapper(team_thread);
team_thread->th.th_sleep_loc);
}		}
#endif		#endif
#if USE_ITT_BUILD		#if USE_ITT_BUILD
if (__itt_sync_create_ptr \|\| KMP_ITT_DEBUG)		if (__itt_sync_create_ptr \|\| KMP_ITT_DEBUG)
__kmp_itt_barrier_middle(gtid, itt_sync_obj);		__kmp_itt_barrier_middle(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */		#endif /* USE_ITT_BUILD */

#if USE_ITT_BUILD && USE_ITT_NOTIFY		#if USE_ITT_BUILD && USE_ITT_NOTIFY
▲ Show 20 Lines • Show All 128 Lines • ▼ Show 20 Lines	#if KMP_USE_MONITOR
team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;		team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else		#else
this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);		this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
#endif		#endif
}		}
} // primary thread		} // primary thread

switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {		switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
		case bp_dist_bar: {
		__kmp_dist_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
		TRUE USE_ITT_BUILD_ARG(NULL));
		break;
		}
case bp_hyper_bar: {		case bp_hyper_bar: {
KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);		KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
__kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,		__kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
TRUE USE_ITT_BUILD_ARG(itt_sync_obj));		TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
break;		break;
}		}
case bp_hierarchical_bar: {		case bp_hierarchical_bar: {
__kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,		__kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
▲ Show 20 Lines • Show All 191 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_config.h.cmake

	Show First 20 Lines • Show All 78 Lines • ▼ Show 20 Lines
	#cmakedefine01 LIBOMP_HAVE_INTRIN_H			#cmakedefine01 LIBOMP_HAVE_INTRIN_H
	#define KMP_HAVE_INTRIN_H LIBOMP_HAVE_INTRIN_H			#define KMP_HAVE_INTRIN_H LIBOMP_HAVE_INTRIN_H
	#cmakedefine01 LIBOMP_HAVE_ATTRIBUTE_WAITPKG			#cmakedefine01 LIBOMP_HAVE_ATTRIBUTE_WAITPKG
	#define KMP_HAVE_ATTRIBUTE_WAITPKG LIBOMP_HAVE_ATTRIBUTE_WAITPKG			#define KMP_HAVE_ATTRIBUTE_WAITPKG LIBOMP_HAVE_ATTRIBUTE_WAITPKG
	#cmakedefine01 LIBOMP_HAVE_ATTRIBUTE_RTM			#cmakedefine01 LIBOMP_HAVE_ATTRIBUTE_RTM
	#define KMP_HAVE_ATTRIBUTE_RTM LIBOMP_HAVE_ATTRIBUTE_RTM			#define KMP_HAVE_ATTRIBUTE_RTM LIBOMP_HAVE_ATTRIBUTE_RTM
	#cmakedefine01 LIBOMP_ARCH_AARCH64_A64FX			#cmakedefine01 LIBOMP_ARCH_AARCH64_A64FX
	#define KMP_ARCH_AARCH64_A64FX LIBOMP_ARCH_AARCH64_A64FX			#define KMP_ARCH_AARCH64_A64FX LIBOMP_ARCH_AARCH64_A64FX
				#cmakedefine01 LIBOMP_HAVE_XMMINTRIN_H
				#define KMP_HAVE_XMMINTRIN_H LIBOMP_HAVE_XMMINTRIN_H
				#cmakedefine01 LIBOMP_HAVE__MM_MALLOC
				#define KMP_HAVE__MM_MALLOC LIBOMP_HAVE__MM_MALLOC
				#cmakedefine01 LIBOMP_HAVE_ALIGNED_ALLOC
				#define KMP_HAVE_ALIGNED_ALLOC LIBOMP_HAVE_ALIGNED_ALLOC
				#cmakedefine01 LIBOMP_HAVE_POSIX_MEMALIGN
				#define KMP_HAVE_POSIX_MEMALIGN LIBOMP_HAVE_POSIX_MEMALIGN
				#cmakedefine01 LIBOMP_HAVE__ALIGNED_MALLOC
				#define KMP_HAVE__ALIGNED_MALLOC LIBOMP_HAVE__ALIGNED_MALLOC

	// Configured cache line based on architecture			// Configured cache line based on architecture
	#if KMP_ARCH_PPC64			#if KMP_ARCH_PPC64
	# define CACHE_LINE 128			# define CACHE_LINE 128
	#elif KMP_ARCH_AARCH64_A64FX			#elif KMP_ARCH_AARCH64_A64FX
	# define CACHE_LINE 256			# define CACHE_LINE 256
	#else			#else
	# define CACHE_LINE 64			# define CACHE_LINE 64
	Show All 33 Lines

openmp/runtime/src/kmp_global.cpp

	Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines
	#endif // KMP_FAST_REDUCTION_BARRIER			#endif // KMP_FAST_REDUCTION_BARRIER
	};			};
	char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain", "forkjoin"			char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain", "forkjoin"
	#if KMP_FAST_REDUCTION_BARRIER			#if KMP_FAST_REDUCTION_BARRIER
	,			,
	"reduction"			"reduction"
	#endif // KMP_FAST_REDUCTION_BARRIER			#endif // KMP_FAST_REDUCTION_BARRIER
	};			};
	char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear", "tree",			char const *__kmp_barrier_pattern_name[bp_last_bar] = {
	"hyper", "hierarchical"};			"linear", "tree", "hyper", "hierarchical", "dist"};

	int __kmp_allThreadsSpecified = 0;			int __kmp_allThreadsSpecified = 0;
	size_t __kmp_align_alloc = CACHE_LINE;			size_t __kmp_align_alloc = CACHE_LINE;

	int __kmp_generate_warnings = kmp_warnings_low;			int __kmp_generate_warnings = kmp_warnings_low;
	int __kmp_reserve_warn = 0;			int __kmp_reserve_warn = 0;
	int __kmp_xproc = 0;			int __kmp_xproc = 0;
	int __kmp_avail_proc = 0;			int __kmp_avail_proc = 0;
	▲ Show 20 Lines • Show All 434 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_os.h

	Show First 20 Lines • Show All 1,013 Lines • ▼ Show 20 Lines
	#define KMP_MB() __sync_synchronize()			#define KMP_MB() __sync_synchronize()
	#endif			#endif
	#endif			#endif

	#ifndef KMP_MB			#ifndef KMP_MB
	#define KMP_MB() /* nothing to do */			#define KMP_MB() /* nothing to do */
	#endif			#endif

				#if KMP_ARCH_X86 \|\| KMP_ARCH_X86_64
				#if KMP_COMPILER_ICC
				#define KMP_MFENCE_() _mm_mfence()
				#define KMP_SFENCE_() _mm_sfence()
				#elif KMP_COMPILER_MSVC
				#define KMP_MFENCE_() MemoryBarrier()
				#define KMP_SFENCE_() MemoryBarrier()
				#else
				#define KMP_MFENCE_() __sync_synchronize()
				#define KMP_SFENCE_() __sync_synchronize()
				#endif
				#define KMP_MFENCE() \
				if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \
				__kmp_query_cpuid(&__kmp_cpuinfo); \
				} \
				if (__kmp_cpuinfo.sse2) { \
				KMP_MFENCE_(); \
				}
				#define KMP_SFENCE() KMP_SFENCE_()
				#else
				#define KMP_MFENCE() KMP_MB()
				#define KMP_SFENCE() KMP_MB()
				#endif

	#ifndef KMP_IMB			#ifndef KMP_IMB
	#define KMP_IMB() /* nothing to do */			#define KMP_IMB() /* nothing to do */
	#endif			#endif

	#ifndef KMP_ST_REL32			#ifndef KMP_ST_REL32
	#define KMP_ST_REL32(A, D) (*(A) = (D))			#define KMP_ST_REL32(A, D) (*(A) = (D))
	#endif			#endif

	▲ Show 20 Lines • Show All 193 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_runtime.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 101 Lines • ▼ Show 20 Lines

static int __kmp_expand_threads(int nNeed);		static int __kmp_expand_threads(int nNeed);
#if KMP_OS_WINDOWS		#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);		static int __kmp_unregister_root_other_thread(int gtid);
#endif		#endif
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);		static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;		kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

		void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
		int new_nthreads);
		void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);

/* Calculate the identifier of the current thread */		/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing		/* fast (and somewhat portable) way to get unique identifier of executing
thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */		thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
int __kmp_get_global_thread_id() {		int __kmp_get_global_thread_id() {
int i;		int i;
kmp_info_t **other_threads;		kmp_info_t **other_threads;
size_t stack_data;		size_t stack_data;
char *stack_addr;		char *stack_addr;
▲ Show 20 Lines • Show All 1,081 Lines • ▼ Show 20 Lines	#endif
serial_team->t.t_ident = loc;		serial_team->t.t_ident = loc;
serial_team->t.t_serialized = 1;		serial_team->t.t_serialized = 1;
serial_team->t.t_nproc = 1;		serial_team->t.t_nproc = 1;
serial_team->t.t_parent = this_thr->th.th_team;		serial_team->t.t_parent = this_thr->th.th_team;
serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;		serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
this_thr->th.th_team = serial_team;		this_thr->th.th_team = serial_team;
serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;		serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid,		KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
this_thr->th.th_current_task));		this_thr->th.th_current_task));
KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);		KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
this_thr->th.th_current_task->td_flags.executing = 0;		this_thr->th.th_current_task->td_flags.executing = 0;

__kmp_push_current_task_to_thread(this_thr, serial_team, 0);		__kmp_push_current_task_to_thread(this_thr, serial_team, 0);

/* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an		/* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
implicit task for each serialized task represented by		implicit task for each serialized task represented by
▲ Show 20 Lines • Show All 342 Lines • ▼ Show 20 Lines	#if OMPT_SUPPORT
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,		__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
&ompt_parallel_data, return_address);		&ompt_parallel_data, return_address);
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);		__ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
}		}
#endif		#endif

/* Change number of threads in the team if requested */		/* Change number of threads in the team if requested */
if (master_set_numthreads) { // The parallel has num_threads clause		if (master_set_numthreads) { // The parallel has num_threads clause
if (master_set_numthreads < master_th->th.th_teams_size.nth) {		if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
// AC: only can reduce number of threads dynamically, can't increase		// AC: only can reduce number of threads dynamically, can't increase
kmp_info_t **other_threads = parent_team->t.t_threads;		kmp_info_t **other_threads = parent_team->t.t_threads;
		// NOTE: if using distributed barrier, we need to run this code block
		// even when the team size appears not to have changed from the max.
		int old_proc = master_th->th.th_teams_size.nth;
		if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
		bp_dist_bar) {
		__kmp_resize_dist_barrier(parent_team, old_proc,
		master_set_numthreads);
		__kmp_add_threads_to_team(parent_team, master_set_numthreads);
		}
parent_team->t.t_nproc = master_set_numthreads;		parent_team->t.t_nproc = master_set_numthreads;
for (i = 0; i < master_set_numthreads; ++i) {		for (i = 0; i < master_set_numthreads; ++i) {
other_threads[i]->th.th_team_nproc = master_set_numthreads;		other_threads[i]->th.th_team_nproc = master_set_numthreads;
}		}
// Keep extra threads hot in the team for possible next parallels
}		}
		// Keep extra threads hot in the team for possible next parallels
master_th->th.th_set_nproc = 0;		master_th->th.th_set_nproc = 0;
}		}

#if USE_DEBUGGER		#if USE_DEBUGGER
if (__kmp_debugging) { // Let debugger override number of threads.		if (__kmp_debugging) { // Let debugger override number of threads.
int nth = __kmp_omp_num_threads(loc);		int nth = __kmp_omp_num_threads(loc);
if (nth > 0) { // 0 means debugger doesn't want to change num threads		if (nth > 0) { // 0 means debugger doesn't want to change num threads
master_set_numthreads = nth;		master_set_numthreads = nth;
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines

#if KMP_DEBUG		#if KMP_DEBUG
if (__kmp_tasking_mode != tskm_immediate_exec) {		if (__kmp_tasking_mode != tskm_immediate_exec) {
KMP_DEBUG_ASSERT(master_th->th.th_task_team ==		KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
parent_team->t.t_task_team[master_th->th.th_task_state]);		parent_team->t.t_task_team[master_th->th.th_task_state]);
}		}
#endif		#endif

		// Need this to happen before we determine the number of threads, not while
		// we are allocating the team
		//__kmp_push_current_task_to_thread(master_th, parent_team, 0);
int enter_teams = 0;		int enter_teams = 0;
if (parent_team->t.t_active_level >=		if (parent_team->t.t_active_level >=
master_th->th.th_current_task->td_icvs.max_active_levels) {		master_th->th.th_current_task->td_icvs.max_active_levels) {
nthreads = 1;		nthreads = 1;
} else {		} else {
enter_teams = ((ap == NULL && active_level == 0) \|\|		enter_teams = ((ap == NULL && active_level == 0) \|\|
(ap && teams_level > 0 && teams_level == level));		(ap && teams_level > 0 && teams_level == level));
nthreads =		nthreads = master_set_numthreads
master_set_numthreads
? master_set_numthreads		? master_set_numthreads
: get__nproc_2(		// TODO: get nproc directly from current task
parent_team,		: get__nproc_2(parent_team, master_tid);
master_tid); // TODO: get nproc directly from current task

// Check if we need to take forkjoin lock? (no need for serialized		// Check if we need to take forkjoin lock? (no need for serialized
// parallel out of teams construct). This code moved here from		// parallel out of teams construct). This code moved here from
// __kmp_reserve_threads() to speedup nested serialized parallels.		// __kmp_reserve_threads() to speedup nested serialized parallels.
if (nthreads > 1) {		if (nthreads > 1) {
if ((get__max_active_levels(master_th) == 1 &&		if ((get__max_active_levels(master_th) == 1 &&
(root->r.r_in_parallel && !enter_teams)) \|\|		(root->r.r_in_parallel && !enter_teams)) \|\|
(__kmp_library == library_serial)) {		(__kmp_library == library_serial)) {
KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"		KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
▲ Show 20 Lines • Show All 318 Lines • ▼ Show 20 Lines	if ((nthreads_icv > 0) \|\| (proc_bind_icv != proc_bind_default)) {
/* allocate a new parallel team */		/* allocate a new parallel team */
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));		KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
team = __kmp_allocate_team(root, nthreads, nthreads,		team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT		#if OMPT_SUPPORT
ompt_parallel_data,		ompt_parallel_data,
#endif		#endif
proc_bind, &new_icvs,		proc_bind, &new_icvs,
argc USE_NESTED_HOT_ARG(master_th));		argc USE_NESTED_HOT_ARG(master_th));
		if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
		copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
} else {		} else {
/* allocate a new parallel team */		/* allocate a new parallel team */
KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));		KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
team = __kmp_allocate_team(root, nthreads, nthreads,		team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT		#if OMPT_SUPPORT
ompt_parallel_data,		ompt_parallel_data,
#endif		#endif
proc_bind,		proc_bind,
&master_th->th.th_current_task->td_icvs,		&master_th->th.th_current_task->td_icvs,
argc USE_NESTED_HOT_ARG(master_th));		argc USE_NESTED_HOT_ARG(master_th));
		if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
		copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
		&master_th->th.th_current_task->td_icvs);
}		}
KF_TRACE(		KF_TRACE(
10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));		10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

/* setup the new team */		/* setup the new team */
KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);		KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);		KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
KMP_CHECK_UPDATE(team->t.t_ident, loc);		KMP_CHECK_UPDATE(team->t.t_ident, loc);
▲ Show 20 Lines • Show All 350 Lines • ▼ Show 20 Lines	if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);		KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
// destroy the stack stitching id on exit from the teams construct		// destroy the stack stitching id on exit from the teams construct
// if parent_team is active, then the id will be destroyed later on		// if parent_team is active, then the id will be destroyed later on
// by master of the league of teams		// by master of the league of teams
__kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);		__kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
parent_team->t.t_stack_id = NULL;		parent_team->t.t_stack_id = NULL;
}		}
#endif		#endif

		if (team->t.t_nproc > 1 &&
		__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		team->t.b->update_num_threads(team->t.t_nproc);
		__kmp_add_threads_to_team(team, team->t.t_nproc);
		}
}		}

KMP_MB();		KMP_MB();

#if OMPT_SUPPORT		#if OMPT_SUPPORT
ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);		ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
void *codeptr = team->t.ompt_team_info.master_return_address;		void *codeptr = team->t.ompt_team_info.master_return_address;
#endif		#endif
▲ Show 20 Lines • Show All 271 Lines • ▼ Show 20 Lines	#if KMP_NESTED_HOT_TEAMS
&& __kmp_hot_teams_max_level && !__kmp_hot_teams_mode		&& __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif		#endif
) {		) {
kmp_team_t *hot_team = root->r.r_hot_team;		kmp_team_t *hot_team = root->r.r_hot_team;
int f;		int f;

__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);		__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

		if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		__kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
		}
// Release the extra threads we don't need any more.		// Release the extra threads we don't need any more.
for (f = new_nth; f < hot_team->t.t_nproc; f++) {		for (f = new_nth; f < hot_team->t.t_nproc; f++) {
KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);		KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
if (__kmp_tasking_mode != tskm_immediate_exec) {		if (__kmp_tasking_mode != tskm_immediate_exec) {
// When decreasing team size, threads no longer in the team should unref		// When decreasing team size, threads no longer in the team should unref
// task team.		// task team.
hot_team->t.t_threads[f]->th.th_task_team = NULL;		hot_team->t.t_threads[f]->th.th_task_team = NULL;
}		}
__kmp_free_thread(hot_team->t.t_threads[f]);		__kmp_free_thread(hot_team->t.t_threads[f]);
hot_team->t.t_threads[f] = NULL;		hot_team->t.t_threads[f] = NULL;
}		}
hot_team->t.t_nproc = new_nth;		hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS		#if KMP_NESTED_HOT_TEAMS
if (thread->th.th_hot_teams) {		if (thread->th.th_hot_teams) {
KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);		KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
thread->th.th_hot_teams[0].hot_team_nth = new_nth;		thread->th.th_hot_teams[0].hot_team_nth = new_nth;
}		}
#endif		#endif

		if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		hot_team->t.b->update_num_threads(new_nth);
		__kmp_add_threads_to_team(hot_team, new_nth);
		}

__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);		__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

// Update the t_nproc field in the threads that are still active.		// Update the t_nproc field in the threads that are still active.
for (f = 0; f < new_nth; f++) {		for (f = 0; f < new_nth; f++) {
KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);		KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;		hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
}		}
// Special flag in case omp_set_num_threads() call		// Special flag in case omp_set_num_threads() call
▲ Show 20 Lines • Show All 1,431 Lines • ▼ Show 20 Lines	#if KMP_AFFINITY_SUPPORTED
this_thr->th.th_new_place = this_thr->th.th_current_place;		this_thr->th.th_new_place = this_thr->th.th_current_place;
#endif		#endif
this_thr->th.th_root = master->th.th_root;		this_thr->th.th_root = master->th.th_root;

/* setup the thread's cache of the team structure */		/* setup the thread's cache of the team structure */
this_thr->th.th_team_nproc = team->t.t_nproc;		this_thr->th.th_team_nproc = team->t.t_nproc;
this_thr->th.th_team_master = master;		this_thr->th.th_team_master = master;
this_thr->th.th_team_serialized = team->t.t_serialized;		this_thr->th.th_team_serialized = team->t.t_serialized;
TCW_PTR(this_thr->th.th_sleep_loc, NULL);

KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);		KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);

KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",		KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
tid, gtid, this_thr, this_thr->th.th_current_task));		tid, gtid, this_thr, this_thr->th.th_current_task));

__kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,		__kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
team, tid, TRUE);		team, tid, TRUE);
▲ Show 20 Lines • Show All 152 Lines • ▼ Show 20 Lines	if (__kmp_thread_pool) {
KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);		KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);

TCW_4(__kmp_nth, __kmp_nth + 1);		TCW_4(__kmp_nth, __kmp_nth + 1);

new_thr->th.th_task_state = 0;		new_thr->th.th_task_state = 0;
new_thr->th.th_task_state_top = 0;		new_thr->th.th_task_state_top = 0;
new_thr->th.th_task_state_stack_sz = 4;		new_thr->th.th_task_state_stack_sz = 4;

		if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		// Make sure pool thread has transitioned to waiting on own thread struct
		KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
		// Thread activated in __kmp_allocate_team when increasing team size
		}

#ifdef KMP_ADJUST_BLOCKTIME		#ifdef KMP_ADJUST_BLOCKTIME
/* Adjust blocktime back to zero if necessary */		/* Adjust blocktime back to zero if necessary */
/* Middle initialization might not have occurred yet */		/* Middle initialization might not have occurred yet */
if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {		if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
if (__kmp_nth > __kmp_avail_proc) {		if (__kmp_nth > __kmp_avail_proc) {
__kmp_zero_bt = TRUE;		__kmp_zero_bt = TRUE;
}		}
}		}
▲ Show 20 Lines • Show All 151 Lines • ▼ Show 20 Lines	#endif
kmp_balign_t *balign = new_thr->th.th_bar;		kmp_balign_t *balign = new_thr->th.th_bar;
for (b = 0; b < bs_last_barrier; ++b) {		for (b = 0; b < bs_last_barrier; ++b) {
balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;		balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
balign[b].bb.team = NULL;		balign[b].bb.team = NULL;
balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;		balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
balign[b].bb.use_oncore_barrier = 0;		balign[b].bb.use_oncore_barrier = 0;
}		}

		TCW_PTR(new_thr->th.th_sleep_loc, NULL);
		new_thr->th.th_sleep_loc_type = flag_unset;

new_thr->th.th_spin_here = FALSE;		new_thr->th.th_spin_here = FALSE;
new_thr->th.th_next_waiting = 0;		new_thr->th.th_next_waiting = 0;
#if KMP_OS_UNIX		#if KMP_OS_UNIX
new_thr->th.th_blocking = false;		new_thr->th.th_blocking = false;
#endif		#endif

#if KMP_AFFINITY_SUPPORTED		#if KMP_AFFINITY_SUPPORTED
new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;		new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
▲ Show 20 Lines • Show All 563 Lines • ▼ Show 20 Lines
#if KMP_DEBUG		#if KMP_DEBUG
if (__kmp_tasking_mode != tskm_immediate_exec) {		if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "		KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "
"task_team[1] = %p before reinit\n",		"task_team[1] = %p before reinit\n",
team->t.t_task_team[0], team->t.t_task_team[1]));		team->t.t_task_team[0], team->t.t_task_team[1]));
}		}
#endif		#endif

		if (team->t.t_nproc != new_nproc &&
		__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		// Distributed barrier may need a resize
		int old_nthr = team->t.t_nproc;
		__kmp_resize_dist_barrier(team, old_nthr, new_nproc);
		}

// Has the number of threads changed?		// Has the number of threads changed?
/* Let's assume the most common case is that the number of threads is		/* Let's assume the most common case is that the number of threads is
unchanged, and put that case first. */		unchanged, and put that case first. */
if (team->t.t_nproc == new_nproc) { // Check changes in number of threads		if (team->t.t_nproc == new_nproc) { // Check changes in number of threads
KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));		KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));
// This case can mean that omp_set_num_threads() was called and the hot		// This case can mean that omp_set_num_threads() was called and the hot
// team size was already reduced, so we check the special flag		// team size was already reduced, so we check the special flag
if (team->t.t_size_changed == -1) {		if (team->t.t_size_changed == -1) {
Show All 33 Lines	#else
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);		KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#endif /* KMP_AFFINITY_SUPPORTED */		#endif /* KMP_AFFINITY_SUPPORTED */
} else if (team->t.t_nproc > new_nproc) {		} else if (team->t.t_nproc > new_nproc) {
KA_TRACE(20,		KA_TRACE(20,
("__kmp_allocate_team: decreasing hot team thread count to %d\n",		("__kmp_allocate_team: decreasing hot team thread count to %d\n",
new_nproc));		new_nproc));

team->t.t_size_changed = 1;		team->t.t_size_changed = 1;
		if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		// Barrier size already reduced earlier in this function
		// Activate team threads via th_used_in_team
		__kmp_add_threads_to_team(team, new_nproc);
		}
#if KMP_NESTED_HOT_TEAMS		#if KMP_NESTED_HOT_TEAMS
if (__kmp_hot_teams_mode == 0) {		if (__kmp_hot_teams_mode == 0) {
// AC: saved number of threads should correspond to team's value in this		// AC: saved number of threads should correspond to team's value in this
// mode, can be bigger in mode 1, when hot team has threads in reserve		// mode, can be bigger in mode 1, when hot team has threads in reserve
KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);		KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
hot_teams[level].hot_team_nth = new_nproc;		hot_teams[level].hot_team_nth = new_nproc;
#endif // KMP_NESTED_HOT_TEAMS		#endif // KMP_NESTED_HOT_TEAMS
/* release the extra threads we don't need any more */		/* release the extra threads we don't need any more */
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	#if (KMP_OS_LINUX \|\| KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
if (KMP_AFFINITY_CAPABLE()) {		if (KMP_AFFINITY_CAPABLE()) {
KMP_CPU_ALLOC(old_mask);		KMP_CPU_ALLOC(old_mask);
}		}
#endif		#endif

KA_TRACE(20,		KA_TRACE(20,
("__kmp_allocate_team: increasing hot team thread count to %d\n",		("__kmp_allocate_team: increasing hot team thread count to %d\n",
new_nproc));		new_nproc));
		int old_nproc = team->t.t_nproc; // save old value and use to update only
team->t.t_size_changed = 1;		team->t.t_size_changed = 1;

#if KMP_NESTED_HOT_TEAMS		#if KMP_NESTED_HOT_TEAMS
int avail_threads = hot_teams[level].hot_team_nth;		int avail_threads = hot_teams[level].hot_team_nth;
if (new_nproc < avail_threads)		if (new_nproc < avail_threads)
avail_threads = new_nproc;		avail_threads = new_nproc;
kmp_info_t **other_threads = team->t.t_threads;		kmp_info_t **other_threads = team->t.t_threads;
for (f = team->t.t_nproc; f < avail_threads; ++f) {		for (f = team->t.t_nproc; f < avail_threads; ++f) {
Show All 10 Lines	#endif
}		}
}		}
if (hot_teams[level].hot_team_nth >= new_nproc) {		if (hot_teams[level].hot_team_nth >= new_nproc) {
// we have all needed threads in reserve, no need to allocate any		// we have all needed threads in reserve, no need to allocate any
// this only possible in mode 1, cannot have reserved threads in mode 0		// this only possible in mode 1, cannot have reserved threads in mode 0
KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);		KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
team->t.t_nproc = new_nproc; // just get reserved threads involved		team->t.t_nproc = new_nproc; // just get reserved threads involved
} else {		} else {
// we may have some threads in reserve, but not enough		// We may have some threads in reserve, but not enough;
team->t.t_nproc =		// get reserved threads involved if any.
hot_teams[level]		team->t.t_nproc = hot_teams[level].hot_team_nth;
.hot_team_nth; // get reserved threads involved if any
hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size		hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size
#endif // KMP_NESTED_HOT_TEAMS		#endif // KMP_NESTED_HOT_TEAMS
if (team->t.t_max_nproc < new_nproc) {		if (team->t.t_max_nproc < new_nproc) {
/* reallocate larger arrays */		/* reallocate larger arrays */
__kmp_reallocate_team_arrays(team, new_nproc);		__kmp_reallocate_team_arrays(team, new_nproc);
__kmp_reinitialize_team(team, new_icvs, NULL);		__kmp_reinitialize_team(team, new_icvs, NULL);
}		}

Show All 38 Lines	#if (KMP_OS_LINUX \|\| KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
/* Restore initial primary thread's affinity mask */		/* Restore initial primary thread's affinity mask */
__kmp_set_system_affinity(old_mask, TRUE);		__kmp_set_system_affinity(old_mask, TRUE);
KMP_CPU_FREE(old_mask);		KMP_CPU_FREE(old_mask);
}		}
#endif		#endif
#if KMP_NESTED_HOT_TEAMS		#if KMP_NESTED_HOT_TEAMS
} // end of check of t_nproc vs. new_nproc vs. hot_team_nth		} // end of check of t_nproc vs. new_nproc vs. hot_team_nth
#endif // KMP_NESTED_HOT_TEAMS		#endif // KMP_NESTED_HOT_TEAMS
		if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		// Barrier size already increased earlier in this function
		// Activate team threads via th_used_in_team
		__kmp_add_threads_to_team(team, new_nproc);
		}
/* make sure everyone is synchronized */		/* make sure everyone is synchronized */
int old_nproc = team->t.t_nproc; // save old value and use to update only
// new threads below		// new threads below
__kmp_initialize_team(team, new_nproc, new_icvs,		__kmp_initialize_team(team, new_nproc, new_icvs,
root->r.r_uber_thread->th.th_ident);		root->r.r_uber_thread->th.th_ident);

/* reinitialize the threads */		/* reinitialize the threads */
KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);		KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
for (f = 0; f < team->t.t_nproc; ++f)		for (f = 0; f < team->t.t_nproc; ++f)
__kmp_initialize_info(team->t.t_threads[f], team, f,		__kmp_initialize_info(team->t.t_threads[f], team, f,
▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines	#endif
KMP_MB();		KMP_MB();
for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {		for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
/* TODO: consider resizing undersized teams instead of reaping them, now		/* TODO: consider resizing undersized teams instead of reaping them, now
that we have a resizing mechanism */		that we have a resizing mechanism */
if (team->t.t_max_nproc >= max_nproc) {		if (team->t.t_max_nproc >= max_nproc) {
/* take this team from the team pool */		/* take this team from the team pool */
__kmp_team_pool = team->t.t_next_pool;		__kmp_team_pool = team->t.t_next_pool;

		if (max_nproc > 1 &&
		__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		if (!team->t.b) { // Allocate barrier structure
		team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
		}
		}

/* setup the team for fresh use */		/* setup the team for fresh use */
__kmp_initialize_team(team, new_nproc, new_icvs, NULL);		__kmp_initialize_team(team, new_nproc, new_icvs, NULL);

KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "		KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "
"task_team[1] %p to NULL\n",		"task_team[1] %p to NULL\n",
&team->t.t_task_team[0], &team->t.t_task_team[1]));		&team->t.t_task_team[0], &team->t.t_task_team[1]));
team->t.t_task_team[0] = NULL;		team->t.t_task_team[0] = NULL;
team->t.t_task_team[1] = NULL;		team->t.t_task_team[1] = NULL;
Show All 39 Lines	#endif
}		}

/* nothing available in the pool, no matter, make a new team! */		/* nothing available in the pool, no matter, make a new team! */
KMP_MB();		KMP_MB();
team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));		team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));

/* and set it up */		/* and set it up */
team->t.t_max_nproc = max_nproc;		team->t.t_max_nproc = max_nproc;
		if (max_nproc > 1 &&
		__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		// Allocate barrier structure
		team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
		}

/* NOTE well, for some reason allocating one big buffer and dividing it up		/* NOTE well, for some reason allocating one big buffer and dividing it up
seems to really hurt performance a lot on the P4, so, let's not use this */		seems to really hurt performance a lot on the P4, so, let's not use this */
__kmp_allocate_team_arrays(team, max_nproc);		__kmp_allocate_team_arrays(team, max_nproc);

KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));		KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));
__kmp_initialize_team(team, new_nproc, new_icvs, NULL);		__kmp_initialize_team(team, new_nproc, new_icvs, NULL);

KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "		KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
▲ Show 20 Lines • Show All 140 Lines • ▼ Show 20 Lines	#endif
// Reset pointer to parent team only for non-hot teams.		// Reset pointer to parent team only for non-hot teams.
team->t.t_parent = NULL;		team->t.t_parent = NULL;
team->t.t_level = 0;		team->t.t_level = 0;
team->t.t_active_level = 0;		team->t.t_active_level = 0;

/* free the worker threads */		/* free the worker threads */
for (f = 1; f < team->t.t_nproc; ++f) {		for (f = 1; f < team->t.t_nproc; ++f) {
KMP_DEBUG_ASSERT(team->t.t_threads[f]);		KMP_DEBUG_ASSERT(team->t.t_threads[f]);
		if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
		1, 2);
		}
__kmp_free_thread(team->t.t_threads[f]);		__kmp_free_thread(team->t.t_threads[f]);
		}

		if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		if (team->t.b) {
		// wake up thread at old location
		team->t.b->go_release();
		if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
		for (f = 1; f < team->t.t_nproc; ++f) {
		if (team->t.b->sleep[f].sleep) {
		__kmp_atomic_resume_64(
		team->t.t_threads[f]->th.th_info.ds.ds_gtid,
		(kmp_atomic_flag_64<> *)NULL);
		}
		}
		}
		// Wait for threads to be removed from team
		for (int f = 1; f < team->t.t_nproc; ++f) {
		while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
		KMP_CPU_PAUSE();
		}
		}
		}

		for (f = 1; f < team->t.t_nproc; ++f) {
team->t.t_threads[f] = NULL;		team->t.t_threads[f] = NULL;
}		}

		if (team->t.t_max_nproc > 1 &&
		__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
		distributedBarrier::deallocate(team->t.b);
		team->t.b = NULL;
		}
/* put the team back in the team pool */		/* put the team back in the team pool */
/* TODO limit size of team pool, call reap_team if pool too large */		/* TODO limit size of team pool, call reap_team if pool too large */
team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);		team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
__kmp_team_pool = (volatile kmp_team_t *)team;		__kmp_team_pool = (volatile kmp_team_t *)team;
} else { // Check if team was created for primary threads in teams construct		} else { // Check if team was created for primary threads in teams construct
// See if first worker is a CG root		// See if first worker is a CG root
KMP_DEBUG_ASSERT(team->t.t_threads[1] &&		KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
team->t.t_threads[1]->th.th_cg_roots);		team->t.t_threads[1]->th.th_cg_roots);
▲ Show 20 Lines • Show All 382 Lines • ▼ Show 20 Lines	static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
gtid = thread->th.th_info.ds.ds_gtid;		gtid = thread->th.th_info.ds.ds_gtid;

if (!is_root) {		if (!is_root) {
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {		if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
/* Assume the threads are at the fork barrier here */		/* Assume the threads are at the fork barrier here */
KA_TRACE(		KA_TRACE(
20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",		20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
gtid));		gtid));
/* Need release fence here to prevent seg faults for tree forkjoin barrier		if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
* (GEH) */		while (
		!KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
		KMP_CPU_PAUSE();
		__kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
		} else {
		/* Need release fence here to prevent seg faults for tree forkjoin
		barrier (GEH) */
kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,		kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
thread);		thread);
__kmp_release_64(&flag);		__kmp_release_64(&flag);
}		}
		}

// Terminate OS thread.		// Terminate OS thread.
__kmp_reap_worker(thread);		__kmp_reap_worker(thread);

// The thread was killed asynchronously. If it was actively		// The thread was killed asynchronously. If it was actively
// spinning in the thread pool, decrement the global count.		// spinning in the thread pool, decrement the global count.
//		//
// There is a small timing hole here - if the worker thread was just waking		// There is a small timing hole here - if the worker thread was just waking
▲ Show 20 Lines • Show All 867 Lines • ▼ Show 20 Lines
//__kmp_guided = kmp_sch_guided_iterative_chunked;		//__kmp_guided = kmp_sch_guided_iterative_chunked;
//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no		//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no
// need to repeat assignment		// need to repeat assignment
// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch		// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch
// bit control and barrier method control parts		// bit control and barrier method control parts
#if KMP_FAST_REDUCTION_BARRIER		#if KMP_FAST_REDUCTION_BARRIER
#define kmp_reduction_barrier_gather_bb ((int)1)		#define kmp_reduction_barrier_gather_bb ((int)1)
#define kmp_reduction_barrier_release_bb ((int)1)		#define kmp_reduction_barrier_release_bb ((int)1)
#define kmp_reduction_barrier_gather_pat bp_hyper_bar		#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
#define kmp_reduction_barrier_release_pat bp_hyper_bar		#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
#endif // KMP_FAST_REDUCTION_BARRIER		#endif // KMP_FAST_REDUCTION_BARRIER
for (i = bs_plain_barrier; i < bs_last_barrier; i++) {		for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
__kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;		__kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
__kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;		__kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
__kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;		__kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
__kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;		__kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
#if KMP_FAST_REDUCTION_BARRIER		#if KMP_FAST_REDUCTION_BARRIER
if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (		if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (
▲ Show 20 Lines • Show All 1,840 Lines • ▼ Show 20 Lines
void __kmp_omp_display_env(int verbose) {		void __kmp_omp_display_env(int verbose) {
__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);		__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
if (__kmp_init_serial == 0)		if (__kmp_init_serial == 0)
__kmp_do_serial_initialize();		__kmp_do_serial_initialize();
__kmp_display_env_impl(!verbose, verbose);		__kmp_display_env_impl(!verbose, verbose);
__kmp_release_bootstrap_lock(&__kmp_initz_lock);		__kmp_release_bootstrap_lock(&__kmp_initz_lock);
}		}

		// The team size is changing, so distributed barrier must be modified
		void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
		int new_nthreads) {
		KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
		bp_dist_bar);
		kmp_info_t **other_threads = team->t.t_threads;

		// We want all the workers to stop waiting on the barrier while we adjust the
		// size of the team.
		for (int f = 1; f < old_nthreads; ++f) {
		KMP_DEBUG_ASSERT(other_threads[f] != NULL);
		// Ignore threads that are already inactive or not present in the team
		if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
		// teams construct causes thread_limit to get passed in, and some of
		// those could be inactive; just ignore them
		continue;
		}
		// If thread is transitioning still to in_use state, wait for it
		if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
		while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
		KMP_CPU_PAUSE();
		}
		// The thread should be in_use now
		KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
		// Transition to unused state
		team->t.t_threads[f]->th.th_used_in_team.store(2);
		KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
		}
		// Release all the workers
		kmp_uint64 new_value; // new value for go
		new_value = team->t.b->go_release();

		KMP_MFENCE();

		// Workers should see transition status 2 and move to 0; but may need to be
		// woken up first
		size_t my_go_index;
		int count = old_nthreads - 1;
		while (count > 0) {
		count = old_nthreads - 1;
		for (int f = 1; f < old_nthreads; ++f) {
		my_go_index = f / team->t.b->threads_per_go;
		if (other_threads[f]->th.th_used_in_team.load() != 0) {
		if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
		kmp_atomic_flag_64<> flag = (kmp_atomic_flag_64<> )CCAST(
		void *, other_threads[f]->th.th_sleep_loc);
		__kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
		}
		} else {
		KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
		count--;
		}
		}
		}
		// Now update the barrier size
		team->t.b->update_num_threads(new_nthreads);
		team->t.b->go_reset();
		}

		void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
		// Add the threads back to the team
		KMP_DEBUG_ASSERT(team);
		// Threads were paused and pointed at th_used_in_team temporarily during a
		// resize of the team. We're going to set th_used_in_team to 3 to indicate to
		// the thread that it should transition itself back into the team. Then, if
		// blocktime isn't infinite, the thread could be sleeping, so we send a resume
		// to wake it up.
		for (int f = 1; f < new_nthreads; ++f) {
		KMP_DEBUG_ASSERT(team->t.t_threads[f]);
		KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
		3);
		if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
		__kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
		(kmp_flag_32<false, false> *)NULL);
		}
		}
		// The threads should be transitioning to the team; when they are done, they
		// should have set th_used_in_team to 1. This loop forces master to wait until
		// all threads have moved into the team and are waiting in the barrier.
		int count = new_nthreads - 1;
		while (count > 0) {
		count = new_nthreads - 1;
		for (int f = 1; f < new_nthreads; ++f) {
		if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
		count--;
		}
		}
		}
		}

// Globals and functions for hidden helper task		// Globals and functions for hidden helper task
kmp_info_t **__kmp_hidden_helper_threads;		kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;		kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;		std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX		#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;		kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;		kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else		#else
▲ Show 20 Lines • Show All 134 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_settings.cpp

	Show First 20 Lines • Show All 1,678 Lines • ▼ Show 20 Lines
	// TODO: Remove __kmp_barrier_pattern_name variable, remove loops from parse and			// TODO: Remove __kmp_barrier_pattern_name variable, remove loops from parse and
	// print functions, pass required data to functions through data argument.			// print functions, pass required data to functions through data argument.

	static void __kmp_stg_parse_barrier_pattern(char const name, char const value,			static void __kmp_stg_parse_barrier_pattern(char const name, char const value,
	void *data) {			void *data) {
	const char *var;			const char *var;
	/* ---------- Barrier method control ------------ */			/* ---------- Barrier method control ------------ */

				static int dist_req = 0, non_dist_req = 0;
				static bool warn = 1;
	for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {			for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
	var = __kmp_barrier_pattern_env_name[i];			var = __kmp_barrier_pattern_env_name[i];

	if ((strcmp(var, name) == 0) && (value != 0)) {			if ((strcmp(var, name) == 0) && (value != 0)) {
	int j;			int j;
	char comma = CCAST(char , strchr(value, ','));			char comma = CCAST(char , strchr(value, ','));

	/* handle first parameter: gather pattern */			/* handle first parameter: gather pattern */
	for (j = bp_linear_bar; j < bp_last_bar; j++) {			for (j = bp_linear_bar; j < bp_last_bar; j++) {
	if (__kmp_match_with_sentinel(__kmp_barrier_pattern_name[j], value, 1,			if (__kmp_match_with_sentinel(__kmp_barrier_pattern_name[j], value, 1,
	',')) {			',')) {
				if (j == bp_dist_bar) {
				dist_req++;
				} else {
				non_dist_req++;
				}
	__kmp_barrier_gather_pattern[i] = (kmp_bar_pat_e)j;			__kmp_barrier_gather_pattern[i] = (kmp_bar_pat_e)j;
	break;			break;
	}			}
	}			}
	if (j == bp_last_bar) {			if (j == bp_last_bar) {
	KMP_WARNING(BarrGatherValueInvalid, name, value);			KMP_WARNING(BarrGatherValueInvalid, name, value);
	KMP_INFORM(Using_str_Value, name,			KMP_INFORM(Using_str_Value, name,
	__kmp_barrier_pattern_name[bp_linear_bar]);			__kmp_barrier_pattern_name[bp_linear_bar]);
	}			}

	/* handle second parameter: release pattern */			/* handle second parameter: release pattern */
	if (comma != NULL) {			if (comma != NULL) {
	for (j = bp_linear_bar; j < bp_last_bar; j++) {			for (j = bp_linear_bar; j < bp_last_bar; j++) {
	if (__kmp_str_match(__kmp_barrier_pattern_name[j], 1, comma + 1)) {			if (__kmp_str_match(__kmp_barrier_pattern_name[j], 1, comma + 1)) {
				if (j == bp_dist_bar) {
				dist_req++;
				} else {
				non_dist_req++;
				}
	__kmp_barrier_release_pattern[i] = (kmp_bar_pat_e)j;			__kmp_barrier_release_pattern[i] = (kmp_bar_pat_e)j;
	break;			break;
	}			}
	}			}
	if (j == bp_last_bar) {			if (j == bp_last_bar) {
	__kmp_msg(kmp_ms_warning,			__kmp_msg(kmp_ms_warning,
	KMP_MSG(BarrReleaseValueInvalid, name, comma + 1),			KMP_MSG(BarrReleaseValueInvalid, name, comma + 1),
	__kmp_msg_null);			__kmp_msg_null);
	KMP_INFORM(Using_str_Value, name,			KMP_INFORM(Using_str_Value, name,
	__kmp_barrier_pattern_name[bp_linear_bar]);			__kmp_barrier_pattern_name[bp_linear_bar]);
	}			}
	}			}
	}			}
	}			}
				if ((dist_req == 0) && (non_dist_req != 0)) {
				// Something was set to a barrier other than dist; set all others to hyper
				for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
				if (__kmp_barrier_release_pattern[i] == bp_dist_bar)
				__kmp_barrier_release_pattern[i] = bp_hyper_bar;
				if (__kmp_barrier_gather_pattern[i] == bp_dist_bar)
				__kmp_barrier_gather_pattern[i] = bp_hyper_bar;
				}
				} else if (non_dist_req != 0) {
				// some requests for dist, plus requests for others; set all to dist
				if (non_dist_req > 0 && dist_req > 0 && warn) {
				KMP_INFORM(BarrierPatternOverride, name,
				__kmp_barrier_pattern_name[bp_dist_bar]);
				warn = 0;
				}
				for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
				if (__kmp_barrier_release_pattern[i] != bp_dist_bar)
				__kmp_barrier_release_pattern[i] = bp_dist_bar;
				if (__kmp_barrier_gather_pattern[i] != bp_dist_bar)
				__kmp_barrier_gather_pattern[i] = bp_dist_bar;
				}
				}
	} // __kmp_stg_parse_barrier_pattern			} // __kmp_stg_parse_barrier_pattern

	static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer,			static void __kmp_stg_print_barrier_pattern(kmp_str_buf_t *buffer,
	char const name, void data) {			char const name, void data) {
	const char *var;			const char *var;
	for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {			for (int i = bs_plain_barrier; i < bs_last_barrier; i++) {
	var = __kmp_barrier_pattern_env_name[i];			var = __kmp_barrier_pattern_env_name[i];
	if (strcmp(var, name) == 0) {			if (strcmp(var, name) == 0) {
	int j = __kmp_barrier_gather_pattern[i];			int j = __kmp_barrier_gather_pattern[i];
	int k = __kmp_barrier_release_pattern[i];			int k = __kmp_barrier_release_pattern[i];
	if (__kmp_env_format) {			if (__kmp_env_format) {
	KMP_STR_BUF_PRINT_NAME_EX(__kmp_barrier_pattern_env_name[i]);			KMP_STR_BUF_PRINT_NAME_EX(__kmp_barrier_pattern_env_name[i]);
	} else {			} else {
	__kmp_str_buf_print(buffer, " %s='",			__kmp_str_buf_print(buffer, " %s='",
	__kmp_barrier_pattern_env_name[i]);			__kmp_barrier_pattern_env_name[i]);
	}			}
	KMP_DEBUG_ASSERT(j < bs_last_barrier && k < bs_last_barrier);			KMP_DEBUG_ASSERT(j < bp_last_bar && k < bp_last_bar);
	__kmp_str_buf_print(buffer, "%s,%s'\n", __kmp_barrier_pattern_name[j],			__kmp_str_buf_print(buffer, "%s,%s'\n", __kmp_barrier_pattern_name[j],
	__kmp_barrier_pattern_name[k]);			__kmp_barrier_pattern_name[k]);
	}			}
	}			}
	} // __kmp_stg_print_barrier_pattern			} // __kmp_stg_print_barrier_pattern

	// -----------------------------------------------------------------------------			// -----------------------------------------------------------------------------
	// KMP_ABORT_DELAY			// KMP_ABORT_DELAY
	▲ Show 20 Lines • Show All 4,471 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_stats.h

	Show First 20 Lines • Show All 240 Lines • ▼ Show 20 Lines
	// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy			// KMP_setup_icv_copy -- time in __kmp_setup_icv_copy
	// KMP_icv_copy -- start/stop timer for any ICV copying			// KMP_icv_copy -- start/stop timer for any ICV copying
	// KMP_linear_gather -- time in __kmp_linear_barrier_gather			// KMP_linear_gather -- time in __kmp_linear_barrier_gather
	// KMP_linear_release -- time in __kmp_linear_barrier_release			// KMP_linear_release -- time in __kmp_linear_barrier_release
	// KMP_tree_gather -- time in __kmp_tree_barrier_gather			// KMP_tree_gather -- time in __kmp_tree_barrier_gather
	// KMP_tree_release -- time in __kmp_tree_barrier_release			// KMP_tree_release -- time in __kmp_tree_barrier_release
	// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather			// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
	// KMP_hyper_release -- time in __kmp_hyper_barrier_release			// KMP_hyper_release -- time in __kmp_hyper_barrier_release
				// KMP_dist_gather -- time in __kmp_dist_barrier_gather
				// KMP_dist_release -- time in __kmp_dist_barrier_release
	// clang-format off			// clang-format off
	#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \			#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
	macro(KMP_fork_call, 0, arg) \			macro(KMP_fork_call, 0, arg) \
	macro(KMP_join_call, 0, arg) \			macro(KMP_join_call, 0, arg) \
	macro(KMP_end_split_barrier, 0, arg) \			macro(KMP_end_split_barrier, 0, arg) \
	macro(KMP_hier_gather, 0, arg) \			macro(KMP_hier_gather, 0, arg) \
	macro(KMP_hier_release, 0, arg) \			macro(KMP_hier_release, 0, arg) \
	macro(KMP_hyper_gather, 0, arg) \			macro(KMP_hyper_gather, 0, arg) \
	macro(KMP_hyper_release, 0, arg) \			macro(KMP_hyper_release, 0, arg) \
				macro(KMP_dist_gather, 0, arg) \
				macro(KMP_dist_release, 0, arg) \
	macro(KMP_linear_gather, 0, arg) \			macro(KMP_linear_gather, 0, arg) \
	macro(KMP_linear_release, 0, arg) \			macro(KMP_linear_release, 0, arg) \
	macro(KMP_tree_gather, 0, arg) \			macro(KMP_tree_gather, 0, arg) \
	macro(KMP_tree_release, 0, arg) \			macro(KMP_tree_release, 0, arg) \
	macro(USER_resume, 0, arg) \			macro(USER_resume, 0, arg) \
	macro(USER_suspend, 0, arg) \			macro(USER_suspend, 0, arg) \
	macro(USER_mwait, 0, arg) \			macro(USER_mwait, 0, arg) \
	macro(KMP_allocate_team, 0, arg) \			macro(KMP_allocate_team, 0, arg) \
	▲ Show 20 Lines • Show All 749 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_str.h

	Show First 20 Lines • Show All 100 Lines • ▼ Show 20 Lines
	kmp_str_loc_t __kmp_str_loc_init(char const *psource, bool init_fname);			kmp_str_loc_t __kmp_str_loc_init(char const *psource, bool init_fname);
	void __kmp_str_loc_numbers(char const Psource, int Line, int *Col);			void __kmp_str_loc_numbers(char const Psource, int Line, int *Col);
	void __kmp_str_loc_free(kmp_str_loc_t *loc);			void __kmp_str_loc_free(kmp_str_loc_t *loc);

	int __kmp_str_eqf(char const lhs, char const rhs);			int __kmp_str_eqf(char const lhs, char const rhs);
	char __kmp_str_format(char const format, ...);			char __kmp_str_format(char const format, ...);
	void __kmp_str_free(char **str);			void __kmp_str_free(char **str);
	int __kmp_str_match(char const target, int len, char const data);			int __kmp_str_match(char const target, int len, char const data);
				bool __kmp_str_contains(char const target, int len, char const data);
	int __kmp_str_match_false(char const *data);			int __kmp_str_match_false(char const *data);
	int __kmp_str_match_true(char const *data);			int __kmp_str_match_true(char const *data);
	void __kmp_str_replace(char *str, char search_for, char replace_with);			void __kmp_str_replace(char *str, char search_for, char replace_with);
	void __kmp_str_split(char str, char delim, char head, char *tail);			void __kmp_str_split(char str, char delim, char head, char *tail);
	char __kmp_str_token(char str, char const delim, char *buf);			char __kmp_str_token(char str, char const delim, char *buf);
	int __kmp_str_to_int(char const *str, char sentinel);			int __kmp_str_to_int(char const *str, char sentinel);

	void __kmp_str_to_size(char const str, size_t out, size_t dfactor,			void __kmp_str_to_size(char const str, size_t out, size_t dfactor,
	Show All 10 Lines

openmp/runtime/src/kmp_str.cpp

Show First 20 Lines • Show All 509 Lines • ▼ Show 20 Lines	int __kmp_str_match(char const target, int len, char const data) {
for (i = 0; target[i] && data[i]; ++i) {		for (i = 0; target[i] && data[i]; ++i) {
if (TOLOWER(target[i]) != TOLOWER(data[i])) {		if (TOLOWER(target[i]) != TOLOWER(data[i])) {
return FALSE;		return FALSE;
}		}
}		}
return ((len > 0) ? i >= len : (!target[i] && (len \|\| !data[i])));		return ((len > 0) ? i >= len : (!target[i] && (len \|\| !data[i])));
} // __kmp_str_match		} // __kmp_str_match

		// If data contains all of target, returns true, otherwise returns false.
		// len should be the length of target
		bool __kmp_str_contains(char const target, int len, char const data) {
		int i = 0, j = 0, start = 0;
		if (target == NULL \|\| data == NULL) {
		return FALSE;
		}
		while (target[i]) {
		if (!data[j])
		return FALSE;
		if (TOLOWER(target[i]) != TOLOWER(data[j])) {
		j = start + 1;
		start = j;
		i = 0;
		} else {
		if (i == 0)
		start = j;
		j++;
		i++;
		}
		}

		return i == len;
		} // __kmp_str_contains

int __kmp_str_match_false(char const *data) {		int __kmp_str_match_false(char const *data) {
int result =		int result =
__kmp_str_match("false", 1, data) \|\| __kmp_str_match("off", 2, data) \|\|		__kmp_str_match("false", 1, data) \|\| __kmp_str_match("off", 2, data) \|\|
__kmp_str_match("0", 1, data) \|\| __kmp_str_match(".false.", 2, data) \|\|		__kmp_str_match("0", 1, data) \|\| __kmp_str_match(".false.", 2, data) \|\|
__kmp_str_match(".f.", 2, data) \|\| __kmp_str_match("no", 1, data) \|\|		__kmp_str_match(".f.", 2, data) \|\| __kmp_str_match("no", 1, data) \|\|
__kmp_str_match("disabled", 0, data);		__kmp_str_match("disabled", 0, data);
return result;		return result;
} // __kmp_str_match_false		} // __kmp_str_match_false
▲ Show 20 Lines • Show All 275 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_tasking.cpp

Show First 20 Lines • Show All 2,948 Lines • ▼ Show 20 Lines	while (1) { // Inner loop to find a task and execute it
// extra barrier mode, tasks do not sleep at the separate tasking		// extra barrier mode, tasks do not sleep at the separate tasking
// barrier, so this isn't a problem.		// barrier, so this isn't a problem.
asleep = 0;		asleep = 0;
if ((__kmp_tasking_mode == tskm_task_teams) &&		if ((__kmp_tasking_mode == tskm_task_teams) &&
(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&		(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
(TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=		(TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
NULL)) {		NULL)) {
asleep = 1;		asleep = 1;
__kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),		__kmp_null_resume_wrapper(other_thread);
other_thread->th.th_sleep_loc);
// A sleeping thread should not have any tasks on its queue.		// A sleeping thread should not have any tasks on its queue.
// There is a slight possibility that it resumes, steals a task		// There is a slight possibility that it resumes, steals a task
// from another thread, which spawns more tasks, all in the time		// from another thread, which spawns more tasks, all in the time
// that it takes this thread to check => don't write an assertion		// that it takes this thread to check => don't write an assertion
// that the victim's queue is empty. Try stealing from a		// that the victim's queue is empty. Try stealing from a
// different thread.		// different thread.
}		}
} while (asleep);		} while (asleep);
▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines	int __kmp_execute_tasks_64(
kmp_info_t thread, kmp_int32 gtid, kmp_flag_64<C, S> flag, int final_spin,		kmp_info_t thread, kmp_int32 gtid, kmp_flag_64<C, S> flag, int final_spin,
int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),		int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),
kmp_int32 is_constrained) {		kmp_int32 is_constrained) {
return __kmp_execute_tasks_template(		return __kmp_execute_tasks_template(
thread, gtid, flag, final_spin,		thread, gtid, flag, final_spin,
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);		thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}		}

		template <bool C, bool S>
		int __kmp_atomic_execute_tasks_64(
		kmp_info_t thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> flag,
		int final_spin, int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),
		kmp_int32 is_constrained) {
		return __kmp_execute_tasks_template(
		thread, gtid, flag, final_spin,
		thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
		}

int __kmp_execute_tasks_oncore(		int __kmp_execute_tasks_oncore(
kmp_info_t thread, kmp_int32 gtid, kmp_flag_oncore flag, int final_spin,		kmp_info_t thread, kmp_int32 gtid, kmp_flag_oncore flag, int final_spin,
int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),		int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),
kmp_int32 is_constrained) {		kmp_int32 is_constrained) {
return __kmp_execute_tasks_template(		return __kmp_execute_tasks_template(
thread, gtid, flag, final_spin,		thread, gtid, flag, final_spin,
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);		thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}		}
Show All 10 Lines	template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
kmp_int32);		kmp_int32);

template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,		template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
kmp_flag_64<true, false> *,		kmp_flag_64<true, false> *,
int,		int,
int USE_ITT_BUILD_ARG(void ),		int USE_ITT_BUILD_ARG(void ),
kmp_int32);		kmp_int32);

		template int __kmp_atomic_execute_tasks_64<false, true>(
		kmp_info_t , kmp_int32, kmp_atomic_flag_64<false, true> , int,
		int USE_ITT_BUILD_ARG(void ), kmp_int32);

		template int __kmp_atomic_execute_tasks_64<true, false>(
		kmp_info_t , kmp_int32, kmp_atomic_flag_64<true, false> , int,
		int USE_ITT_BUILD_ARG(void ), kmp_int32);

// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the		// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
// next barrier so they can assist in executing enqueued tasks.		// next barrier so they can assist in executing enqueued tasks.
// First thread in allocates the task team atomically.		// First thread in allocates the task team atomically.
static void __kmp_enable_tasking(kmp_task_team_t *task_team,		static void __kmp_enable_tasking(kmp_task_team_t *task_team,
kmp_info_t *this_thr) {		kmp_info_t *this_thr) {
kmp_thread_data_t *threads_data;		kmp_thread_data_t *threads_data;
int nthreads, i, is_init_thread;		int nthreads, i, is_init_thread;

Show All 22 Lines	static void __kmp_enable_tasking(kmp_task_team_t *task_team,
KMP_DEBUG_ASSERT(threads_data != NULL);		KMP_DEBUG_ASSERT(threads_data != NULL);

if (__kmp_tasking_mode == tskm_task_teams &&		if (__kmp_tasking_mode == tskm_task_teams &&
(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {		(__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
// Release any threads sleeping at the barrier, so that they can steal		// Release any threads sleeping at the barrier, so that they can steal
// tasks and execute them. In extra barrier mode, tasks do not sleep		// tasks and execute them. In extra barrier mode, tasks do not sleep
// at the separate tasking barrier, so this isn't a problem.		// at the separate tasking barrier, so this isn't a problem.
for (i = 0; i < nthreads; i++) {		for (i = 0; i < nthreads; i++) {
volatile void *sleep_loc;		void *sleep_loc;
kmp_info_t *thread = threads_data[i].td.td_thr;		kmp_info_t *thread = threads_data[i].td.td_thr;

if (i == this_thr->th.th_info.ds.ds_tid) {		if (i == this_thr->th.th_info.ds.ds_tid) {
continue;		continue;
}		}
// Since we haven't locked the thread's suspend mutex lock at this		// Since we haven't locked the thread's suspend mutex lock at this
// point, there is a small window where a thread might be putting		// point, there is a small window where a thread might be putting
// itself to sleep, but hasn't set the th_sleep_loc field yet.		// itself to sleep, but hasn't set the th_sleep_loc field yet.
// To work around this, __kmp_execute_tasks_template() periodically checks		// To work around this, __kmp_execute_tasks_template() periodically checks
// see if other threads are sleeping (using the same random mechanism that		// see if other threads are sleeping (using the same random mechanism that
// is used for task stealing) and awakens them if they are.		// is used for task stealing) and awakens them if they are.
if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=		if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
NULL) {		NULL) {
KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",		KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),		__kmp_gtid_from_thread(this_thr),
__kmp_gtid_from_thread(thread)));		__kmp_gtid_from_thread(thread)));
__kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);		__kmp_null_resume_wrapper(thread);
} else {		} else {
KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",		KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
__kmp_gtid_from_thread(this_thr),		__kmp_gtid_from_thread(this_thr),
__kmp_gtid_from_thread(thread)));		__kmp_gtid_from_thread(thread)));
}		}
}		}
}		}

▲ Show 20 Lines • Show All 351 Lines • ▼ Show 20 Lines	#endif

done = FALSE; // Because th_task_team pointer is not NULL for this thread		done = FALSE; // Because th_task_team pointer is not NULL for this thread

KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "		KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
"unreference task_team\n",		"unreference task_team\n",
__kmp_gtid_from_thread(thread)));		__kmp_gtid_from_thread(thread)));

if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {		if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
volatile void *sleep_loc;		void *sleep_loc;
// If the thread is sleeping, awaken it.		// If the thread is sleeping, awaken it.
if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=		if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
NULL) {		NULL) {
KA_TRACE(		KA_TRACE(
10,		10,
("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",		("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
__kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));		__kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
__kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);		__kmp_null_resume_wrapper(thread);
}		}
}		}
}		}
if (done) {		if (done) {
break;		break;
}		}

// If oversubscribed or have waited a bit, yield.		// If oversubscribed or have waited a bit, yield.
▲ Show 20 Lines • Show All 1,196 Lines • Show Last 20 Lines

openmp/runtime/src/kmp_wait_release.h

Show All 27 Lines
higher level operations such as barriers and fork/join.		higher level operations such as barriers and fork/join.
*/		*/

/*!		/*!
@ingroup WAIT_RELEASE		@ingroup WAIT_RELEASE
@{		@{
*/		*/

/*!
* The flag_type describes the storage used for the flag.
*/
enum flag_type {
flag32, /**< 32 bit flags */
flag64, /**< 64 bit flags */
flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

struct flag_properties {		struct flag_properties {
unsigned int type : 16;		unsigned int type : 16;
unsigned int reserved : 16;		unsigned int reserved : 16;
};		};

/*!		template <enum flag_type FlagType> struct flag_traits {};
* Base class for wait/release volatile flag
*/		template <> struct flag_traits<flag32> {
template <typename P> class kmp_flag_native {		typedef kmp_uint32 flag_t;
volatile P *loc;		static const flag_type t = flag32;
flag_properties t;		static inline flag_t tcr(flag_t f) { return TCR_4(f); }
		static inline flag_t test_then_add4(volatile flag_t *f) {
		return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
		}
		static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
		return KMP_TEST_THEN_OR32(f, v);
		}
		static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
		return KMP_TEST_THEN_AND32(f, v);
		}
		};

		template <> struct flag_traits<atomic_flag64> {
		typedef kmp_uint64 flag_t;
		static const flag_type t = atomic_flag64;
		static inline flag_t tcr(flag_t f) { return TCR_8(f); }
		static inline flag_t test_then_add4(volatile flag_t *f) {
		return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
		}
		static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
		return KMP_TEST_THEN_OR64(f, v);
		}
		static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
		return KMP_TEST_THEN_AND64(f, v);
		}
		};

		template <> struct flag_traits<flag64> {
		typedef kmp_uint64 flag_t;
		static const flag_type t = flag64;
		static inline flag_t tcr(flag_t f) { return TCR_8(f); }
		static inline flag_t test_then_add4(volatile flag_t *f) {
		return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
		}
		static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
		return KMP_TEST_THEN_OR64(f, v);
		}
		static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
		return KMP_TEST_THEN_AND64(f, v);
		}
		};

		template <> struct flag_traits<flag_oncore> {
		typedef kmp_uint64 flag_t;
		static const flag_type t = flag_oncore;
		static inline flag_t tcr(flag_t f) { return TCR_8(f); }
		static inline flag_t test_then_add4(volatile flag_t *f) {
		return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
		}
		static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
		return KMP_TEST_THEN_OR64(f, v);
		}
		static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
		return KMP_TEST_THEN_AND64(f, v);
		}
		};

		/*! Base class for all flags */
		template <flag_type FlagType> class kmp_flag {
		protected:
		flag_properties t; /**< "Type" of the flag in loc */
		kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this thread. */
		kmp_uint32 num_waiting_threads; /**< #threads sleeping on this thread. */
		std::atomic<bool> *sleepLoc;

public:		public:
typedef P flag_t;		typedef flag_traits<FlagType> traits_type;
kmp_flag_native(volatile P *p, flag_type ft)		kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
: loc(p), t({(short unsigned int)ft, 0U}) {}		kmp_flag(int nwaiters)
volatile P *get() { return loc; }		: t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
void get_void_p() { return RCAST(void , CCAST(P *, loc)); }		kmp_flag(std::atomic<bool> *sloc)
void set(volatile P *new_loc) { loc = new_loc; }		: t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
		/*! @result the flag_type */
flag_type get_type() { return (flag_type)(t.type); }		flag_type get_type() { return (flag_type)(t.type); }
P load() { return *loc; }
void store(P val) { *loc = val; }		/*! param i in index into waiting_threads
		* @result the thread that is waiting at index i */
		kmp_info_t *get_waiter(kmp_uint32 i) {
		KMP_DEBUG_ASSERT(i < num_waiting_threads);
		return waiting_threads[i];
		}
		/*! @result num_waiting_threads */
		kmp_uint32 get_num_waiters() { return num_waiting_threads; }
		/*! @param thr in the thread which is now waiting
		* Insert a waiting thread at index 0. */
		void set_waiter(kmp_info_t *thr) {
		waiting_threads[0] = thr;
		num_waiting_threads = 1;
		}
		enum barrier_type get_bt() { return bs_last_barrier; }
};		};

/*!		/*! Base class for wait/release volatile flag */
* Base class for wait/release atomic flag		template <typename PtrType, flag_type FlagType, bool Sleepable>
*/		class kmp_flag_native : public kmp_flag<FlagType> {
template <typename P> class kmp_flag {		protected:
std::atomic<P>		volatile PtrType *loc;
loc; /*< Pointer to the flag storage that is modified by another thread		PtrType checker; /*< When flag==checker, it has been released. /
*/		typedef flag_traits<FlagType> traits_type;
flag_properties t; /*< "Type" of the flag in loc /
public:		public:
typedef P flag_t;		typedef PtrType flag_t;
kmp_flag(std::atomic<P> *p, flag_type ft)		kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
: loc(p), t({(short unsigned int)ft, 0U}) {}		kmp_flag_native(volatile PtrType p, kmp_info_t thr)
/*!		: kmp_flag<FlagType>(1), loc(p) {
* @result the pointer to the actual flag		this->waiting_threads[0] = thr;
*/		}
std::atomic<P> *get() { return loc; }		kmp_flag_native(volatile PtrType *p, PtrType c)
/*!		: kmp_flag<FlagType>(), loc(p), checker(c) {}
* @result void* pointer to the actual flag		kmp_flag_native(volatile PtrType p, PtrType c, std::atomic<bool> sloc)
*/		: kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
		virtual ~kmp_flag_native() {}
		volatile PtrType *get() { return loc; }
		void get_void_p() { return RCAST(void , CCAST(PtrType *, loc)); }
		void set(volatile PtrType *new_loc) { loc = new_loc; }
		PtrType load() { return *loc; }
		void store(PtrType val) { *loc = val; }
		/! @result true if the flag object has been released. /
		virtual bool done_check() {
		if (Sleepable && !(this->sleepLoc))
		return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
		checker;
		else
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: do not use 'else' after 'return' [llvm-else-after-return] not useful Lint: Pre-merge checks: clang-tidy: warning: do not use 'else' after 'return' [llvm-else-after-return] [[https://github.
		return traits_type::tcr(*(this->get())) == checker;
		}
		/*! @param old_loc in old value of flag
		* @result true if the flag's old value indicates it was released. */
		virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
		/*! @result true if the flag object is not yet released.
		* Used in __kmp_wait_template like:
		* @code
		* while (flag.notdone_check()) { pause(); }
		* @endcode */
		virtual bool notdone_check() {
		return traits_type::tcr(*(this->get())) != checker;
		}
		/*! @result Actual flag value before release was applied.
		* Trigger all waiting threads to run by modifying flag to release state. */
		void internal_release() {
		(void)traits_type::test_then_add4((volatile PtrType *)this->get());
		}
		/*! @result Actual flag value before sleep bit(s) set.
		* Notes that there is at least one thread sleeping on the flag by setting
		* sleep bit(s). */
		PtrType set_sleeping() {
		if (this->sleepLoc) {
		this->sleepLoc->store(true);
		return *(this->get());
		}
		return traits_type::test_then_or((volatile PtrType *)this->get(),
		KMP_BARRIER_SLEEP_STATE);
		}
		/*! @result Actual flag value before sleep bit(s) cleared.
		* Notes that there are no longer threads sleeping on the flag by clearing
		* sleep bit(s). */
		void unset_sleeping() {
		if (this->sleepLoc) {
		this->sleepLoc->store(false);
		return;
		}
		traits_type::test_then_and((volatile PtrType *)this->get(),
		~KMP_BARRIER_SLEEP_STATE);
		}
		/*! @param old_loc in old value of flag
		* Test if there are threads sleeping on the flag's old value in old_loc. */
		bool is_sleeping_val(PtrType old_loc) {
		if (this->sleepLoc)
		return this->sleepLoc->load();
		return old_loc & KMP_BARRIER_SLEEP_STATE;
		}
		/! Test whether there are threads sleeping on the flag. /
		bool is_sleeping() {
		if (this->sleepLoc)
		return this->sleepLoc->load();
		return is_sleeping_val(*(this->get()));
		}
		bool is_any_sleeping() {
		if (this->sleepLoc)
		return this->sleepLoc->load();
		return is_sleeping_val(*(this->get()));
		}
		kmp_uint8 *get_stolen() { return NULL; }
		};

		/! Base class for wait/release atomic flag /
		template <typename PtrType, flag_type FlagType, bool Sleepable>
		class kmp_flag_atomic : public kmp_flag<FlagType> {
		protected:
		std::atomic<PtrType> loc; /< Pointer to flag location to wait on /
		PtrType checker; /*< Flag == checker means it has been released. /
		public:
		typedef flag_traits<FlagType> traits_type;
		typedef PtrType flag_t;
		kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
		kmp_flag_atomic(std::atomic<PtrType> p, kmp_info_t thr)
		: kmp_flag<FlagType>(1), loc(p) {
		this->waiting_threads[0] = thr;
		}
		kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
		: kmp_flag<FlagType>(), loc(p), checker(c) {}
		kmp_flag_atomic(std::atomic<PtrType> p, PtrType c, std::atomic<bool> sloc)
		: kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
		/! @result the pointer to the actual flag /
		std::atomic<PtrType> *get() { return loc; }
		/! @result void pointer to the actual flag */
void get_void_p() { return RCAST(void , loc); }		void get_void_p() { return RCAST(void , loc); }
/*!		/! @param new_loc in set loc to point at new_loc /
* @param new_loc in set loc to point at new_loc		void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
*/		/! @result flag value /
void set(std::atomic<P> *new_loc) { loc = new_loc; }		PtrType load() { return loc->load(std::memory_order_acquire); }
/*!		/! @param val the new flag value to be stored /
* @result the flag_type		void store(PtrType val) { loc->store(val, std::memory_order_release); }
*/		/! @result true if the flag object has been released. /
flag_type get_type() { return (flag_type)(t.type); }		bool done_check() {
/*!		if (Sleepable && !(this->sleepLoc))
* @result flag value		return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
*/		else
		// clang-tidy (pre-merge lint): do not use 'else' after 'return' [llvm-else-after-return]
P load() { return loc->load(std::memory_order_acquire); }		return this->load() == checker;
/*!		}
* @param val the new flag value to be stored		/*! @param old_loc in old value of flag
*/		* @result true if the flag's old value indicates it was released. */
void store(P val) { loc->store(val, std::memory_order_release); }		bool done_check_val(PtrType old_loc) { return old_loc == checker; }
// Derived classes must provide the following:		/*! @result true if the flag object is not yet released.
/*		* Used in __kmp_wait_template like:
kmp_info_t * get_waiter(kmp_uint32 i);		* @code
kmp_uint32 get_num_waiters();		* while (flag.notdone_check()) { pause(); }
bool done_check();		* @endcode */
bool done_check_val(P old_loc);		bool notdone_check() { return this->load() != checker; }
bool notdone_check();		/*! @result Actual flag value before release was applied.
P internal_release();		* Trigger all waiting threads to run by modifying flag to release state. */
void suspend(int th_gtid);		void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
void mwait(int th_gtid);		/*! @result Actual flag value before sleep bit(s) set.
void resume(int th_gtid);		* Notes that there is at least one thread sleeping on the flag by setting
P set_sleeping();		* sleep bit(s). */
P unset_sleeping();		PtrType set_sleeping() {
bool is_sleeping();		if (this->sleepLoc) {
bool is_any_sleeping();		this->sleepLoc->store(true);
bool is_sleeping_val(P old_loc);		return *(this->get());
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,		}
int *thread_finished		return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32		}
is_constrained);		/*! @result Actual flag value before sleep bit(s) cleared.
*/		* Notes that there are no longer threads sleeping on the flag by clearing
		* sleep bit(s). */
		void unset_sleeping() {
		if (this->sleepLoc) {
		this->sleepLoc->store(false);
		return;
		}
		KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
		}
		/*! @param old_loc in old value of flag
		* Test whether there are threads sleeping on flag's old value in old_loc. */
		bool is_sleeping_val(PtrType old_loc) {
		if (this->sleepLoc)
		return this->sleepLoc->load();
		return old_loc & KMP_BARRIER_SLEEP_STATE;
		}
		/! Test whether there are threads sleeping on the flag. /
		bool is_sleeping() {
		if (this->sleepLoc)
		return this->sleepLoc->load();
		return is_sleeping_val(this->load());
		}
		bool is_any_sleeping() {
		if (this->sleepLoc)
		return this->sleepLoc->load();
		return is_sleeping_val(this->load());
		}
		kmp_uint8 *get_stolen() { return NULL; }
};		};

#if OMPT_SUPPORT		#if OMPT_SUPPORT
OMPT_NOINLINE		OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,		static void __ompt_implicit_task_end(kmp_info_t *this_thr,
ompt_state_t ompt_state,		ompt_state_t ompt_state,
ompt_data_t *tId) {		ompt_data_t *tId) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;		int ds_tid = this_thr->th.th_info.ds.ds_tid;
▲ Show 20 Lines • Show All 125 Lines • ▼ Show 20 Lines
*/		*/
#if OMPT_SUPPORT		#if OMPT_SUPPORT
ompt_state_t ompt_entry_state;		ompt_state_t ompt_entry_state;
ompt_data_t *tId;		ompt_data_t *tId;
if (ompt_enabled.enabled) {		if (ompt_enabled.enabled) {
ompt_entry_state = this_thr->th.ompt_thread_info.state;		ompt_entry_state = this_thr->th.ompt_thread_info.state;
if (!final_spin \|\| ompt_entry_state != ompt_state_wait_barrier_implicit \|\|		if (!final_spin \|\| ompt_entry_state != ompt_state_wait_barrier_implicit \|\|
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {		KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
ompt_lw_taskteam_t *team =		ompt_lw_taskteam_t *team = NULL;
this_thr->th.th_team->t.ompt_serialized_team_info;		if (this_thr->th.th_team)
		team = this_thr->th.th_team->t.ompt_serialized_team_info;
if (team) {		if (team) {
tId = &(team->ompt_task_info.task_data);		tId = &(team->ompt_task_info.task_data);
} else {		} else {
tId = OMPT_CUR_TASK_DATA(this_thr);		tId = OMPT_CUR_TASK_DATA(this_thr);
}		}
} else {		} else {
tId = &(this_thr->th.ompt_thread_info.task_data);		tId = &(this_thr->th.ompt_thread_info.task_data);
}		}
▲ Show 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	if (__kmp_tasking_mode != tskm_immediate_exec) {
1) A newly-created thread is first being released by		1) A newly-created thread is first being released by
__kmp_fork_barrier(), and its task team has not been set up yet.		__kmp_fork_barrier(), and its task team has not been set up yet.
2) All tasks have been executed to completion.		2) All tasks have been executed to completion.
3) Tasking is off for this region. This could be because we are in a		3) Tasking is off for this region. This could be because we are in a
serialized region (perhaps the outer one), or else tasking was manually		serialized region (perhaps the outer one), or else tasking was manually
disabled (KMP_TASKING=0). */		disabled (KMP_TASKING=0). */
if (task_team != NULL) {		if (task_team != NULL) {
if (TCR_SYNC_4(task_team->tt.tt_active)) {		if (TCR_SYNC_4(task_team->tt.tt_active)) {
if (KMP_TASKING_ENABLED(task_team))		if (KMP_TASKING_ENABLED(task_team)) {
flag->execute_tasks(		flag->execute_tasks(
this_thr, th_gtid, final_spin,		this_thr, th_gtid, final_spin,
&tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);		&tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
else		} else
this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;		this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
} else {		} else {
KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));		KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT		#if OMPT_SUPPORT
// task-team is done now, other cases should be catched above		// task-team is done now, other cases should be catched above
if (final_spin && ompt_enabled.enabled)		if (final_spin && ompt_enabled.enabled)
__ompt_implicit_task_end(this_thr, ompt_entry_state, tId);		__ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif		#endif
▲ Show 20 Lines • Show All 196 Lines • ▼ Show 20 Lines	#endif
// To avoid a race, check flag between 'monitor' and 'mwait'. A write to		// To avoid a race, check flag between 'monitor' and 'mwait'. A write to
// the address could happen after the last time we checked and before		// the address could happen after the last time we checked and before
// monitoring started, in which case monitor can't detect the change.		// monitoring started, in which case monitor can't detect the change.
if (flag->done_check())		if (flag->done_check())
flag->unset_sleeping();		flag->unset_sleeping();
else {		else {
// if flag changes here, wake-up happens immediately		// if flag changes here, wake-up happens immediately
TCW_PTR(th->th.th_sleep_loc, (void *)flag);		TCW_PTR(th->th.th_sleep_loc, (void *)flag);
		th->th.th_sleep_loc_type = flag->get_type();
__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));		KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT		#if KMP_HAVE_UMWAIT
if (__kmp_umwait_enabled) {		if (__kmp_umwait_enabled) {
__kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter		__kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
}		}
#elif KMP_HAVE_MWAIT		#elif KMP_HAVE_MWAIT
if (__kmp_mwait_enabled) {		if (__kmp_mwait_enabled) {
__kmp_mm_mwait(0, __kmp_mwait_hints);		__kmp_mm_mwait(0, __kmp_mwait_hints);
}		}
#endif		#endif
KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));		KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
__kmp_lock_suspend_mx(th);		__kmp_lock_suspend_mx(th);
// Clean up sleep info; doesn't matter how/why this thread stopped waiting		// Clean up sleep info; doesn't matter how/why this thread stopped waiting
if (flag->is_sleeping())		if (flag->is_sleeping())
flag->unset_sleeping();		flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;
}		}
// Mark thread as active again		// Mark thread as active again
th->th.th_active = TRUE;		th->th.th_active = TRUE;
if (TCR_4(th->th.th_in_pool)) {		if (TCR_4(th->th.th_in_pool)) {
KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);		KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
th->th.th_active_in_pool = TRUE;		th->th.th_active_in_pool = TRUE;
}		}
} // Drop out to main wait loop to check flag, handle tasks, etc.		} // Drop out to main wait loop to check flag, handle tasks, etc.
Show All 34 Lines	if (flag->is_any_sleeping()) {
gtid, wait_gtid, flag->get()));		gtid, wait_gtid, flag->get()));
flag->resume(wait_gtid); // unsets flag's current_waiter when done		flag->resume(wait_gtid); // unsets flag's current_waiter when done
}		}
}		}
}		}
}		}
}		}

// Primary template has no members; the explicit specializations below supply
// flag_t, the flag_type tag and the read/modify helpers per storage type.
template <typename FlagType> struct flag_traits {};

/*! Traits for flags stored in a kmp_uint32: the flag32 tag plus thin wrappers
 *  that forward to the 32-bit read / test-then-modify macros. */
template <> struct flag_traits<kmp_uint32> {
  using flag_t = kmp_uint32;
  static const flag_type t = flag32;

  /*! @result the value TCR_4 yields for @p f */
  static inline flag_t tcr(flag_t f) {
    const flag_t observed = TCR_4(f);
    return observed;
  }

  /*! Forwards to KMP_TEST_THEN_ADD4_32 with the location viewed as a
   *  volatile kmp_int32. */
  static inline flag_t test_then_add4(volatile flag_t *f) {
    volatile kmp_int32 *signed_view = RCAST(volatile kmp_int32 *, f);
    return KMP_TEST_THEN_ADD4_32(signed_view);
  }

  /*! Forwards to KMP_TEST_THEN_OR32 with operand @p v. */
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    const flag_t result = KMP_TEST_THEN_OR32(f, v);
    return result;
  }

  /*! Forwards to KMP_TEST_THEN_AND32 with operand @p v. */
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    const flag_t result = KMP_TEST_THEN_AND32(f, v);
    return result;
  }
};

/*! Traits for flags stored in a kmp_uint64: the flag64 tag plus thin wrappers
 *  that forward to the 64-bit read / test-then-modify macros. */
template <> struct flag_traits<kmp_uint64> {
  using flag_t = kmp_uint64;
  static const flag_type t = flag64;

  /*! @result the value TCR_8 yields for @p f */
  static inline flag_t tcr(flag_t f) {
    const flag_t observed = TCR_8(f);
    return observed;
  }

  /*! Forwards to KMP_TEST_THEN_ADD4_64 with the location viewed as a
   *  volatile kmp_int64. */
  static inline flag_t test_then_add4(volatile flag_t *f) {
    volatile kmp_int64 *signed_view = RCAST(volatile kmp_int64 *, f);
    return KMP_TEST_THEN_ADD4_64(signed_view);
  }

  /*! Forwards to KMP_TEST_THEN_OR64 with operand @p v. */
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    const flag_t result = KMP_TEST_THEN_OR64(f, v);
    return result;
  }

  /*! Forwards to KMP_TEST_THEN_AND64 with operand @p v. */
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    const flag_t result = KMP_TEST_THEN_AND64(f, v);
    return result;
  }
};

// Basic flag that does not use C11 Atomics
/*!
 * Flag over plain volatile storage (reached through kmp_flag_native<FlagType>)
 * that adds the release value `checker` and a single-slot array of waiting
 * threads. When Sleepable is true, done_check() masks out the
 * KMP_BARRIER_SLEEP_STATE bit(s) before comparing against `checker`.
 */
template <typename FlagType, bool Sleepable>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag to to check if flag has been
                         released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */

public:
  /*! Flag at @p p with no waiters; `checker` is not initialized here. */
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  /*! Flag at @p p with @p thr recorded as the single waiter; `checker` is
   *  not initialized here. */
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  /*! Flag at @p p with release value @p c and no waiters. */
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() {
    if (Sleepable)
      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
             checker;
    else
      return traits_type::tcr(*(this->get())) == checker;
  }
  /*!
   * @param old_loc in old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state.
   * The value returned by traits_type::test_then_add4 is discarded.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  /*! Same test as is_sleeping(). */
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  /*! @result always NULL for this flag kind */
  kmp_uint8 *get_stolen() { return NULL; }
  /*! @result always bs_last_barrier for this flag kind */
  enum barrier_type get_bt() { return bs_last_barrier; }
};

/*!
 * Flag over std::atomic<FlagType> storage (reached through
 * kmp_flag<FlagType>) that adds the release value `checker` and a single-slot
 * array of waiting threads. When Sleepable is true, done_check() masks out the
 * KMP_BARRIER_SLEEP_STATE bit(s) before comparing against `checker`.
 */
template <typename FlagType, bool Sleepable>
class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare flag to to check if flag has been
                         released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */

public:
  /*! Flag at @p p with no waiters; `checker` is not initialized here. */
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  /*! Flag at @p p with @p thr recorded as the single waiter; `checker` is
   *  not initialized here. */
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  /*! Flag at @p p with release value @p c and no waiters. */
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() {
    if (Sleepable)
      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
    else
      return this->load() == checker;
  }
  /*!
   * @param old_loc in old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * Trigger all waiting threads to run by modifying flag to release state
   * (KMP_ATOMIC_ADD of 4 on the flag); the macro's result is discarded.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  /*! Same test as is_sleeping(). */
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  /*! @result always NULL for this flag kind */
  kmp_uint8 *get_stolen() { return NULL; }
  /*! @result always bs_last_barrier for this flag kind */
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <bool Cancellable, bool Sleepable>		template <bool Cancellable, bool Sleepable>
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {		class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
public:		public:
kmp_flag_32(std::atomic<kmp_uint32> *p)		kmp_flag_32(std::atomic<kmp_uint32> *p)
: kmp_basic_flag<kmp_uint32, Sleepable>(p) {}		: kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
kmp_flag_32(std::atomic<kmp_uint32> p, kmp_info_t thr)		kmp_flag_32(std::atomic<kmp_uint32> p, kmp_info_t thr)
: kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}		: kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)		kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
: kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}		: kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }		void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT \|\| KMP_HAVE_UMWAIT		#if KMP_HAVE_MWAIT \|\| KMP_HAVE_UMWAIT
void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }		void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif		#endif
void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }		void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,		int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),		int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),
kmp_int32 is_constrained) {		kmp_int32 is_constrained) {
Show All 10 Lines	else
return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(		return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));		this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}		}
void release() { __kmp_release_template(this); }		void release() { __kmp_release_template(this); }
flag_type get_ptr_type() { return flag32; }		flag_type get_ptr_type() { return flag32; }
};		};

template <bool Cancellable, bool Sleepable>		template <bool Cancellable, bool Sleepable>
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {		class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
public:		public:
kmp_flag_64(volatile kmp_uint64 *p)		kmp_flag_64(volatile kmp_uint64 *p)
: kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}		: kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
kmp_flag_64(volatile kmp_uint64 p, kmp_info_t thr)		kmp_flag_64(volatile kmp_uint64 p, kmp_info_t thr)
: kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}		: kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)		kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
: kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}		: kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
		kmp_flag_64(volatile kmp_uint64 p, kmp_uint64 c, std::atomic<bool> loc)
		: kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }		void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT \|\| KMP_HAVE_UMWAIT		#if KMP_HAVE_MWAIT \|\| KMP_HAVE_UMWAIT
void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }		void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
#endif		#endif
void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }		void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,		int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),		int thread_finished USE_ITT_BUILD_ARG(void itt_sync_obj),
kmp_int32 is_constrained) {		kmp_int32 is_constrained) {
Show All 9 Lines	bool wait(kmp_info_t *this_thr,
else		else
return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(		return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));		this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}		}
void release() { __kmp_release_template(this); }		void release() { __kmp_release_template(this); }
flag_type get_ptr_type() { return flag64; }		flag_type get_ptr_type() { return flag64; }
};		};

		/*! 64-bit flag over std::atomic<kmp_uint64> storage, tagged atomic_flag64.
		 * Every operation forwards to the matching __kmp_atomic_*_64 routine or to
		 * the __kmp_wait_template / __kmp_release_template helpers. */
		template <bool Cancellable, bool Sleepable>
		class kmp_atomic_flag_64
		    : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
		public:
		  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
		      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
		  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
		      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
		  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
		      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
		  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
		                     std::atomic<bool> *loc)
		      : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
		  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
		  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
		  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
		  /*! Forwards all arguments, plus this flag, to
		   * __kmp_atomic_execute_tasks_64 and returns its result. */
		  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
		                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
		                    kmp_int32 is_constrained) {
		    return __kmp_atomic_execute_tasks_64(
		        this_thr, gtid, this, final_spin,
		        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
		  }
		  /*! Waits via __kmp_wait_template; @p final_spin only selects the
		   * TRUE/FALSE template argument of that instantiation. */
		  bool wait(kmp_info_t *this_thr,
		            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
		    if (final_spin)
		      return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
		                                 Sleepable>(
		          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
		    return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
		                               Sleepable>(
		        this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
		  }
		  void release() { __kmp_release_template(this); }
		  flag_type get_ptr_type() { return atomic_flag64; }
		};

// Hierarchical 64-bit on-core barrier instantiation		// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {		class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
kmp_uint64 checker;		kmp_uint32 offset; /*< Portion of flag of interest for an operation. /
kmp_info_t *waiting_threads[1];
kmp_uint32 num_waiting_threads;
kmp_uint32
offset; /*< Portion of flag that is of interest for an operation. /
bool flag_switch; /*< Indicates a switch in flag location. /		bool flag_switch; /*< Indicates a switch in flag location. /
enum barrier_type bt; /*< Barrier type. /		enum barrier_type bt; /*< Barrier type. /
kmp_info_t this_thr; /*< Thread that may be redirected to different flag		kmp_info_t this_thr; /< Thread to redirect to different flag location. /
location. */
#if USE_ITT_BUILD		#if USE_ITT_BUILD
void *		void itt_sync_obj; /< ITT object to pass to new flag location. /
itt_sync_obj; /*< ITT object that must be passed to new flag location. /
#endif		#endif
unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {		unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
return (RCAST(unsigned char , CCAST(kmp_uint64 , loc)))[offset];		return (RCAST(unsigned char , CCAST(kmp_uint64 , loc)))[offset];
}		}

public:		public:
kmp_flag_oncore(volatile kmp_uint64 *p)		kmp_flag_oncore(volatile kmp_uint64 *p)
: kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),		: kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
flag_switch(false) {}		}
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)		kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
: kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),		: kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
offset(idx), flag_switch(false) {}		flag_switch(false),
		bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,		kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
enum barrier_type bar_t,		enum barrier_type bar_t,
kmp_info_t thr USE_ITT_BUILD_ARG(void itt))		kmp_info_t thr USE_ITT_BUILD_ARG(void itt))
: kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),		: kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),		flag_switch(false), bt(bar_t),
this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}		this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
kmp_info_t *get_waiter(kmp_uint32 i) {		virtual ~kmp_flag_oncore() override {}
KMP_DEBUG_ASSERT(i < num_waiting_threads);		bool done_check_val(kmp_uint64 old_loc) override {
return waiting_threads[i];
}
kmp_uint32 get_num_waiters() { return num_waiting_threads; }
void set_waiter(kmp_info_t *thr) {
waiting_threads[0] = thr;
num_waiting_threads = 1;
}
bool done_check_val(kmp_uint64 old_loc) {
return byteref(&old_loc, offset) == checker;		return byteref(&old_loc, offset) == checker;
}		}
bool done_check() { return done_check_val(*get()); }		bool done_check() override { return done_check_val(*get()); }
bool notdone_check() {		bool notdone_check() override {
// Calculate flag_switch		// Calculate flag_switch
if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)		if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
flag_switch = true;		flag_switch = true;
if (byteref(get(), offset) != 1 && !flag_switch)		if (byteref(get(), offset) != 1 && !flag_switch)
return true;		return true;
else if (flag_switch) {		else if (flag_switch) {
this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;		this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,		kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
(kmp_uint64)KMP_BARRIER_STATE_BUMP);		(kmp_uint64)KMP_BARRIER_STATE_BUMP);
__kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));		__kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
}		}
return false;		return false;
}		}
void internal_release() {		void internal_release() {
// Other threads can write their own bytes simultaneously.		// Other threads can write their own bytes simultaneously.
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {		if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
byteref(get(), offset) = 1;		byteref(get(), offset) = 1;
} else {		} else {
kmp_uint64 mask = 0;		kmp_uint64 mask = 0;
byteref(&mask, offset) = 1;		byteref(&mask, offset) = 1;
KMP_TEST_THEN_OR64(get(), mask);		KMP_TEST_THEN_OR64(get(), mask);
}		}
}		}
kmp_uint64 set_sleeping() {
return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
}
kmp_uint64 unset_sleeping() {
return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
}
bool is_sleeping_val(kmp_uint64 old_loc) {
return old_loc & KMP_BARRIER_SLEEP_STATE;
}
bool is_sleeping() { return is_sleeping_val(*get()); }
bool is_any_sleeping() { return is_sleeping_val(*get()); }
void wait(kmp_info_t *this_thr, int final_spin) {		void wait(kmp_info_t *this_thr, int final_spin) {
if (final_spin)		if (final_spin)
__kmp_wait_template<kmp_flag_oncore, TRUE>(		__kmp_wait_template<kmp_flag_oncore, TRUE>(
this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));		this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
else		else
__kmp_wait_template<kmp_flag_oncore, FALSE>(		__kmp_wait_template<kmp_flag_oncore, FALSE>(
this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));		this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
}		}
Show All 14 Lines	if (ompd_state & OMPD_ENABLE_BP)
ompd_bp_task_end();		ompd_bp_task_end();
return ret;		return ret;
#else		#else
return __kmp_execute_tasks_oncore(		return __kmp_execute_tasks_oncore(
this_thr, gtid, this, final_spin,		this_thr, gtid, this, final_spin,
thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);		thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
#endif		#endif
}		}
kmp_uint8 *get_stolen() { return NULL; }
enum barrier_type get_bt() { return bt; }		enum barrier_type get_bt() { return bt; }
flag_type get_ptr_type() { return flag_oncore; }		flag_type get_ptr_type() { return flag_oncore; }
};		};

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc		static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: unused function '__kmp_null_resume_wrapper' [clang-diagnostic-unused-function] not useful Lint: Pre-merge checks: clang-tidy: warning: unused function '__kmp_null_resume_wrapper' [clang-diagnostic-unused…
// associated with int gtid.		int gtid = __kmp_gtid_from_thread(thr);
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {		void flag = CCAST(void , thr->th.th_sleep_loc);
		flag_type type = thr->th.th_sleep_loc_type;
if (!flag)		if (!flag)
return;		return;
		// Attempt to wake up a thread: examine its type and call appropriate template
switch (RCAST(kmp_flag_64<> , CCAST(void , flag))->get_type()) {		switch (type) {
case flag32:		case flag32:
__kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);		__kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
break;		break;
case flag64:		case flag64:
__kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);		__kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
		break;
		case atomic_flag64:
		__kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
break;		break;
case flag_oncore:		case flag_oncore:
__kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);		__kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
break;		break;
		#ifdef KMP_DEBUG
		case flag_unset:
		KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
		break;
		default:
		KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
		"known flag type\n",
		type));
		#endif
}		}
}		}

/*!		/*!
@}		@}
*/		*/

#endif // KMP_WAIT_RELEASE_H		#endif // KMP_WAIT_RELEASE_H

openmp/runtime/src/kmp_wait_release.cpp

	Show All 27 Lines
	template <bool C, bool S>			template <bool C, bool S>
	void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag) {			void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag) {
	__kmp_mwait_template(th_gtid, flag);			__kmp_mwait_template(th_gtid, flag);
	}			}
	template <bool C, bool S>			template <bool C, bool S>
	void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag) {			void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag) {
	__kmp_mwait_template(th_gtid, flag);			__kmp_mwait_template(th_gtid, flag);
	}			}
				template <bool C, bool S>
				void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
				__kmp_mwait_template(th_gtid, flag);
				}
	void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {			void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag) {
	__kmp_mwait_template(th_gtid, flag);			__kmp_mwait_template(th_gtid, flag);
	}			}

	template void __kmp_mwait_32<false, false>(int, kmp_flag_32<false, false> *);			template void __kmp_mwait_32<false, false>(int, kmp_flag_32<false, false> *);
	template void __kmp_mwait_64<false, true>(int, kmp_flag_64<false, true> *);			template void __kmp_mwait_64<false, true>(int, kmp_flag_64<false, true> *);
	template void __kmp_mwait_64<true, false>(int, kmp_flag_64<true, false> *);			template void __kmp_mwait_64<true, false>(int, kmp_flag_64<true, false> *);
				template void
				__kmp_atomic_mwait_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
				template void
				__kmp_atomic_mwait_64<true, false>(int, kmp_atomic_flag_64<true, false> *);
	#endif			#endif

openmp/runtime/src/z_Linux_util.cpp

Show First 20 Lines • Show All 1,401 Lines • ▼ Show 20 Lines	static inline void __kmp_suspend_template(int th_gtid, C *flag) {
__kmp_lock_suspend_mx(th);		__kmp_lock_suspend_mx(th);

KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",		KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
th_gtid, flag->get()));		th_gtid, flag->get()));

/* TODO: shouldn't this use release semantics to ensure that		/* TODO: shouldn't this use release semantics to ensure that
__kmp_suspend_initialize_thread gets called first? */		__kmp_suspend_initialize_thread gets called first? */
old_spin = flag->set_sleeping();		old_spin = flag->set_sleeping();
		TCW_PTR(th->th.th_sleep_loc, (void *)flag);
		th->th.th_sleep_loc_type = flag->get_type();
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&		if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
__kmp_pause_status != kmp_soft_paused) {		__kmp_pause_status != kmp_soft_paused) {
flag->unset_sleeping();		flag->unset_sleeping();
		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;
__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
return;		return;
}		}
KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"		KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
" was %x\n",		" was %x\n",
th_gtid, flag->get(), flag->load(), old_spin));		th_gtid, flag->get(), flag->load(), old_spin));

if (flag->done_check_val(old_spin)) {		if (flag->done_check_val(old_spin) \|\| flag->done_check()) {
old_spin = flag->unset_sleeping();		flag->unset_sleeping();
		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;
KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "		KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
"for spin(%p)\n",		"for spin(%p)\n",
th_gtid, flag->get()));		th_gtid, flag->get()));
} else {		} else {
/* Encapsulate in a loop as the documentation states that this may		/* Encapsulate in a loop as the documentation states that this may
"with low probability" return when the condition variable has		"with low probability" return when the condition variable has
not been signaled or broadcast */		not been signaled or broadcast */
int deactivated = FALSE;		int deactivated = FALSE;
TCW_PTR(th->th.th_sleep_loc, (void *)flag);

while (flag->is_sleeping()) {		while (flag->is_sleeping()) {
#ifdef DEBUG_SUSPEND		#ifdef DEBUG_SUSPEND
char buffer[128];		char buffer[128];
__kmp_suspend_count++;		__kmp_suspend_count++;
__kmp_print_cond(buffer, &th->th.th_suspend_cv);		__kmp_print_cond(buffer, &th->th.th_suspend_cv);
__kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,		__kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,
buffer);		buffer);
#endif		#endif
// Mark the thread as no longer active (only in the first iteration of the		// Mark the thread as no longer active (only in the first iteration of the
// loop).		// loop).
if (!deactivated) {		if (!deactivated) {
th->th.th_active = FALSE;		th->th.th_active = FALSE;
if (th->th.th_active_in_pool) {		if (th->th.th_active_in_pool) {
th->th.th_active_in_pool = FALSE;		th->th.th_active_in_pool = FALSE;
KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);		KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);		KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
}		}
deactivated = TRUE;		deactivated = TRUE;
}		}

		KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
		KMP_DEBUG_ASSERT(flag->get_type() == th->th.th_sleep_loc_type);

#if USE_SUSPEND_TIMEOUT		#if USE_SUSPEND_TIMEOUT
struct timespec now;		struct timespec now;
struct timeval tval;		struct timeval tval;
int msecs;		int msecs;

status = gettimeofday(&tval, NULL);		status = gettimeofday(&tval, NULL);
KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);		KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
TIMEVAL_TO_TIMESPEC(&tval, &now);		TIMEVAL_TO_TIMESPEC(&tval, &now);
Show All 13 Lines	#else
th_gtid));		th_gtid));
status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,		status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
&th->th.th_suspend_mx.m_mutex);		&th->th.th_suspend_mx.m_mutex);
#endif // USE_SUSPEND_TIMEOUT		#endif // USE_SUSPEND_TIMEOUT

if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {		if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
KMP_SYSFAIL("pthread_cond_wait", status);		KMP_SYSFAIL("pthread_cond_wait", status);
}		}

		KMP_DEBUG_ASSERT(flag->get_type() == flag->get_ptr_type());

		if (!flag->is_sleeping() &&
		((status == EINTR) \|\| (status == ETIMEDOUT))) {
		// if interrupt or timeout, and thread is no longer sleeping, we need to
		// make sure sleep_loc gets reset; however, this shouldn't be needed if
		// we woke up with resume
		flag->unset_sleeping();
		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;
		}
#ifdef KMP_DEBUG		#ifdef KMP_DEBUG
if (status == ETIMEDOUT) {		if (status == ETIMEDOUT) {
if (flag->is_sleeping()) {		if (flag->is_sleeping()) {
KF_TRACE(100,		KF_TRACE(100,
("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));		("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));
} else {		} else {
KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "		KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
"not set!\n",		"not set!\n",
th_gtid));		th_gtid));
		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;
}		}
} else if (flag->is_sleeping()) {		} else if (flag->is_sleeping()) {
KF_TRACE(100,		KF_TRACE(100,
("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));		("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
}		}
#endif		#endif
} // while		} // while

// Mark the thread as active again (if it was previous marked as inactive)		// Mark the thread as active again (if it was previous marked as inactive)
if (deactivated) {		if (deactivated) {
th->th.th_active = TRUE;		th->th.th_active = TRUE;
if (TCR_4(th->th.th_in_pool)) {		if (TCR_4(th->th.th_in_pool)) {
KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);		KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
th->th.th_active_in_pool = TRUE;		th->th.th_active_in_pool = TRUE;
}		}
}		}
}		}
		// We may have had the loop variable set before entering the loop body;
		// so we need to reset sleep_loc.
		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;

		KMP_DEBUG_ASSERT(!flag->is_sleeping());
		KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);
#ifdef DEBUG_SUSPEND		#ifdef DEBUG_SUSPEND
{		{
char buffer[128];		char buffer[128];
__kmp_print_cond(buffer, &th->th.th_suspend_cv);		__kmp_print_cond(buffer, &th->th.th_suspend_cv);
__kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,		__kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,
buffer);		buffer);
}		}
#endif		#endif

__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));		KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
}		}

template <bool C, bool S>		template <bool C, bool S>
void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {		void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);		__kmp_suspend_template(th_gtid, flag);
}		}
template <bool C, bool S>		template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {		void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);		__kmp_suspend_template(th_gtid, flag);
}		}
		template <bool C, bool S>
		void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
		__kmp_suspend_template(th_gtid, flag);
		}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {		void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_suspend_template(th_gtid, flag);		__kmp_suspend_template(th_gtid, flag);
}		}

template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);		template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);		template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);		template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
		template void
		__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
		template void
		__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);

/* This routine signals the thread specified by target_gtid to wake up		/* This routine signals the thread specified by target_gtid to wake up
after setting the sleep bit indicated by the flag argument to FALSE.		after setting the sleep bit indicated by the flag argument to FALSE.
The target thread must already have called __kmp_suspend_template() */		The target thread must already have called __kmp_suspend_template() */
template <class C>		template <class C>
static inline void __kmp_resume_template(int target_gtid, C *flag) {		static inline void __kmp_resume_template(int target_gtid, C *flag) {
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);		KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
kmp_info_t *th = __kmp_threads[target_gtid];		kmp_info_t *th = __kmp_threads[target_gtid];
int status;		int status;

#ifdef KMP_DEBUG		#ifdef KMP_DEBUG
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;		int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif		#endif

KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",		KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
gtid, target_gtid));		gtid, target_gtid));
KMP_DEBUG_ASSERT(gtid != target_gtid);		KMP_DEBUG_ASSERT(gtid != target_gtid);

__kmp_suspend_initialize_thread(th);		__kmp_suspend_initialize_thread(th);

__kmp_lock_suspend_mx(th);		__kmp_lock_suspend_mx(th);

if (!flag) { // coming from __kmp_null_resume_wrapper		if (!flag \|\| flag != th->th.th_sleep_loc) {
		// coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
		// different location; wake up at new location
flag = (C )CCAST(void , th->th.th_sleep_loc);		flag = (C )CCAST(void , th->th.th_sleep_loc);
}		}

// First, check if the flag is null or its type has changed. If so, someone		// First, check if the flag is null or its type has changed. If so, someone
// else woke it up.		// else woke it up.
if (!flag \|\| flag->get_type() != flag->get_ptr_type()) { // get_ptr_type		if (!flag) { // Thread doesn't appear to be sleeping on anything
// simply shows what flag was cast to
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "		KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag(%p)\n",		"awake: flag(%p)\n",
gtid, target_gtid, NULL));		gtid, target_gtid, (void *)NULL));
__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
return;		return;
		} else if (flag->get_type() != th->th.th_sleep_loc_type) {
		Lint: Pre-merge checks Inline Actions clang-tidy: warning: do not use 'else' after 'return' [llvm-else-after-return] not useful Lint: Pre-merge checks: clang-tidy: warning: do not use 'else' after 'return' [llvm-else-after-return] [[https://github.
		// Flag type does not appear to match this function template; possibly the
		// thread is sleeping on something else. Try null resume again.
		KF_TRACE(
		5,
		("__kmp_resume_template: T#%d retrying, thread T#%d Mismatch flag(%p), "
		"spin(%p) type=%d ptr_type=%d\n",
		gtid, target_gtid, flag, flag->get(), flag->get_type(),
		th->th.th_sleep_loc_type));
		__kmp_unlock_suspend_mx(th);
		__kmp_null_resume_wrapper(th);
		return;
} else { // if multiple threads are sleeping, flag should be internally		} else { // if multiple threads are sleeping, flag should be internally
// referring to a specific thread here		// referring to a specific thread here
typename C::flag_t old_spin = flag->unset_sleeping();		if (!flag->is_sleeping()) {
if (!flag->is_sleeping_val(old_spin)) {
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "		KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag(%p): "		"awake: flag(%p): %u\n",
"%u => %u\n",		gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
gtid, target_gtid, flag->get(), old_spin, flag->load()));
__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
return;		return;
}		}
KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
"sleep bit for flag's loc(%p): "
"%u => %u\n",
gtid, target_gtid, flag->get(), old_spin, flag->load()));
}		}
		KMP_DEBUG_ASSERT(flag);
		flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;

		KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
		"sleep bit for flag's loc(%p): %u\n",
		gtid, target_gtid, flag->get(), (unsigned int)flag->load()));

#ifdef DEBUG_SUSPEND		#ifdef DEBUG_SUSPEND
{		{
char buffer[128];		char buffer[128];
__kmp_print_cond(buffer, &th->th.th_suspend_cv);		__kmp_print_cond(buffer, &th->th.th_suspend_cv);
__kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,		__kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,
target_gtid, buffer);		target_gtid, buffer);
}		}
Show All 9 Lines
template <bool C, bool S>		template <bool C, bool S>
void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {		void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);		__kmp_resume_template(target_gtid, flag);
}		}
template <bool C, bool S>		template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {		void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);		__kmp_resume_template(target_gtid, flag);
}		}
		template <bool C, bool S>
		void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
		__kmp_resume_template(target_gtid, flag);
		}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {		void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
__kmp_resume_template(target_gtid, flag);		__kmp_resume_template(target_gtid, flag);
}		}

template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);		template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
		template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);		template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
		template void
		__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);

#if KMP_USE_MONITOR		#if KMP_USE_MONITOR
void __kmp_resume_monitor() {		void __kmp_resume_monitor() {
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);		KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
int status;		int status;
#ifdef KMP_DEBUG		#ifdef KMP_DEBUG
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;		int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,		KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
▲ Show 20 Lines • Show All 1,043 Lines • Show Last 20 Lines

openmp/runtime/src/z_Windows_NT_util.cpp

Show All 15 Lines
#include "kmp_io.h"		#include "kmp_io.h"
#include "kmp_itt.h"		#include "kmp_itt.h"
#include "kmp_wait_release.h"		#include "kmp_wait_release.h"

/* This code is related to NtQuerySystemInformation() function. This function		/* This code is related to NtQuerySystemInformation() function. This function
is used in the Load balance algorithm for OMP_DYNAMIC=true to find the		is used in the Load balance algorithm for OMP_DYNAMIC=true to find the
number of running threads in the system. */		number of running threads in the system. */

#include <ntsecapi.h> // UNICODE_STRING		#include <ntsecapi.h> // UNICODE_STRING
		Lint: Pre-merge checks Inline Actions clang-tidy: error: 'ntsecapi.h' file not found [clang-diagnostic-error] not useful Lint: Pre-merge checks: clang-tidy: error: 'ntsecapi.h' file not found [clang-diagnostic-error] [[https://github.
#include <ntstatus.h>		#include <ntstatus.h>
#include <psapi.h>		#include <psapi.h>
#ifdef _MSC_VER		#ifdef _MSC_VER
#pragma comment(lib, "psapi.lib")		#pragma comment(lib, "psapi.lib")
#endif		#endif

enum SYSTEM_INFORMATION_CLASS {		enum SYSTEM_INFORMATION_CLASS {
SystemProcessInformation = 5		SystemProcessInformation = 5
▲ Show 20 Lines • Show All 202 Lines • ▼ Show 20 Lines	if (res == WAIT_OBJECT_0) {
// as MS sent us debug dump whith inconsistent state of data		// as MS sent us debug dump whith inconsistent state of data
__kmp_win32_mutex_lock(mx);		__kmp_win32_mutex_lock(mx);
typename C::flag_t old_f = flag->set_sleeping();		typename C::flag_t old_f = flag->set_sleeping();
if (!flag->done_check_val(old_f & ~KMP_BARRIER_SLEEP_STATE)) {		if (!flag->done_check_val(old_f & ~KMP_BARRIER_SLEEP_STATE)) {
__kmp_win32_mutex_unlock(mx);		__kmp_win32_mutex_unlock(mx);
continue;		continue;
}		}
// condition fulfilled, exiting		// condition fulfilled, exiting
old_f = flag->unset_sleeping();		flag->unset_sleeping();
KMP_DEBUG_ASSERT(old_f & KMP_BARRIER_SLEEP_STATE);
TCW_PTR(th->th.th_sleep_loc, NULL);		TCW_PTR(th->th.th_sleep_loc, NULL);
KF_TRACE(50,		th->th.th_sleep_loc_type = flag_unset;
("__kmp_win32_cond_wait: exiting, condition "		KF_TRACE(50, ("__kmp_win32_cond_wait: exiting, condition "
"fulfilled: flag's loc(%p): %u => %u\n",		"fulfilled: flag's loc(%p): %u\n",
flag->get(), (unsigned int)old_f, (unsigned int)flag->load()));		flag->get(), (unsigned int)flag->load()));

__kmp_win32_mutex_lock(&cv->waiters_count_lock_);		__kmp_win32_mutex_lock(&cv->waiters_count_lock_);
KMP_DEBUG_ASSERT(cv->waiters_count_ > 0);		KMP_DEBUG_ASSERT(cv->waiters_count_ > 0);
cv->release_count_ = cv->waiters_count_;		cv->release_count_ = cv->waiters_count_;
cv->wait_generation_count_++;		cv->wait_generation_count_++;
wait_done = 1;		wait_done = 1;
__kmp_win32_mutex_unlock(&cv->waiters_count_lock_);		__kmp_win32_mutex_unlock(&cv->waiters_count_lock_);

▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines	static inline void __kmp_suspend_template(int th_gtid, C *flag) {

KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for flag's"		KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for flag's"
" loc(%p)\n",		" loc(%p)\n",
th_gtid, flag->get()));		th_gtid, flag->get()));

/* TODO: shouldn't this use release semantics to ensure that		/* TODO: shouldn't this use release semantics to ensure that
__kmp_suspend_initialize_thread gets called first? */		__kmp_suspend_initialize_thread gets called first? */
old_spin = flag->set_sleeping();		old_spin = flag->set_sleeping();
		TCW_PTR(th->th.th_sleep_loc, (void *)flag);
		th->th.th_sleep_loc_type = flag->get_type();
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&		if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
__kmp_pause_status != kmp_soft_paused) {		__kmp_pause_status != kmp_soft_paused) {
flag->unset_sleeping();		flag->unset_sleeping();
		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;
__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
return;		return;
}		}

KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for flag's"		KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for flag's"
" loc(%p)==%u\n",		" loc(%p)==%u\n",
th_gtid, flag->get(), (unsigned int)flag->load()));		th_gtid, flag->get(), (unsigned int)flag->load()));

if (flag->done_check_val(old_spin)) {		if (flag->done_check_val(old_spin) \|\| flag->done_check()) {
old_spin = flag->unset_sleeping();		flag->unset_sleeping();
		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;
KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "		KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
"for flag's loc(%p)\n",		"for flag's loc(%p)\n",
th_gtid, flag->get()));		th_gtid, flag->get()));
} else {		} else {
#ifdef DEBUG_SUSPEND		#ifdef DEBUG_SUSPEND
__kmp_suspend_count++;		__kmp_suspend_count++;
#endif		#endif
/* Encapsulate in a loop as the documentation states that this may "with		/* Encapsulate in a loop as the documentation states that this may "with
low probability" return when the condition variable has not been signaled		low probability" return when the condition variable has not been signaled
or broadcast */		or broadcast */
int deactivated = FALSE;		int deactivated = FALSE;
TCW_PTR(th->th.th_sleep_loc, (void *)flag);
while (flag->is_sleeping()) {		while (flag->is_sleeping()) {
KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "		KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
"kmp_win32_cond_wait()\n",		"kmp_win32_cond_wait()\n",
th_gtid));		th_gtid));
// Mark the thread as no longer active (only in the first iteration of the		// Mark the thread as no longer active (only in the first iteration of the
// loop).		// loop).
if (!deactivated) {		if (!deactivated) {
th->th.th_active = FALSE;		th->th.th_active = FALSE;
if (th->th.th_active_in_pool) {		if (th->th.th_active_in_pool) {
th->th.th_active_in_pool = FALSE;		th->th.th_active_in_pool = FALSE;
KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);		KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);		KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
}		}
deactivated = TRUE;		deactivated = TRUE;
		}

		KMP_DEBUG_ASSERT(th->th.th_sleep_loc);
		KMP_DEBUG_ASSERT(th->th.th_sleep_loc_type == flag->get_type());

__kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,		__kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
flag);		flag);
} else {
__kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, th,
flag);
}

#ifdef KMP_DEBUG		#ifdef KMP_DEBUG
if (flag->is_sleeping()) {		if (flag->is_sleeping()) {
KF_TRACE(100,		KF_TRACE(100,
("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));		("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
}		}
#endif /* KMP_DEBUG */		#endif /* KMP_DEBUG */

} // while		} // while

		// We may have had the loop variable set before entering the loop body;
		// so we need to reset sleep_loc.
		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;

		KMP_DEBUG_ASSERT(!flag->is_sleeping());
		KMP_DEBUG_ASSERT(!th->th.th_sleep_loc);

// Mark the thread as active again (if it was previous marked as inactive)		// Mark the thread as active again (if it was previous marked as inactive)
if (deactivated) {		if (deactivated) {
th->th.th_active = TRUE;		th->th.th_active = TRUE;
if (TCR_4(th->th.th_in_pool)) {		if (TCR_4(th->th.th_in_pool)) {
KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);		KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
th->th.th_active_in_pool = TRUE;		th->th.th_active_in_pool = TRUE;
}		}
}		}
}		}

__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));		KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
}		}

template <bool C, bool S>		template <bool C, bool S>
void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {		void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);		__kmp_suspend_template(th_gtid, flag);
}		}
template <bool C, bool S>		template <bool C, bool S>
void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {		void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag) {
__kmp_suspend_template(th_gtid, flag);		__kmp_suspend_template(th_gtid, flag);
}		}
		template <bool C, bool S>
		void __kmp_atomic_suspend_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag) {
		__kmp_suspend_template(th_gtid, flag);
		}
void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {		void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
__kmp_suspend_template(th_gtid, flag);		__kmp_suspend_template(th_gtid, flag);
}		}

template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);		template void __kmp_suspend_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);		template void __kmp_suspend_64<false, true>(int, kmp_flag_64<false, true> *);
template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);		template void __kmp_suspend_64<true, false>(int, kmp_flag_64<true, false> *);
		template void
		__kmp_atomic_suspend_64<false, true>(int, kmp_atomic_flag_64<false, true> *);
		template void
		__kmp_atomic_suspend_64<true, false>(int, kmp_atomic_flag_64<true, false> *);

/* This routine signals the thread specified by target_gtid to wake up		/* This routine signals the thread specified by target_gtid to wake up
after setting the sleep bit indicated by the flag argument to FALSE */		after setting the sleep bit indicated by the flag argument to FALSE */
template <class C>		template <class C>
static inline void __kmp_resume_template(int target_gtid, C *flag) {		static inline void __kmp_resume_template(int target_gtid, C *flag) {
kmp_info_t *th = __kmp_threads[target_gtid];		kmp_info_t *th = __kmp_threads[target_gtid];

#ifdef KMP_DEBUG		#ifdef KMP_DEBUG
int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;		int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif		#endif

KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",		KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
gtid, target_gtid));		gtid, target_gtid));

__kmp_suspend_initialize_thread(th);		__kmp_suspend_initialize_thread(th);
__kmp_lock_suspend_mx(th);		__kmp_lock_suspend_mx(th);

if (!flag) { // coming from __kmp_null_resume_wrapper		if (!flag \|\| flag != th->th.th_sleep_loc) {
		// coming from __kmp_null_resume_wrapper, or thread is now sleeping on a
		// different location; wake up at new location
flag = (C *)th->th.th_sleep_loc;		flag = (C *)th->th.th_sleep_loc;
}		}

// First, check if the flag is null or its type has changed. If so, someone		// First, check if the flag is null or its type has changed. If so, someone
// else woke it up.		// else woke it up.
if (!flag \|\| flag->get_type() != flag->get_ptr_type()) { // get_ptr_type		if (!flag \|\| flag->get_type() != th->th.th_sleep_loc_type) {
// simply shows what		// simply shows what flag was cast to
// flag was cast to
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "		KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag's loc(%p)\n",		"awake: flag's loc(%p)\n",
gtid, target_gtid, NULL));		gtid, target_gtid, NULL));
__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
return;		return;
} else {		} else {
typename C::flag_t old_spin = flag->unset_sleeping();		if (!flag->is_sleeping()) {
if (!flag->is_sleeping_val(old_spin)) {
KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "		KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
"awake: flag's loc(%p): %u => %u\n",		"awake: flag's loc(%p): %u\n",
gtid, target_gtid, flag->get(), (unsigned int)old_spin,		gtid, target_gtid, flag->get(), (unsigned int)flag->load()));
(unsigned int)flag->load()));
__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);
return;		return;
}		}
}		}
		KMP_DEBUG_ASSERT(flag);
		flag->unset_sleeping();
TCW_PTR(th->th.th_sleep_loc, NULL);		TCW_PTR(th->th.th_sleep_loc, NULL);
		th->th.th_sleep_loc_type = flag_unset;

KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep "		KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep "
"bit for flag's loc(%p)\n",		"bit for flag's loc(%p)\n",
gtid, target_gtid, flag->get()));		gtid, target_gtid, flag->get()));

__kmp_win32_cond_signal(&th->th.th_suspend_cv);		__kmp_win32_cond_signal(&th->th.th_suspend_cv);
__kmp_unlock_suspend_mx(th);		__kmp_unlock_suspend_mx(th);

KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"		KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
" for T#%d\n",		" for T#%d\n",
gtid, target_gtid));		gtid, target_gtid));
}		}

template <bool C, bool S>		template <bool C, bool S>
void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {		void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);		__kmp_resume_template(target_gtid, flag);
}		}
template <bool C, bool S>		template <bool C, bool S>
void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {		void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag) {
__kmp_resume_template(target_gtid, flag);		__kmp_resume_template(target_gtid, flag);
}		}
		template <bool C, bool S>
		void __kmp_atomic_resume_64(int target_gtid, kmp_atomic_flag_64<C, S> *flag) {
		__kmp_resume_template(target_gtid, flag);
		}
void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {		void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
__kmp_resume_template(target_gtid, flag);		__kmp_resume_template(target_gtid, flag);
}		}

template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);		template void __kmp_resume_32<false, true>(int, kmp_flag_32<false, true> *);
		template void __kmp_resume_32<false, false>(int, kmp_flag_32<false, false> *);
template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);		template void __kmp_resume_64<false, true>(int, kmp_flag_64<false, true> *);
		template void
		__kmp_atomic_resume_64<false, true>(int, kmp_atomic_flag_64<false, true> *);

void __kmp_yield() { Sleep(0); }		void __kmp_yield() { Sleep(0); }

void __kmp_gtid_set_specific(int gtid) {		void __kmp_gtid_set_specific(int gtid) {
if (__kmp_init_gtid) {		if (__kmp_init_gtid) {
KA_TRACE(50, ("__kmp_gtid_set_specific: T#%d key:%d\n", gtid,		KA_TRACE(50, ("__kmp_gtid_set_specific: T#%d key:%d\n", gtid,
__kmp_gtid_threadprivate_key));		__kmp_gtid_threadprivate_key));
if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(gtid + 1)))		if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(gtid + 1)))
▲ Show 20 Lines • Show All 1,158 Lines • Show Last 20 Lines

openmp/runtime/test/barrier/omp_barrier.c

	// RUN: %libomp-compile-and-run			// RUN: %libomp-compile-and-run
	// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite %libomp-run			// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite %libomp-run
	// RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run			// RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run
	// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run			// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='hierarchical,hierarchical' KMP_FORKJOIN_BARRIER_PATTERN='hierarchical,hierarchical' %libomp-run
				// RUN: %libomp-compile && env KMP_PLAIN_BARRIER_PATTERN='dist,dist' KMP_FORKJOIN_BARRIER_PATTERN='dist,dist' KMP_REDUCTION_BARRIER_PATTERN='dist,dist' %libomp-run
				// RUN: %libomp-compile && env KMP_BLOCKTIME=infinite KMP_PLAIN_BARRIER_PATTERN='dist,dist' KMP_FORKJOIN_BARRIER_PATTERN='dist,dist' KMP_REDUCTION_BARRIER_PATTERN='dist,dist' %libomp-run
	#include <stdio.h>			#include <stdio.h>
	#include "omp_testsuite.h"			#include "omp_testsuite.h"
	#include "omp_my_sleep.h"			#include "omp_my_sleep.h"

	int test_omp_barrier()			int test_omp_barrier()
	{			{
	int result1;			int result1;
	int result2;			int result2;
	Show All 35 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[OpenMP] libomp: Add new experimental barrier: two-level distributed barrier
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 358312

openmp/runtime/cmake/config-ix.cmake

openmp/runtime/src/i18n/en_US.txt

openmp/runtime/src/kmp.h

openmp/runtime/src/kmp_atomic.cpp

openmp/runtime/src/kmp_barrier.h

openmp/runtime/src/kmp_barrier.cpp

openmp/runtime/src/kmp_config.h.cmake

openmp/runtime/src/kmp_global.cpp

openmp/runtime/src/kmp_os.h

openmp/runtime/src/kmp_runtime.cpp

openmp/runtime/src/kmp_settings.cpp

openmp/runtime/src/kmp_stats.h

openmp/runtime/src/kmp_str.h

openmp/runtime/src/kmp_str.cpp

openmp/runtime/src/kmp_tasking.cpp

openmp/runtime/src/kmp_wait_release.h

openmp/runtime/src/kmp_wait_release.cpp

openmp/runtime/src/z_Linux_util.cpp

openmp/runtime/src/z_Windows_NT_util.cpp

openmp/runtime/test/barrier/omp_barrier.c

This is an archive of the discontinued LLVM Phabricator instance.

[OpenMP] libomp: Add new experimental barrier: two-level distributed barrier
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 358312

openmp/runtime/cmake/config-ix.cmake

openmp/runtime/src/i18n/en_US.txt

openmp/runtime/src/kmp.h

openmp/runtime/src/kmp_atomic.cpp

openmp/runtime/src/kmp_barrier.h

openmp/runtime/src/kmp_barrier.cpp

openmp/runtime/src/kmp_config.h.cmake

openmp/runtime/src/kmp_global.cpp

openmp/runtime/src/kmp_os.h

openmp/runtime/src/kmp_runtime.cpp

openmp/runtime/src/kmp_settings.cpp

openmp/runtime/src/kmp_stats.h

openmp/runtime/src/kmp_str.h

openmp/runtime/src/kmp_str.cpp

openmp/runtime/src/kmp_tasking.cpp

openmp/runtime/src/kmp_wait_release.h

openmp/runtime/src/kmp_wait_release.cpp

openmp/runtime/src/z_Linux_util.cpp

openmp/runtime/src/z_Windows_NT_util.cpp

openmp/runtime/test/barrier/omp_barrier.c

[OpenMP] libomp: Add new experimental barrier: two-level distributed barrier
Closed, Public