Changeset View
Changeset View
Standalone View
Standalone View
openmp/trunk/runtime/src/kmp.h
Show First 20 Lines • Show All 1,700 Lines • ▼ Show 20 Lines | union shared_info { | ||||
dispatch_shared_info64_t s64; | dispatch_shared_info64_t s64; | ||||
} u; | } u; | ||||
volatile kmp_uint32 buffer_index; | volatile kmp_uint32 buffer_index; | ||||
#if OMP_45_ENABLED | #if OMP_45_ENABLED | ||||
volatile kmp_int32 doacross_buf_idx; // teamwise index | volatile kmp_int32 doacross_buf_idx; // teamwise index | ||||
volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1) | volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1) | ||||
kmp_int32 doacross_num_done; // count finished threads | kmp_int32 doacross_num_done; // count finished threads | ||||
#endif | #endif | ||||
#if KMP_USE_HWLOC | |||||
// When linking with libhwloc, the ORDERED EPCC test slows down on big | |||||
// machines (> 48 cores). Performance analysis showed that a cache thrash | |||||
// was occurring and this padding helps alleviate the problem. | |||||
char padding[64]; | |||||
#endif | |||||
} dispatch_shared_info_t; | } dispatch_shared_info_t; | ||||
typedef struct kmp_disp { | typedef struct kmp_disp { | ||||
/* Vector for ORDERED SECTION */ | /* Vector for ORDERED SECTION */ | ||||
void (*th_deo_fcn)( int * gtid, int * cid, ident_t *); | void (*th_deo_fcn)( int * gtid, int * cid, ident_t *); | ||||
/* Vector for END ORDERED SECTION */ | /* Vector for END ORDERED SECTION */ | ||||
void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *); | void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *); | ||||
▲ Show 20 Lines • Show All 845 Lines • ▼ Show 20 Lines | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ | ||||
kmp_r_sched_t t_sched; // run-time schedule for the team | kmp_r_sched_t t_sched; // run-time schedule for the team | ||||
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED | #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED | ||||
int t_first_place; // first & last place in parent thread's partition. | int t_first_place; // first & last place in parent thread's partition. | ||||
int t_last_place; // Restore these values to master after par region. | int t_last_place; // Restore these values to master after par region. | ||||
#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED | #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED | ||||
int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call | int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call | ||||
// Read/write by workers as well ----------------------------------------------------------------------- | // Read/write by workers as well ----------------------------------------------------------------------- | ||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64 | #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_USE_HWLOC | ||||
// Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel' | // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel' | ||||
// and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel' | // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel' | ||||
// and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid of this padding. | // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid of this padding. | ||||
char dummy_padding[1024]; | char dummy_padding[1024]; | ||||
#endif | #endif | ||||
KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams. | KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams. | ||||
// for SERIALIZED teams nested 2 or more levels deep | // for SERIALIZED teams nested 2 or more levels deep | ||||
#if OMP_40_ENABLED | #if OMP_40_ENABLED | ||||
▲ Show 20 Lines • Show All 1,077 Lines • Show Last 20 Lines |