Index: openmp/trunk/runtime/cmake/LibompHandleFlags.cmake =================================================================== --- openmp/trunk/runtime/cmake/LibompHandleFlags.cmake +++ openmp/trunk/runtime/cmake/LibompHandleFlags.cmake @@ -28,6 +28,7 @@ set(flags_local) libomp_append(flags_local -std=c++11 LIBOMP_HAVE_STD_CPP11_FLAG) libomp_append(flags_local -fno-exceptions LIBOMP_HAVE_FNO_EXCEPTIONS_FLAG) + libomp_append(flags_local -fno-rtti LIBOMP_HAVE_FNO_RTTI_FLAG) if(${LIBOMP_ENABLE_WERROR}) libomp_append(flags_local -Werror LIBOMP_HAVE_WERROR_FLAG) endif() Index: openmp/trunk/runtime/cmake/config-ix.cmake =================================================================== --- openmp/trunk/runtime/cmake/config-ix.cmake +++ openmp/trunk/runtime/cmake/config-ix.cmake @@ -49,6 +49,7 @@ # Checking C, CXX, Linker Flags check_cxx_compiler_flag(-std=c++11 LIBOMP_HAVE_STD_CPP11_FLAG) check_cxx_compiler_flag(-fno-exceptions LIBOMP_HAVE_FNO_EXCEPTIONS_FLAG) +check_cxx_compiler_flag(-fno-rtti LIBOMP_HAVE_FNO_RTTI_FLAG) check_c_compiler_flag("-x c++" LIBOMP_HAVE_X_CPP_FLAG) check_c_compiler_flag(-Werror LIBOMP_HAVE_WERROR_FLAG) check_c_compiler_flag(-Wunused-function LIBOMP_HAVE_WNO_UNUSED_FUNCTION_FLAG) Index: openmp/trunk/runtime/src/kmp.h =================================================================== --- openmp/trunk/runtime/src/kmp.h +++ openmp/trunk/runtime/src/kmp.h @@ -528,8 +528,8 @@ */ #if KMP_AFFINITY_SUPPORTED -# if KMP_GROUP_AFFINITY // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). +# if KMP_OS_WINDOWS # if _MSC_VER < 1600 typedef struct GROUP_AFFINITY { KAFFINITY Mask; @@ -537,7 +537,11 @@ WORD Reserved[3]; } GROUP_AFFINITY; # endif /* _MSC_VER < 1600 */ +# if KMP_GROUP_AFFINITY extern int __kmp_num_proc_groups; +# else +static const int __kmp_num_proc_groups = 1; +# endif /* KMP_GROUP_AFFINITY */ typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; @@ -549,285 +553,107 @@ typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; -# endif /* KMP_GROUP_AFFINITY */ +# endif /* KMP_OS_WINDOWS */ + +# if KMP_USE_HWLOC +extern hwloc_topology_t __kmp_hwloc_topology; +extern int __kmp_hwloc_error; +# endif extern size_t __kmp_affin_mask_size; # define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) # define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) # define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) -# if !KMP_USE_HWLOC -# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT) -# define KMP_CPU_SET_ITERATE(i,mask) \ - for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i) -# endif - -#if KMP_USE_HWLOC - -extern hwloc_topology_t __kmp_hwloc_topology; -extern int __kmp_hwloc_error; -typedef hwloc_cpuset_t kmp_affin_mask_t; -# define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i) -# define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i) -# define KMP_CPU_CLR(i,mask) hwloc_bitmap_clr((hwloc_cpuset_t)mask, (unsigned)i) -# define KMP_CPU_ZERO(mask) hwloc_bitmap_zero((hwloc_cpuset_t)mask) -# define KMP_CPU_COPY(dest, src) hwloc_bitmap_copy((hwloc_cpuset_t)dest, (hwloc_cpuset_t)src) -# define KMP_CPU_AND(dest, src) hwloc_bitmap_and((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src) -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ - { \ - unsigned i; \ - for(i=0;i<(unsigned)max_bit_number+1;i++) { \ - if(hwloc_bitmap_isset((hwloc_cpuset_t)mask, i)) { \ - hwloc_bitmap_clr((hwloc_cpuset_t)mask, i); \ - } else { \ - hwloc_bitmap_set((hwloc_cpuset_t)mask, i); \ - } \ - } \ - hwloc_bitmap_and((hwloc_cpuset_t)mask, (hwloc_cpuset_t)mask, \ - (hwloc_cpuset_t)__kmp_affin_fullMask); \ - } \ - -# define KMP_CPU_UNION(dest, src) hwloc_bitmap_or((hwloc_cpuset_t)dest, (hwloc_cpuset_t)dest, (hwloc_cpuset_t)src) # define KMP_CPU_SET_ITERATE(i,mask) \ - for(i = hwloc_bitmap_first((hwloc_cpuset_t)mask); (int)i != -1; i = hwloc_bitmap_next((hwloc_cpuset_t)mask, i)) - -# define KMP_CPU_ALLOC(ptr) ptr = (kmp_affin_mask_t*)hwloc_bitmap_alloc() -# define KMP_CPU_FREE(ptr) hwloc_bitmap_free((hwloc_bitmap_t)ptr); + for (i = (mask)->begin(); i != (mask)->end() ; i = (mask)->next(i)) +# define KMP_CPU_SET(i,mask) (mask)->set(i) +# define KMP_CPU_ISSET(i,mask) (mask)->is_set(i) +# define KMP_CPU_CLR(i,mask) (mask)->clear(i) +# define KMP_CPU_ZERO(mask) (mask)->zero() +# define KMP_CPU_COPY(dest, src) (dest)->copy(src) +# define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src) +# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not() +# define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src) +# define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask()) +# define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr) # define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) # define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) # define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) # define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) - -// -// The following macro should be used to index an array of masks. -// The array should be declared as "kmp_affinity_t *" and allocated with -// size "__kmp_affinity_mask_size * len". The macro takes care of the fact -// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but -// on Linux* OS, sizeof(kmp_affin_t) is 1. -// -# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)(array[i])) -# define KMP_CPU_ALLOC_ARRAY(arr, n) { \ - arr = (kmp_affin_mask_t *)__kmp_allocate(n*sizeof(kmp_affin_mask_t)); \ - unsigned i; \ - for(i=0;i<(unsigned)n;i++) { \ - arr[i] = hwloc_bitmap_alloc(); \ - } \ - } -# define KMP_CPU_FREE_ARRAY(arr, n) { \ - unsigned i; \ - for(i=0;i<(unsigned)n;i++) { \ - hwloc_bitmap_free(arr[i]); \ - } \ - __kmp_free(arr); \ - } -# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) { \ - arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n*sizeof(kmp_affin_mask_t)); \ - unsigned i; \ - for(i=0;i<(unsigned)n;i++) { \ - arr[i] = hwloc_bitmap_alloc(); \ - } \ - } -# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \ - unsigned i; \ - for(i=0;i<(unsigned)n;i++) { \ - hwloc_bitmap_free(arr[i]); \ - } \ - KMP_INTERNAL_FREE(arr); \ - } - -#else /* KMP_USE_HWLOC */ -# if KMP_OS_LINUX -// -// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size -// (in bytes). It should be allocated on a word boundary. -// -// WARNING!!! We have made the base type of the affinity mask unsigned char, -// in order to eliminate a lot of checks that the true system mask size is -// really a multiple of 4 bytes (on Linux* OS). -// -// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!! -// - -typedef unsigned char kmp_affin_mask_t; - -# define _KMP_CPU_SET(i,mask) (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT))) -# define KMP_CPU_SET(i,mask) _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask))) -# define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))) -# define KMP_CPU_ISSET(i,mask) _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask))) -# define _KMP_CPU_CLR(i,mask) (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT))) -# define KMP_CPU_CLR(i,mask) _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask))) - -# define KMP_CPU_ZERO(mask) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(mask))[__i] = 0; \ - } \ - } - -# define KMP_CPU_COPY(dest, src) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - = ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -# define KMP_CPU_AND(dest, src) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - &= ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(mask))[__i] \ - = ~((kmp_affin_mask_t *)(mask))[__i]; \ - } \ - KMP_CPU_AND(mask, __kmp_affin_fullMask); \ - } - -# define KMP_CPU_UNION(dest, src) \ - { \ - size_t __i; \ - for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - |= ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -# endif /* KMP_OS_LINUX */ - -# if KMP_OS_WINDOWS -// -// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on -// Intel(R) 64 it is 8 bytes times the number of processor groups. -// - -# if KMP_GROUP_AFFINITY -typedef DWORD_PTR kmp_affin_mask_t; - -# define _KMP_CPU_SET(i,mask) \ - (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \ - (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))) - -# define KMP_CPU_SET(i,mask) \ - _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask))) - -# define _KMP_CPU_ISSET(i,mask) \ - (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] & \ - (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))) - -# define KMP_CPU_ISSET(i,mask) \ - _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask))) - -# define _KMP_CPU_CLR(i,mask) \ - (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &= \ - ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))) - -# define KMP_CPU_CLR(i,mask) \ - _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask))) - -# define KMP_CPU_ZERO(mask) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(mask))[__i] = 0; \ - } \ - } - -# define KMP_CPU_COPY(dest, src) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - = ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -# define KMP_CPU_AND(dest, src) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - &= ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(mask))[__i] \ - = ~((kmp_affin_mask_t *)(mask))[__i]; \ - } \ - KMP_CPU_AND(mask, __kmp_affin_fullMask); \ - } - -# define KMP_CPU_UNION(dest, src) \ - { \ - int __i; \ - for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \ - ((kmp_affin_mask_t *)(dest))[__i] \ - |= ((kmp_affin_mask_t *)(src))[__i]; \ - } \ - } - - -# else /* KMP_GROUP_AFFINITY */ - -typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */ - -# define KMP_CPU_SET(i,mask) (*(mask) |= (((kmp_affin_mask_t)1) << (i))) -# define KMP_CPU_ISSET(i,mask) (!!(*(mask) & (((kmp_affin_mask_t)1) << (i)))) -# define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i))) -# define KMP_CPU_ZERO(mask) (*(mask) = 0) -# define KMP_CPU_COPY(dest, src) (*(dest) = *(src)) -# define KMP_CPU_AND(dest, src) (*(dest) &= *(src)) -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask)); KMP_CPU_AND(mask, __kmp_affin_fullMask) -# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src)) - -# endif /* KMP_GROUP_AFFINITY */ - -# endif /* KMP_OS_WINDOWS */ - -// -// __kmp_allocate() will return memory allocated on a 4-bytes boundary. -// after zeroing it - it takes care of those assumptions stated above. -// -# define KMP_CPU_ALLOC(ptr) \ - (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size))) -# define KMP_CPU_FREE(ptr) __kmp_free(ptr) -# define KMP_CPU_ALLOC_ON_STACK(ptr) (ptr = ((kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size))) -# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */ -# define KMP_CPU_INTERNAL_ALLOC(ptr) (ptr = ((kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size))) -# define KMP_CPU_INTERNAL_FREE(ptr) KMP_INTERNAL_FREE(ptr) - -// -// The following macro should be used to index an array of masks. -// The array should be declared as "kmp_affinity_t *" and allocated with -// size "__kmp_affinity_mask_size * len". The macro takes care of the fact -// that on Windows* OS, sizeof(kmp_affin_t) is really the size of the mask, but -// on Linux* OS, sizeof(kmp_affin_t) is 1. -// -# define KMP_CPU_INDEX(array,i) \ - ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size)) -# define KMP_CPU_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)__kmp_allocate(n * __kmp_affin_mask_size) -# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr); -# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) arr = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(n * __kmp_affin_mask_size) -# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr); - -#endif /* KMP_USE_HWLOC */ - -// prototype after typedef of kmp_affin_mask_t -#if KMP_GROUP_AFFINITY -extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask); +# define KMP_CPU_INDEX(arr,i) __kmp_affinity_dispatch->index_mask_array(arr, i) +# define KMP_CPU_ALLOC_ARRAY(arr, n) (arr = __kmp_affinity_dispatch->allocate_mask_array(n)) +# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_affinity_dispatch->deallocate_mask_array(arr) +# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n) +# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n) +# define __kmp_get_system_affinity(mask, abort_bool) (mask)->get_system_affinity(abort_bool) +# define __kmp_set_system_affinity(mask, abort_bool) (mask)->set_system_affinity(abort_bool) +# define __kmp_get_proc_group(mask) (mask)->get_proc_group() + +class KMPAffinity { +public: + class Mask { + public: + void* operator new(size_t n); + void operator delete(void* p); + void* operator new[](size_t n); + void operator delete[](void* p); + virtual ~Mask() {} + // Set bit i to 1 + virtual void set(int i) {} + // Return bit i + virtual bool is_set(int i) const { return false; } + // Set bit i to 0 + virtual void clear(int i) {} + // Zero out entire mask + virtual void zero() {} + // Copy src into this mask + virtual void copy(const Mask* src) {} + // this &= rhs + virtual void bitwise_and(const Mask* rhs) {} + // this |= rhs + virtual void bitwise_or(const Mask* rhs) {} + // this = ~this + virtual void bitwise_not() {} + // API for iterating over an affinity mask + // for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) + virtual int begin() const { return 0; } + virtual int end() const { return 0; } + virtual int next(int previous) const { return 0; } + // Set the system's affinity to this affinity mask's value + virtual int set_system_affinity(bool abort_on_error) const { return -1; } + // Set this affinity mask to the current system affinity + virtual int get_system_affinity(bool abort_on_error) { return -1; } + // Only 1 DWORD in the mask should have any procs set. + // Return the appropriate index, or -1 for an invalid mask. + virtual int get_proc_group() const { return -1; } + }; + void* operator new(size_t n); + void operator delete(void* p); + // Determine if affinity is capable + virtual void determine_capable(const char* env_var) {} + // Bind the current thread to os proc + virtual void bind_thread(int proc) {} + // Factory functions to allocate/deallocate a mask + virtual Mask* allocate_mask() { return nullptr; } + virtual void deallocate_mask(Mask* m) { } + virtual Mask* allocate_mask_array(int num) { return nullptr; } + virtual void deallocate_mask_array(Mask* m) { } + virtual Mask* index_mask_array(Mask* m, int index) { return nullptr; } + static void pick_api(); + static void destroy_api(); + enum api_type { + NATIVE_OS +#if KMP_USE_HWLOC + , HWLOC #endif + }; + virtual api_type get_api_type() const { KMP_ASSERT(0); return NATIVE_OS; }; +private: + static bool picked_api; +}; + +typedef KMPAffinity::Mask kmp_affin_mask_t; +extern KMPAffinity* __kmp_affinity_dispatch; // // Declare local char buffers with this size for printing debug and info @@ -895,8 +721,6 @@ extern char * __kmp_affinity_proclist; /* proc ID list */ extern kmp_affin_mask_t *__kmp_affinity_masks; extern unsigned __kmp_affinity_num_masks; -extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error); -extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error); extern void __kmp_affinity_bind_thread(int which); extern kmp_affin_mask_t *__kmp_affin_fullMask; @@ -2606,7 +2430,7 @@ int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call // Read/write by workers as well ----------------------------------------------------------------------- -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_USE_HWLOC +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel' // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel' // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding. Index: openmp/trunk/runtime/src/kmp_affinity.h =================================================================== --- openmp/trunk/runtime/src/kmp_affinity.h +++ openmp/trunk/runtime/src/kmp_affinity.h @@ -15,6 +15,470 @@ #ifndef KMP_AFFINITY_H #define KMP_AFFINITY_H +#include "kmp_os.h" +#include "kmp.h" + +#if KMP_AFFINITY_SUPPORTED +#if KMP_USE_HWLOC +class KMPHwlocAffinity: public KMPAffinity { +public: + class Mask : public KMPAffinity::Mask { + hwloc_cpuset_t mask; + public: + Mask() { mask = hwloc_bitmap_alloc(); this->zero(); } + ~Mask() { hwloc_bitmap_free(mask); } + void set(int i) override { hwloc_bitmap_set(mask, i); } + bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } + void clear(int i) override { hwloc_bitmap_clr(mask, i); } + void zero() override { hwloc_bitmap_zero(mask); } + void copy(const KMPAffinity::Mask* src) override { + const Mask* convert = static_cast(src); + hwloc_bitmap_copy(mask, convert->mask); + } + void bitwise_and(const KMPAffinity::Mask* rhs) override { + const Mask* convert = static_cast(rhs); + hwloc_bitmap_and(mask, mask, convert->mask); + } + void bitwise_or(const KMPAffinity::Mask * rhs) override { + const Mask* convert = static_cast(rhs); + hwloc_bitmap_or(mask, mask, convert->mask); + } + void bitwise_not() override { hwloc_bitmap_not(mask, mask); } + int begin() const override { return hwloc_bitmap_first(mask); } + int end() const override { return -1; } + int next(int previous) const override { return hwloc_bitmap_next(mask, previous); } + int get_system_affinity(bool abort_on_error) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); + } + return error; + } + int set_system_affinity(bool abort_on_error) const override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); + } + return error; + } + int get_proc_group() const override { + int i; + int group = -1; +# if KMP_OS_WINDOWS + if (__kmp_num_proc_groups == 1) { + return 1; + } + for (i = 0; i < __kmp_num_proc_groups; i++) { + // On windows, the long type is always 32 bits + unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2); + unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1); + if (first_32_bits == 0 && second_32_bits == 0) { + continue; + } + if (group >= 0) { + return -1; + } + group = i; + } +# endif /* KMP_OS_WINDOWS */ + return group; + } + }; + void determine_capable(const char* var) override { + const hwloc_topology_support* topology_support; + if(__kmp_hwloc_topology == NULL) { + if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) { + __kmp_hwloc_error = TRUE; + if(__kmp_affinity_verbose) + KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); + } + if(hwloc_topology_load(__kmp_hwloc_topology) < 0) { + __kmp_hwloc_error = TRUE; + if(__kmp_affinity_verbose) + KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); + } + } + topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); + // Is the system capable of setting/getting this thread's affinity? + // also, is topology discovery possible? (pu indicates ability to discover processing units) + // and finally, were there no errors when calling any hwloc_* API functions? + if(topology_support && topology_support->cpubind->set_thisthread_cpubind && + topology_support->cpubind->get_thisthread_cpubind && + topology_support->discovery->pu && + !__kmp_hwloc_error) + { + // enables affinity according to KMP_AFFINITY_CAPABLE() macro + KMP_AFFINITY_ENABLE(TRUE); + } else { + // indicate that hwloc didn't work and disable affinity + __kmp_hwloc_error = TRUE; + KMP_AFFINITY_DISABLE(); + } + } + void bind_thread(int which) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal set affinity operation when not capable"); + KMPAffinity::Mask *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(which, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); + } + KMPAffinity::Mask* allocate_mask() override { return new Mask(); } + void deallocate_mask(KMPAffinity::Mask* m) override { delete m; } + KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } + void deallocate_mask_array(KMPAffinity::Mask* array) override { + Mask* hwloc_array = static_cast(array); + delete[] hwloc_array; + } + KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { + Mask* hwloc_array = static_cast(array); + return &(hwloc_array[index]); + } + api_type get_api_type() const override { return HWLOC; } +}; +#endif /* KMP_USE_HWLOC */ + +#if KMP_OS_LINUX +/* + * On some of the older OS's that we build on, these constants aren't present + * in #included from . They must be the same on + * all systems of the same arch where they are defined, and they cannot change. + * stone forever. + */ +#include +# if KMP_ARCH_X86 || KMP_ARCH_ARM +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 241 +# elif __NR_sched_setaffinity != 241 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 242 +# elif __NR_sched_getaffinity != 242 +# error Wrong code for getaffinity system call. +# endif /* __NR_sched_getaffinity */ +# elif KMP_ARCH_AARCH64 +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 122 +# elif __NR_sched_setaffinity != 122 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 123 +# elif __NR_sched_getaffinity != 123 +# error Wrong code for getaffinity system call. +# endif /* __NR_sched_getaffinity */ +# elif KMP_ARCH_X86_64 +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 203 +# elif __NR_sched_setaffinity != 203 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 204 +# elif __NR_sched_getaffinity != 204 +# error Wrong code for getaffinity system call. +# endif /* __NR_sched_getaffinity */ +# elif KMP_ARCH_PPC64 +# ifndef __NR_sched_setaffinity +# define __NR_sched_setaffinity 222 +# elif __NR_sched_setaffinity != 222 +# error Wrong code for setaffinity system call. +# endif /* __NR_sched_setaffinity */ +# ifndef __NR_sched_getaffinity +# define __NR_sched_getaffinity 223 +# elif __NR_sched_getaffinity != 223 +# error Wrong code for getaffinity system call. +# endif /* __NR_sched_getaffinity */ +# else +# error Unknown or unsupported architecture +# endif /* KMP_ARCH_* */ +class KMPNativeAffinity : public KMPAffinity { + class Mask : public KMPAffinity::Mask { + typedef unsigned char mask_t; + static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT; + public: + mask_t* mask; + Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); } + ~Mask() { if (mask) __kmp_free(mask); } + void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); } + bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); } + void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } + void zero() override { + for (size_t i=0; i<__kmp_affin_mask_size; ++i) + mask[i] = 0; + } + void copy(const KMPAffinity::Mask* src) override { + const Mask * convert = static_cast(src); + for (size_t i=0; i<__kmp_affin_mask_size; ++i) + mask[i] = convert->mask[i]; + } + void bitwise_and(const KMPAffinity::Mask* rhs) override { + const Mask * convert = static_cast(rhs); + for (size_t i=0; i<__kmp_affin_mask_size; ++i) + mask[i] &= convert->mask[i]; + } + void bitwise_or(const KMPAffinity::Mask* rhs) override { + const Mask * convert = static_cast(rhs); + for (size_t i=0; i<__kmp_affin_mask_size; ++i) + mask[i] |= convert->mask[i]; + } + void bitwise_not() override { + for (size_t i=0; i<__kmp_affin_mask_size; ++i) + mask[i] = ~(mask[i]); + } + int begin() const override { + int retval = 0; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; + } + int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; } + int next(int previous) const override { + int retval = previous+1; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; + } + int get_system_affinity(bool abort_on_error) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask ); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); + } + return error; + } + int set_system_affinity(bool abort_on_error) const override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask ); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); + } + return error; + } + }; + void determine_capable(const char* env_var) override { + __kmp_affinity_determine_capable(env_var); + } + void bind_thread(int which) override { + __kmp_affinity_bind_thread(which); + } + KMPAffinity::Mask* allocate_mask() override { + KMPNativeAffinity::Mask* retval = new Mask(); + return retval; + } + void deallocate_mask(KMPAffinity::Mask* m) override { + KMPNativeAffinity::Mask* native_mask = static_cast(m); + delete m; + } + KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } + void deallocate_mask_array(KMPAffinity::Mask* array) override { + Mask* linux_array = static_cast(array); + delete[] linux_array; + } + KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { + Mask* linux_array = static_cast(array); + return &(linux_array[index]); + } + api_type get_api_type() const override { return NATIVE_OS; } +}; +#endif /* KMP_OS_LINUX */ + +#if KMP_OS_WINDOWS +class KMPNativeAffinity : public KMPAffinity { + class Mask : public KMPAffinity::Mask { + typedef ULONG_PTR mask_t; + static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT; + mask_t* mask; + public: + Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); } + ~Mask() { if (mask) __kmp_free(mask); } + void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); } + bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); } + void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } + void zero() override { + for (size_t i=0; i<__kmp_num_proc_groups; ++i) + mask[i] = 0; + } + void copy(const KMPAffinity::Mask* src) override { + const Mask * convert = static_cast(src); + for (size_t i=0; i<__kmp_num_proc_groups; ++i) + mask[i] = convert->mask[i]; + } + void bitwise_and(const KMPAffinity::Mask* rhs) override { + const Mask * convert = static_cast(rhs); + for (size_t i=0; i<__kmp_num_proc_groups; ++i) + mask[i] &= convert->mask[i]; + } + void bitwise_or(const KMPAffinity::Mask* rhs) override { + const Mask * convert = static_cast(rhs); + for (size_t i=0; i<__kmp_num_proc_groups; ++i) + mask[i] |= convert->mask[i]; + } + void bitwise_not() override { + for (size_t i=0; i<__kmp_num_proc_groups; ++i) + mask[i] = ~(mask[i]); + } + int begin() const override { + int retval = 0; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; + } + int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; } + int next(int previous) const override { + int retval = previous+1; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; + } + int set_system_affinity(bool abort_on_error) const override { + if (__kmp_num_proc_groups > 1) { + // Check for a valid mask. + GROUP_AFFINITY ga; + int group = get_proc_group(); + if (group < 0) { + if (abort_on_error) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } + return -1; + } + // Transform the bit vector into a GROUP_AFFINITY struct + // and make the system call to set affinity. + ga.Group = group; + ga.Mask = mask[group]; + ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; + + KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); + if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ), + KMP_ERR( error ), __kmp_msg_null); + } + return error; + } + } else { + if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ), + KMP_ERR( error ), __kmp_msg_null); + } + return error; + } + } + return 0; + } + int get_system_affinity(bool abort_on_error) override { + if (__kmp_num_proc_groups > 1) { + this->zero(); + GROUP_AFFINITY ga; + KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); + if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) { + return -1; + } + mask[ga.Group] = ga.Mask; + } else { + mask_t newMask, sysMask, retval; + if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + retval = SetThreadAffinityMask(GetCurrentThread(), newMask); + if (! retval) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + newMask = SetThreadAffinityMask(GetCurrentThread(), retval); + if (! newMask) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + } + *mask = retval; + } + return 0; + } + int get_proc_group() const override { + int group = -1; + if (__kmp_num_proc_groups == 1) { + return 1; + } + for (int i = 0; i < __kmp_num_proc_groups; i++) { + if (mask[i] == 0) + continue; + if (group >= 0) + return -1; + group = i; + } + return group; + } + }; + void determine_capable(const char* env_var) override { + __kmp_affinity_determine_capable(env_var); + } + void bind_thread(int which) override { + __kmp_affinity_bind_thread(which); + } + KMPAffinity::Mask* allocate_mask() override { return new Mask(); } + void deallocate_mask(KMPAffinity::Mask* m) override { delete m; } + KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } + void deallocate_mask_array(KMPAffinity::Mask* array) override { + Mask* windows_array = static_cast(array); + delete[] windows_array; + } + KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { + Mask* windows_array = static_cast(array); + return &(windows_array[index]); + } + api_type get_api_type() const override { return NATIVE_OS; } +}; +#endif /* KMP_OS_WINDOWS */ +#endif /* KMP_AFFINITY_SUPPORTED */ + class Address { public: static const unsigned maxDepth = 32; Index: openmp/trunk/runtime/src/kmp_affinity.cpp =================================================================== --- openmp/trunk/runtime/src/kmp_affinity.cpp +++ openmp/trunk/runtime/src/kmp_affinity.cpp @@ -47,53 +47,42 @@ #if KMP_AFFINITY_SUPPORTED -// -// Print the affinity mask to the character array in a pretty format. -// -#if KMP_USE_HWLOC -char * -__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) -{ - int num_chars_to_write, num_chars_written; - char* scan; - KMP_ASSERT(buf_len >= 40); +bool KMPAffinity::picked_api = false; - // bufsize of 0 just retrieves the needed buffer size. - num_chars_to_write = hwloc_bitmap_list_snprintf(buf, 0, (hwloc_bitmap_t)mask); +void* KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); } +void* KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); } +void KMPAffinity::Mask::operator delete(void* p) { __kmp_free(p); } +void KMPAffinity::Mask::operator delete[](void* p) { __kmp_free(p); } +void* KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); } +void KMPAffinity::operator delete(void* p) { __kmp_free(p); } + +void KMPAffinity::pick_api() { + KMPAffinity* affinity_dispatch; + if (picked_api) + return; +#if KMP_USE_HWLOC + if (__kmp_affinity_top_method == affinity_top_method_hwloc) { + affinity_dispatch = new KMPHwlocAffinity(); + } else +#endif + { + affinity_dispatch = new KMPNativeAffinity(); + } + __kmp_affinity_dispatch = affinity_dispatch; + picked_api = true; +} - // need '{', "xxxxxxxx...xx", '}', '\0' = num_chars_to_write + 3 bytes - // * num_chars_to_write returned by hwloc_bitmap_list_snprintf does not - // take into account the '\0' character. - if(hwloc_bitmap_iszero((hwloc_bitmap_t)mask)) { - KMP_SNPRINTF(buf, buf_len, "{}"); - } else if(num_chars_to_write < buf_len - 3) { - // no problem fitting the mask into buf_len number of characters - buf[0] = '{'; - // use buf_len-3 because we have the three characters: '{' '}' '\0' to add to the buffer - num_chars_written = hwloc_bitmap_list_snprintf(buf+1, buf_len-3, (hwloc_bitmap_t)mask); - buf[num_chars_written+1] = '}'; - buf[num_chars_written+2] = '\0'; - } else { - // Need to truncate the affinity mask string and add ellipsis. - // To do this, we first write out the '{' + str(mask) - buf[0] = '{'; - hwloc_bitmap_list_snprintf(buf+1, buf_len-1, (hwloc_bitmap_t)mask); - // then, what we do here is go to the 7th to last character, then go backwards until we are NOT - // on a digit then write "...}\0". This way it is a clean ellipsis addition and we don't - // overwrite part of an affinity number. i.e., we avoid something like { 45, 67, 8...} and get - // { 45, 67,...} instead. - scan = buf + buf_len - 7; - while(*scan >= '0' && *scan <= '9' && scan >= buf) - scan--; - *(scan+1) = '.'; - *(scan+2) = '.'; - *(scan+3) = '.'; - *(scan+4) = '}'; - *(scan+5) = '\0'; +void KMPAffinity::destroy_api() { + if (__kmp_affinity_dispatch != NULL) { + delete __kmp_affinity_dispatch; + __kmp_affinity_dispatch = NULL; + picked_api = false; } - return buf; } -#else + +// +// Print the affinity mask to the character array in a pretty format. +// char * __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) { @@ -105,12 +94,8 @@ // Find first element / check for empty set. // size_t i; - for (i = 0; i < KMP_CPU_SETSIZE; i++) { - if (KMP_CPU_ISSET(i, mask)) { - break; - } - } - if (i == KMP_CPU_SETSIZE) { + i = mask->begin(); + if (i == mask->end()) { KMP_SNPRINTF(scan, end-scan+1, "{}"); while (*scan != '\0') scan++; KMP_ASSERT(scan <= end); @@ -120,7 +105,7 @@ KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i); while (*scan != '\0') scan++; i++; - for (; i < KMP_CPU_SETSIZE; i++) { + for (; i != mask->end(); i = mask->next(i)) { if (! KMP_CPU_ISSET(i, mask)) { continue; } @@ -137,7 +122,7 @@ KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i); while (*scan != '\0') scan++; } - if (i < KMP_CPU_SETSIZE) { + if (i != mask->end()) { KMP_SNPRINTF(scan, end-scan+1, ",..."); while (*scan != '\0') scan++; } @@ -146,7 +131,6 @@ KMP_ASSERT(scan <= end); return buf; } -#endif // KMP_USE_HWLOC void @@ -677,7 +661,7 @@ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); if (__kmp_affinity_type == affinity_none) { int avail_ct = 0; - unsigned int i; + int i; KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) continue; @@ -1031,7 +1015,7 @@ } KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc); - __kmp_affinity_bind_thread(i); + __kmp_affinity_dispatch->bind_thread(i); threadInfo[nApics].osId = i; // @@ -1547,7 +1531,7 @@ } KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc); - __kmp_affinity_bind_thread(proc); + __kmp_affinity_dispatch->bind_thread(proc); // // Extrach the labels for each level in the machine topology map @@ -3705,7 +3689,7 @@ const char *file_name = NULL; int line = 0; # if KMP_USE_HWLOC - if (depth < 0) { + if (depth < 0 && __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { if (__kmp_affinity_verbose) { KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); } @@ -3947,6 +3931,7 @@ # if KMP_USE_HWLOC else if (__kmp_affinity_top_method == affinity_top_method_hwloc) { + KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC); if (__kmp_affinity_verbose) { KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); } @@ -4233,6 +4218,7 @@ __kmp_hwloc_topology = NULL; } # endif + KMPAffinity::destroy_api(); } Index: openmp/trunk/runtime/src/kmp_ftn_cdecl.c =================================================================== --- openmp/trunk/runtime/src/kmp_ftn_cdecl.c +++ openmp/trunk/runtime/src/kmp_ftn_cdecl.c @@ -14,6 +14,7 @@ #include "kmp.h" +#include "kmp_affinity.h" #if KMP_OS_WINDOWS # if defined KMP_WIN_CDECL || !defined KMP_DYNAMIC_LIB Index: openmp/trunk/runtime/src/kmp_ftn_entry.h =================================================================== --- openmp/trunk/runtime/src/kmp_ftn_entry.h +++ openmp/trunk/runtime/src/kmp_ftn_entry.h @@ -279,15 +279,13 @@ // // We really only NEED serial initialization here. // + kmp_affin_mask_t* mask_internals; if ( ! TCR_4(__kmp_init_middle) ) { __kmp_middle_initialize(); } - # if KMP_USE_HWLOC - *mask = (hwloc_cpuset_t)hwloc_bitmap_alloc(); - # else - *mask = kmpc_malloc( __kmp_affin_mask_size ); - # endif - KMP_CPU_ZERO( (kmp_affin_mask_t *)(*mask) ); + mask_internals = __kmp_affinity_dispatch->allocate_mask(); + KMP_CPU_ZERO( mask_internals ); + *mask = mask_internals; #endif } @@ -300,6 +298,7 @@ // // We really only NEED serial initialization here. // + kmp_affin_mask_t* mask_internals; if ( ! TCR_4(__kmp_init_middle) ) { __kmp_middle_initialize(); } @@ -308,11 +307,8 @@ KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" ); } } - # if KMP_USE_HWLOC - hwloc_bitmap_free((hwloc_cpuset_t)(*mask)); - # else - kmpc_free( *mask ); - # endif + mask_internals = (kmp_affin_mask_t*)(*mask); + __kmp_affinity_dispatch->deallocate_mask(mask_internals); *mask = NULL; #endif } Index: openmp/trunk/runtime/src/kmp_ftn_extra.c =================================================================== --- openmp/trunk/runtime/src/kmp_ftn_extra.c +++ openmp/trunk/runtime/src/kmp_ftn_extra.c @@ -14,6 +14,7 @@ #include "kmp.h" +#include "kmp_affinity.h" #if KMP_OS_WINDOWS # define KMP_FTN_ENTRIES KMP_FTN_PLAIN Index: openmp/trunk/runtime/src/kmp_global.c =================================================================== --- openmp/trunk/runtime/src/kmp_global.c +++ openmp/trunk/runtime/src/kmp_global.c @@ -14,6 +14,7 @@ #include "kmp.h" +#include "kmp_affinity.h" kmp_key_t __kmp_gtid_threadprivate_key; @@ -222,21 +223,22 @@ #if KMP_AFFINITY_SUPPORTED +KMPAffinity* __kmp_affinity_dispatch = NULL; + # if KMP_USE_HWLOC int __kmp_hwloc_error = FALSE; hwloc_topology_t __kmp_hwloc_topology = NULL; # endif -# if KMP_GROUP_AFFINITY - +# if KMP_OS_WINDOWS +# if KMP_GROUP_AFFINITY int __kmp_num_proc_groups = 1; - +# endif /* KMP_GROUP_AFFINITY */ kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL; kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL; kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL; kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL; - -# endif /* KMP_GROUP_AFFINITY */ +# endif /* KMP_OS_WINDOWS */ size_t __kmp_affin_mask_size = 0; enum affinity_type __kmp_affinity_type = affinity_default; Index: openmp/trunk/runtime/src/kmp_runtime.c =================================================================== --- openmp/trunk/runtime/src/kmp_runtime.c +++ openmp/trunk/runtime/src/kmp_runtime.c @@ -25,6 +25,7 @@ #include "kmp_error.h" #include "kmp_stats.h" #include "kmp_wait_release.h" +#include "kmp_affinity.h" #if OMPT_SUPPORT #include "ompt-specific.h" Index: openmp/trunk/runtime/src/kmp_settings.c =================================================================== --- openmp/trunk/runtime/src/kmp_settings.c +++ openmp/trunk/runtime/src/kmp_settings.c @@ -23,6 +23,7 @@ #include "kmp_i18n.h" #include "kmp_lock.h" #include "kmp_io.h" +#include "kmp_affinity.h" static int __kmp_env_toPrint( char const * name, int flag ); @@ -5339,44 +5340,12 @@ // affinity. // const char *var = "KMP_AFFINITY"; -# if KMP_USE_HWLOC - if(__kmp_hwloc_topology == NULL) { - if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if(__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); - } - if(hwloc_topology_load(__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if(__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); - } - } -# endif + KMPAffinity::pick_api(); if ( __kmp_affinity_type == affinity_disabled ) { KMP_AFFINITY_DISABLE(); } else if ( ! KMP_AFFINITY_CAPABLE() ) { -# if KMP_USE_HWLOC - const hwloc_topology_support* topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); - // Is the system capable of setting/getting this thread's affinity? - // also, is topology discovery possible? (pu indicates ability to discover processing units) - // and finally, were there no errors when calling any hwloc_* API functions? - if(topology_support && topology_support->cpubind->set_thisthread_cpubind && - topology_support->cpubind->get_thisthread_cpubind && - topology_support->discovery->pu && - !__kmp_hwloc_error) - { - // enables affinity according to KMP_AFFINITY_CAPABLE() macro - KMP_AFFINITY_ENABLE(TRUE); - } else { - // indicate that hwloc didn't work and disable affinity - __kmp_hwloc_error = TRUE; - KMP_AFFINITY_DISABLE(); - } -# else - __kmp_affinity_determine_capable( var ); -# endif // KMP_USE_HWLOC + __kmp_affinity_dispatch->determine_capable(var); if ( ! KMP_AFFINITY_CAPABLE() ) { if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings && ( __kmp_affinity_type != affinity_default ) Index: openmp/trunk/runtime/src/z_Linux_util.c =================================================================== --- openmp/trunk/runtime/src/z_Linux_util.c +++ openmp/trunk/runtime/src/z_Linux_util.c @@ -22,6 +22,7 @@ #include "kmp_io.h" #include "kmp_stats.h" #include "kmp_wait_release.h" +#include "kmp_affinity.h" #if !KMP_OS_FREEBSD && !KMP_OS_NETBSD # include @@ -113,118 +114,6 @@ * Affinity support */ -/* - * On some of the older OS's that we build on, these constants aren't present - * in #included from . They must be the same on - * all systems of the same arch where they are defined, and they cannot change. - * stone forever. - */ - -# if KMP_ARCH_X86 || KMP_ARCH_ARM -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 241 -# elif __NR_sched_setaffinity != 241 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 242 -# elif __NR_sched_getaffinity != 242 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ - -# elif KMP_ARCH_AARCH64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 122 -# elif __NR_sched_setaffinity != 122 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 123 -# elif __NR_sched_getaffinity != 123 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ - -# elif KMP_ARCH_X86_64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 203 -# elif __NR_sched_setaffinity != 203 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 204 -# elif __NR_sched_getaffinity != 204 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ - -# elif KMP_ARCH_PPC64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 222 -# elif __NR_sched_setaffinity != 222 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 223 -# elif __NR_sched_getaffinity != 223 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ - - -# else -# error Unknown or unsupported architecture - -# endif /* KMP_ARCH_* */ - -int -__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) -{ - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); -#if KMP_USE_HWLOC - int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); -#else - int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask ); -#endif - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FatalSysError ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; -} - -int -__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) -{ - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - -#if KMP_USE_HWLOC - int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); -#else - int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask ); -#endif - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FatalSysError ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; -} - void __kmp_affinity_bind_thread( int which ) { Index: openmp/trunk/runtime/src/z_Windows_NT_util.c =================================================================== --- openmp/trunk/runtime/src/z_Windows_NT_util.c +++ openmp/trunk/runtime/src/z_Windows_NT_util.c @@ -18,6 +18,7 @@ #include "kmp_i18n.h" #include "kmp_io.h" #include "kmp_wait_release.h" +#include "kmp_affinity.h" /* This code is related to NtQuerySystemInformation() function. This function is used in the Load balance algorithm for OMP_DYNAMIC=true to find the @@ -127,9 +128,7 @@ /* End of NtQuerySystemInformation()-related code */ -#if KMP_GROUP_AFFINITY static HMODULE kernel32 = NULL; -#endif /* KMP_GROUP_AFFINITY */ /* ----------------------------------------------------------------------------------- */ /* ----------------------------------------------------------------------------------- */ @@ -542,227 +541,9 @@ /* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ -#if KMP_GROUP_AFFINITY - -// -// Only 1 DWORD in the mask should have any procs set. -// Return the appropriate index, or -1 for an invalid mask. -// -int -__kmp_get_proc_group( kmp_affin_mask_t const *mask ) -{ - int i; - int group = -1; - for (i = 0; i < __kmp_num_proc_groups; i++) { -#if KMP_USE_HWLOC - // On windows, the long type is always 32 bits - unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2); - unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2+1); - if (first_32_bits == 0 && second_32_bits == 0) { - continue; - } -#else - if (mask[i] == 0) { - continue; - } -#endif - if (group >= 0) { - return -1; - } - group = i; - } - return group; -} - -#endif /* KMP_GROUP_AFFINITY */ - -int -__kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) -{ -#if KMP_USE_HWLOC - int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FatalSysError ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; -#else -# if KMP_GROUP_AFFINITY - - if (__kmp_num_proc_groups > 1) { - // - // Check for a valid mask. - // - GROUP_AFFINITY ga; - int group = __kmp_get_proc_group( mask ); - if (group < 0) { - if (abort_on_error) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - return -1; - } - - // - // Transform the bit vector into a GROUP_AFFINITY struct - // and make the system call to set affinity. - // - ga.Group = group; - ga.Mask = mask[group]; - ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; - - KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); - if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; - } - } - else - -# endif /* KMP_GROUP_AFFINITY */ - - { - if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; - } - } -#endif /* KMP_USE_HWLOC */ - return 0; -} - -int -__kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) -{ -#if KMP_USE_HWLOC - int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FatalSysError ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - return error; -#else /* KMP_USE_HWLOC */ -# if KMP_GROUP_AFFINITY - - if (__kmp_num_proc_groups > 1) { - KMP_CPU_ZERO(mask); - GROUP_AFFINITY ga; - KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); - - if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), - KMP_ERR(error), - __kmp_msg_null - ); - } - return error; - } - - if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) - || (ga.Mask == 0)) { - return -1; - } - - mask[ga.Group] = ga.Mask; - } - else - -# endif /* KMP_GROUP_AFFINITY */ - - { - kmp_affin_mask_t newMask, sysMask, retval; - - if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(FunctionError, "GetProcessAffinityMask()"), - KMP_ERR(error), - __kmp_msg_null - ); - } - return error; - } - retval = SetThreadAffinityMask(GetCurrentThread(), newMask); - if (! retval) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), - __kmp_msg_null - ); - } - return error; - } - newMask = SetThreadAffinityMask(GetCurrentThread(), retval); - if (! newMask) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), - __kmp_msg_null - ); - } - } - *mask = retval; - } -#endif /* KMP_USE_HWLOC */ - return 0; -} - void __kmp_affinity_bind_thread( int proc ) { -#if KMP_USE_HWLOC - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(proc, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); -#else /* KMP_USE_HWLOC */ -# if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { // // Form the GROUP_AFFINITY struct directly, rather than filling @@ -787,18 +568,14 @@ ); } } + } else { + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(proc, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); } - else - -# endif /* KMP_GROUP_AFFINITY */ - - { - kmp_affin_mask_t mask; - KMP_CPU_ZERO(&mask); - KMP_CPU_SET(proc, &mask); - __kmp_set_system_affinity(&mask, TRUE); - } -#endif /* KMP_USE_HWLOC */ } void