Index: openmp/trunk/runtime/cmake/config-ix.cmake =================================================================== --- openmp/trunk/runtime/cmake/config-ix.cmake +++ openmp/trunk/runtime/cmake/config-ix.cmake @@ -246,23 +246,22 @@ # Check if HWLOC support is available if(${LIBOMP_USE_HWLOC}) - if(WIN32) - set(LIBOMP_HAVE_HWLOC FALSE) - libomp_say("Using hwloc not supported on Windows yet") - else() - set(CMAKE_REQUIRED_INCLUDES ${LIBOMP_HWLOC_INSTALL_DIR}/include) - check_include_file(hwloc.h LIBOMP_HAVE_HWLOC_H) - set(CMAKE_REQUIRED_INCLUDES) - check_library_exists(hwloc hwloc_topology_init + set(CMAKE_REQUIRED_INCLUDES ${LIBOMP_HWLOC_INSTALL_DIR}/include) + check_include_file(hwloc.h LIBOMP_HAVE_HWLOC_H) + set(CMAKE_REQUIRED_INCLUDES) + find_library(LIBOMP_HWLOC_LIBRARY + NAMES hwloc libhwloc + HINTS ${LIBOMP_HWLOC_INSTALL_DIR}/lib) + if(LIBOMP_HWLOC_LIBRARY) + check_library_exists(${LIBOMP_HWLOC_LIBRARY} hwloc_topology_init ${LIBOMP_HWLOC_INSTALL_DIR}/lib LIBOMP_HAVE_LIBHWLOC) - find_library(LIBOMP_HWLOC_LIBRARY hwloc ${LIBOMP_HWLOC_INSTALL_DIR}/lib) get_filename_component(LIBOMP_HWLOC_LIBRARY_DIR ${LIBOMP_HWLOC_LIBRARY} PATH) - if(LIBOMP_HAVE_HWLOC_H AND LIBOMP_HAVE_LIBHWLOC AND LIBOMP_HWLOC_LIBRARY) - set(LIBOMP_HAVE_HWLOC TRUE) - else() - set(LIBOMP_HAVE_HWLOC FALSE) - libomp_say("Could not find hwloc") - endif() + endif() + if(LIBOMP_HAVE_HWLOC_H AND LIBOMP_HAVE_LIBHWLOC AND LIBOMP_HWLOC_LIBRARY) + set(LIBOMP_HAVE_HWLOC TRUE) + else() + set(LIBOMP_HAVE_HWLOC FALSE) + libomp_say("Could not find hwloc") endif() endif() Index: openmp/trunk/runtime/src/kmp.h =================================================================== --- openmp/trunk/runtime/src/kmp.h +++ openmp/trunk/runtime/src/kmp.h @@ -79,10 +79,8 @@ class kmp_stats_list; #endif -#if KMP_USE_HWLOC -#include "hwloc.h" -extern hwloc_topology_t __kmp_hwloc_topology; -extern int __kmp_hwloc_error; +#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED +# include "hwloc.h" #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 @@ -522,14 +520,43 @@ */ #if KMP_AFFINITY_SUPPORTED +# if KMP_GROUP_AFFINITY +// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). +# if _MSC_VER < 1600 +typedef struct GROUP_AFFINITY { + KAFFINITY Mask; + WORD Group; + WORD Reserved[3]; +} GROUP_AFFINITY; +# endif /* _MSC_VER < 1600 */ +extern int __kmp_num_proc_groups; +typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); +extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; + +typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); +extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; + +typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); +extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; + +typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); +extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; +# endif /* KMP_GROUP_AFFINITY */ + extern size_t __kmp_affin_mask_size; # define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) # define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) # define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) -# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT) +# if !KMP_USE_HWLOC +# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT) +# define KMP_CPU_SET_ITERATE(i,mask) \ + for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i) +# endif #if KMP_USE_HWLOC +extern hwloc_topology_t __kmp_hwloc_topology; +extern int __kmp_hwloc_error; typedef hwloc_cpuset_t kmp_affin_mask_t; # define KMP_CPU_SET(i,mask) hwloc_bitmap_set((hwloc_cpuset_t)mask, (unsigned)i) # define KMP_CPU_ISSET(i,mask) hwloc_bitmap_isset((hwloc_cpuset_t)mask, (unsigned)i) @@ -600,9 +627,6 @@ } #else /* KMP_USE_HWLOC */ -# define KMP_CPU_SET_ITERATE(i,mask) \ - for(i = 0; (size_t)i < KMP_CPU_SETSIZE; ++i) - # if KMP_OS_LINUX // // On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size @@ -678,20 +702,8 @@ // # if KMP_GROUP_AFFINITY - -// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). -# if _MSC_VER < 1600 -typedef struct GROUP_AFFINITY { - KAFFINITY Mask; - WORD Group; - WORD Reserved[3]; -} GROUP_AFFINITY; -# endif - typedef DWORD_PTR kmp_affin_mask_t; -extern int __kmp_num_proc_groups; - # define _KMP_CPU_SET(i,mask) \ (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |= \ (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))) @@ -758,19 +770,6 @@ } \ } -typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); -extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; - -typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void); -extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; - -typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); -extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; - -typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); -extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; - -extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask); # else /* KMP_GROUP_AFFINITY */ @@ -817,6 +816,11 @@ #endif /* KMP_USE_HWLOC */ +// prototype after typedef of kmp_affin_mask_t +#if KMP_GROUP_AFFINITY +extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask); +#endif + // // Declare local char buffers with this size for printing debug and info // messages, using __kmp_affinity_print_mask(). Index: openmp/trunk/runtime/src/kmp_ftn_entry.h =================================================================== --- openmp/trunk/runtime/src/kmp_ftn_entry.h +++ openmp/trunk/runtime/src/kmp_ftn_entry.h @@ -270,9 +270,9 @@ return 0; } - #if KMP_GROUP_AFFINITY && !KMP_USE_HWLOC + #if KMP_GROUP_AFFINITY if ( __kmp_num_proc_groups > 1 ) { - return (int)KMP_CPU_SETSIZE; + return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT); } #endif /* KMP_GROUP_AFFINITY */ return __kmp_xproc; Index: openmp/trunk/runtime/src/kmp_global.c =================================================================== --- openmp/trunk/runtime/src/kmp_global.c +++ openmp/trunk/runtime/src/kmp_global.c @@ -35,10 +35,6 @@ // gives reference tick for all events (considered the 0 tick) tsc_tick_count __kmp_stats_start_time; #endif -#if KMP_USE_HWLOC -int __kmp_hwloc_error = FALSE; -hwloc_topology_t __kmp_hwloc_topology = NULL; -#endif /* ----------------------------------------------------- */ /* INITIALIZATION VARIABLES */ @@ -220,6 +216,11 @@ #if KMP_AFFINITY_SUPPORTED +# if KMP_USE_HWLOC +int __kmp_hwloc_error = FALSE; +hwloc_topology_t __kmp_hwloc_topology = NULL; +# endif + # if KMP_GROUP_AFFINITY int __kmp_num_proc_groups = 1; Index: openmp/trunk/runtime/src/z_Windows_NT_util.c =================================================================== --- openmp/trunk/runtime/src/z_Windows_NT_util.c +++ openmp/trunk/runtime/src/z_Windows_NT_util.c @@ -552,9 +552,18 @@ int i; int group = -1; for (i = 0; i < __kmp_num_proc_groups; i++) { +#if KMP_USE_HWLOC + // On windows, the long type is always 32 bits + unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2); + unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong((hwloc_const_bitmap_t)mask, i*2+1); + if (first_32_bits == 0 && second_32_bits == 0) { + continue; + } +#else if (mask[i] == 0) { continue; } +#endif if (group >= 0) { return -1; } @@ -568,8 +577,23 @@ int __kmp_set_system_affinity( kmp_affin_mask_t const *mask, int abort_on_error ) { - -#if KMP_GROUP_AFFINITY +#if KMP_USE_HWLOC + int retval = hwloc_set_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FatalSysError ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + return error; +#else +# if KMP_GROUP_AFFINITY if (__kmp_num_proc_groups > 1) { // @@ -608,7 +632,7 @@ } else -#endif /* KMP_GROUP_AFFINITY */ +# endif /* KMP_GROUP_AFFINITY */ { if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) { @@ -624,14 +648,30 @@ return error; } } +#endif /* KMP_USE_HWLOC */ return 0; } int __kmp_get_system_affinity( kmp_affin_mask_t *mask, int abort_on_error ) { - -#if KMP_GROUP_AFFINITY +#if KMP_USE_HWLOC + int retval = hwloc_get_cpubind(__kmp_hwloc_topology, (hwloc_cpuset_t)mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg( + kmp_ms_fatal, + KMP_MSG( FatalSysError ), + KMP_ERR( error ), + __kmp_msg_null + ); + } + return error; +#else /* KMP_USE_HWLOC */ +# if KMP_GROUP_AFFINITY if (__kmp_num_proc_groups > 1) { KMP_CPU_ZERO(mask); @@ -660,7 +700,7 @@ } else -#endif /* KMP_GROUP_AFFINITY */ +# endif /* KMP_GROUP_AFFINITY */ { kmp_affin_mask_t newMask, sysMask, retval; @@ -704,14 +744,22 @@ } *mask = retval; } +#endif /* KMP_USE_HWLOC */ return 0; } void __kmp_affinity_bind_thread( int proc ) { - -#if KMP_GROUP_AFFINITY +#if KMP_USE_HWLOC + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(proc, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); +#else /* KMP_USE_HWLOC */ +# if KMP_GROUP_AFFINITY if (__kmp_num_proc_groups > 1) { // @@ -740,7 +788,7 @@ } else -#endif /* KMP_GROUP_AFFINITY */ +# endif /* KMP_GROUP_AFFINITY */ { kmp_affin_mask_t mask; @@ -748,6 +796,7 @@ KMP_CPU_SET(proc, &mask); __kmp_set_system_affinity(&mask, TRUE); } +#endif /* KMP_USE_HWLOC */ } void