diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake
--- a/openmp/runtime/cmake/config-ix.cmake
+++ b/openmp/runtime/cmake/config-ix.cmake
@@ -109,6 +109,25 @@
   set(CMAKE_REQUIRED_LIBRARIES)
 endif()
 
+# Check for aligned memory allocator functions. _mm_malloc is probed with a
+# compile test that includes xmmintrin.h when that header is available;
+# aligned_alloc, posix_memalign and _aligned_malloc are probed by symbol lookup.
+check_include_file(xmmintrin.h LIBOMP_HAVE_XMMINTRIN_H)
+set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+if (LIBOMP_HAVE_XMMINTRIN_H)
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -DLIBOMP_HAVE_XMMINTRIN_H")
+endif()
+set(source_code "// check for _mm_malloc
+    #ifdef LIBOMP_HAVE_XMMINTRIN_H
+    #include <xmmintrin.h>
+    #endif
+    int main() { void *ptr = _mm_malloc(sizeof(int) * 1000, 64); _mm_free(ptr); return 0; }")
+check_cxx_source_compiles("${source_code}" LIBOMP_HAVE__MM_MALLOC)
+set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
+check_symbol_exists(aligned_alloc "stdlib.h" LIBOMP_HAVE_ALIGNED_ALLOC)
+check_symbol_exists(posix_memalign "stdlib.h" LIBOMP_HAVE_POSIX_MEMALIGN)
+check_symbol_exists(_aligned_malloc "malloc.h" LIBOMP_HAVE__ALIGNED_MALLOC)
+
 # Check linker flags
 if(WIN32)
   libomp_check_linker_flag(/SAFESEH LIBOMP_HAVE_SAFESEH_FLAG)
diff --git a/openmp/runtime/src/kmp_barrier.h b/openmp/runtime/src/kmp_barrier.h
--- a/openmp/runtime/src/kmp_barrier.h
+++ b/openmp/runtime/src/kmp_barrier.h
@@ -15,6 +15,38 @@
 
 #include "kmp.h"
 
+// KMP_ALIGNED_ALLOCATE(size, alignment) / KMP_ALIGNED_FREE(ptr): aligned
+// allocation through the first allocator detected at configure time. The last
+// fallback, KMP_INTERNAL_MALLOC, does not receive the requested alignment.
+#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
+#include <xmmintrin.h>
+#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
+#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
+#elif KMP_HAVE_ALIGNED_ALLOC
+#define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
+#define KMP_ALIGNED_FREE(ptr) free(ptr)
+#elif KMP_HAVE_POSIX_MEMALIGN
+#include "kmp_i18n.h"
+static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
+  void *ptr;
+  int n = posix_memalign(&ptr, alignment, size);
+  if (n != 0) {
+    // posix_memalign() reports failure via its return value, not errno.
+    __kmp_fatal(KMP_MSG(FunctionError, "posix_memalign()"), KMP_ERR(n),
+                __kmp_msg_null);
+  }
+  return ptr;
+}
+#define KMP_ALIGNED_FREE(ptr) free(ptr)
+#elif KMP_HAVE__ALIGNED_MALLOC
+#include <malloc.h>
+#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
+#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
+#else
+#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
+#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
+#endif
+
 // Use four cache lines: MLC tends to prefetch the next or previous cache line
 // creating a possible fake conflict between cores, so this is the only way to
 // guarantee that no such prefetch can happen.
@@ -79,7 +111,7 @@
 
   // Used instead of constructor to create aligned data
   static distributedBarrier *allocate(int nThreads) {
-    distributedBarrier *d = (distributedBarrier *)_mm_malloc(
+    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
         sizeof(distributedBarrier), 4 * CACHE_LINE);
     d->num_threads = 0;
     d->max_threads = 0;
@@ -96,7 +128,7 @@
     return d;
   }
 
-  static void deallocate(distributedBarrier *db) { _mm_free(db); }
+  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
 
   void update_num_threads(size_t nthr) { init(nthr); }
 
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -88,6 +88,16 @@
 #define KMP_HAVE_ATTRIBUTE_RTM LIBOMP_HAVE_ATTRIBUTE_RTM
 #cmakedefine01 LIBOMP_ARCH_AARCH64_A64FX
 #define KMP_ARCH_AARCH64_A64FX LIBOMP_ARCH_AARCH64_A64FX
+#cmakedefine01 LIBOMP_HAVE_XMMINTRIN_H
+#define KMP_HAVE_XMMINTRIN_H LIBOMP_HAVE_XMMINTRIN_H
+#cmakedefine01 LIBOMP_HAVE__MM_MALLOC
+#define KMP_HAVE__MM_MALLOC LIBOMP_HAVE__MM_MALLOC
+#cmakedefine01 LIBOMP_HAVE_ALIGNED_ALLOC
+#define KMP_HAVE_ALIGNED_ALLOC LIBOMP_HAVE_ALIGNED_ALLOC
+#cmakedefine01 LIBOMP_HAVE_POSIX_MEMALIGN
+#define KMP_HAVE_POSIX_MEMALIGN LIBOMP_HAVE_POSIX_MEMALIGN
+#cmakedefine01 LIBOMP_HAVE__ALIGNED_MALLOC
+#define KMP_HAVE__ALIGNED_MALLOC LIBOMP_HAVE__ALIGNED_MALLOC
 
 // Configured cache line based on architecture
 #if KMP_ARCH_PPC64
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -1038,6 +1038,10 @@
     KMP_MFENCE_();                                                             \
   }
 #define KMP_SFENCE() KMP_SFENCE_()
+#else
+// Fallback when the preceding branch does not apply: both fences use KMP_MB().
+#define KMP_MFENCE() KMP_MB()
+#define KMP_SFENCE() KMP_MB()
 #endif
 
 #ifndef KMP_IMB