Select an aligned allocator for distributedBarrier at configure time
(_mm_malloc, aligned_alloc, posix_memalign, _aligned_malloc, or the plain
internal allocator as a last resort) instead of calling _mm_malloc/_mm_free
unconditionally, and add fallback KMP_MFENCE()/KMP_SFENCE() definitions that
map to KMP_MB().

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Indentation, blank context lines and continuation-backslash columns are
reconstructed; verify them against the tree (or apply with
--ignore-whitespace).
NOTE(review): the last-resort KMP_INTERNAL_MALLOC(size) branch ignores the
requested alignment.

diff --git a/openmp/runtime/cmake/config-ix.cmake b/openmp/runtime/cmake/config-ix.cmake
--- a/openmp/runtime/cmake/config-ix.cmake
+++ b/openmp/runtime/cmake/config-ix.cmake
@@ -109,6 +109,23 @@
   set(CMAKE_REQUIRED_LIBRARIES)
 endif()
 
+# Check for aligned memory allocator function
+check_include_file(xmmintrin.h LIBOMP_HAVE_XMMINTRIN_H)
+set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
+if (LIBOMP_HAVE_XMMINTRIN_H)
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -DLIBOMP_HAVE_XMMINTRIN_H")
+endif()
+set(source_code "// check for _mm_malloc
+    #ifdef LIBOMP_HAVE_XMMINTRIN_H
+    #include <xmmintrin.h>
+    #endif
+    int main() { void *ptr = _mm_malloc(sizeof(int) * 1000, 64); _mm_free(ptr); return 0; }")
+check_cxx_source_compiles("${source_code}" LIBOMP_HAVE__MM_MALLOC)
+set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
+check_symbol_exists(aligned_alloc "stdlib.h" LIBOMP_HAVE_ALIGNED_ALLOC)
+check_symbol_exists(posix_memalign "stdlib.h" LIBOMP_HAVE_POSIX_MEMALIGN)
+check_symbol_exists(_aligned_malloc "malloc.h" LIBOMP_HAVE__ALIGNED_MALLOC)
+
 # Check linker flags
 if(WIN32)
   libomp_check_linker_flag(/SAFESEH LIBOMP_HAVE_SAFESEH_FLAG)
diff --git a/openmp/runtime/src/kmp_barrier.h b/openmp/runtime/src/kmp_barrier.h
--- a/openmp/runtime/src/kmp_barrier.h
+++ b/openmp/runtime/src/kmp_barrier.h
@@ -14,6 +14,35 @@
 #define KMP_BARRIER_H
 
 #include "kmp.h"
+#include "kmp_i18n.h"
+
+#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
+#include <xmmintrin.h>
+#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
+#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
+#elif KMP_HAVE_ALIGNED_ALLOC
+#define KMP_ALIGNED_ALLOCATE(size, alignment) aligned_alloc(alignment, size)
+#define KMP_ALIGNED_FREE(ptr) free(ptr)
+#elif KMP_HAVE_POSIX_MEMALIGN
+static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
+  void *ptr = nullptr;
+  int n = posix_memalign(&ptr, alignment, size);
+  if (n != 0) {
+    // posix_memalign() does not hand back an allocation on failure, so there
+    // is nothing to release here.
+    return nullptr;
+  }
+  return ptr;
+}
+#define KMP_ALIGNED_FREE(ptr) free(ptr)
+#elif KMP_HAVE__ALIGNED_MALLOC
+#include <malloc.h>
+#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
+#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
+#else
+#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
+#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
+#endif
 
 // Use four cache lines: MLC tends to prefetch the next or previous cache line
 // creating a possible fake conflict between cores, so this is the only way to
@@ -79,8 +108,11 @@
 
   // Used instead of constructor to create aligned data
   static distributedBarrier *allocate(int nThreads) {
-    distributedBarrier *d = (distributedBarrier *)_mm_malloc(
+    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
         sizeof(distributedBarrier), 4 * CACHE_LINE);
+    if (!d) {
+      KMP_FATAL(MemoryAllocFailed);
+    }
     d->num_threads = 0;
     d->max_threads = 0;
     for (int i = 0; i < MAX_ITERS; ++i)
@@ -96,7 +128,7 @@
     return d;
   }
 
-  static void deallocate(distributedBarrier *db) { _mm_free(db); }
+  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
 
   void update_num_threads(size_t nthr) { init(nthr); }
 
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -88,6 +88,16 @@
 #define KMP_HAVE_ATTRIBUTE_RTM LIBOMP_HAVE_ATTRIBUTE_RTM
 #cmakedefine01 LIBOMP_ARCH_AARCH64_A64FX
 #define KMP_ARCH_AARCH64_A64FX LIBOMP_ARCH_AARCH64_A64FX
+#cmakedefine01 LIBOMP_HAVE_XMMINTRIN_H
+#define KMP_HAVE_XMMINTRIN_H LIBOMP_HAVE_XMMINTRIN_H
+#cmakedefine01 LIBOMP_HAVE__MM_MALLOC
+#define KMP_HAVE__MM_MALLOC LIBOMP_HAVE__MM_MALLOC
+#cmakedefine01 LIBOMP_HAVE_ALIGNED_ALLOC
+#define KMP_HAVE_ALIGNED_ALLOC LIBOMP_HAVE_ALIGNED_ALLOC
+#cmakedefine01 LIBOMP_HAVE_POSIX_MEMALIGN
+#define KMP_HAVE_POSIX_MEMALIGN LIBOMP_HAVE_POSIX_MEMALIGN
+#cmakedefine01 LIBOMP_HAVE__ALIGNED_MALLOC
+#define KMP_HAVE__ALIGNED_MALLOC LIBOMP_HAVE__ALIGNED_MALLOC
 
 // Configured cache line based on architecture
 #if KMP_ARCH_PPC64
diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h
--- a/openmp/runtime/src/kmp_os.h
+++ b/openmp/runtime/src/kmp_os.h
@@ -1038,6 +1038,9 @@
     KMP_MFENCE_();                                                             \
   }
 #define KMP_SFENCE() KMP_SFENCE_()
+#else
+#define KMP_MFENCE() KMP_MB()
+#define KMP_SFENCE() KMP_MB()
 #endif
 
 #ifndef KMP_IMB