Index: runtime/src/kmp.h
===================================================================
--- runtime/src/kmp.h
+++ runtime/src/kmp.h
@@ -3263,6 +3263,7 @@
 
 extern kmp_r_sched_t __kmp_get_schedule_global(void);
 extern void __kmp_adjust_num_threads(int new_nproc);
+extern void __kmp_check_stksize(size_t *val);
 
 extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
 extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
Index: runtime/src/kmp_settings.cpp
===================================================================
--- runtime/src/kmp_settings.cpp
+++ runtime/src/kmp_settings.cpp
@@ -289,6 +289,20 @@
   }
 } // __kmp_stg_parse_bool
 
+// Validate a stack size in place: a value outside
+// [KMP_MIN_STKSIZE, KMP_MAX_STKSIZE) is replaced with KMP_DEFAULT_STKSIZE
+// (mimics old default behaviour); on Darwin the result is then passed
+// through __kmp_round4k.
+// Placed here in order to use the __kmp_round4k static function.
+void __kmp_check_stksize(size_t *val) {
+  if (*val < KMP_MIN_STKSIZE || *val >= KMP_MAX_STKSIZE) {
+    *val = KMP_DEFAULT_STKSIZE; // mimics old default behaviour
+  }
+#if KMP_OS_DARWIN
+  *val = __kmp_round4k(*val);
+#endif // KMP_OS_DARWIN
+}
+
 static void __kmp_stg_parse_size(char const *name, char const *value,
                                  size_t size_min, size_t size_max,
                                  int *is_specified, size_t *out,
Index: runtime/src/z_Linux_util.cpp
===================================================================
--- runtime/src/z_Linux_util.cpp
+++ runtime/src/z_Linux_util.cpp
@@ -1833,6 +1833,19 @@
 
   __kmp_xproc = __kmp_get_xproc();
 
+#if ! KMP_32_BIT_ARCH
+  struct rlimit rlim;
+  // read stack size of calling thread, save it as default for worker threads
+  status = getrlimit(RLIMIT_STACK, &rlim);
+  if (status == 0) { // success?
+    __kmp_stksize = rlim.rlim_cur;
+    // if system stack size is too big then don't propagate it to workers
+    if (__kmp_stksize > KMP_DEFAULT_STKSIZE * 16) // just a heuristic...
+      __kmp_stksize = KMP_DEFAULT_STKSIZE * 16;
+    __kmp_check_stksize(&__kmp_stksize); // check value just in case
+  }
+#endif /* KMP_32_BIT_ARCH */
+
   if (sysconf(_SC_THREADS)) {
 
     /* Query the maximum number of threads */
Index: runtime/test/misc_bugs/stack-propagate.c
===================================================================
--- runtime/test/misc_bugs/stack-propagate.c
+++ runtime/test/misc_bugs/stack-propagate.c
@@ -0,0 +1,68 @@
+// RUN: %libomp-compile-and-run
+
+// https://bugs.llvm.org/show_bug.cgi?id=26540 requested
+// stack size to be propagated from master to workers.
+// Library implements propagation of not too big stack
+// for Linux x86_64 platform (skipped Windows for now).
+//
+// The test checks that workers can use more than 4MB
+// of stack (4MB - was historical default for
+// stack size of worker thread).
+
+#include <stdio.h>
+#include <omp.h>
+#if !defined(_WIN32)
+#include <sys/resource.h> // getrlimit
+#endif
+
+#define STK 4800000
+
+// Fills an n-element VLA of doubles on the calling thread's stack and
+// returns the sum of arr[i] / n; the array is what consumes the stack.
+// The th argument is not used.
+double foo(int n, int th)
+{
+    double arr[n];
+    int i;
+    double res = 0.0;
+    for (i = 0; i < n; ++i) {
+        arr[i] = (double)i / (n + 2);
+    }
+    for (i = 0; i < n; ++i) {
+        res += arr[i] / n;
+    }
+    return res;
+}
+
+int main(int argc, char *argv[])
+{
+    int status;
+    double val = 0.0;
+    int m = STK / 8; // > 4800000 bytes per thread
+#if defined(_WIN32)
+    // don't test Windows
+    printf("stack propagation not implemented, skipping test...\n");
+    printf("passed\n");
+#else
+    // read stack size of calling thread, save it as default
+    struct rlimit rlim;
+    status = getrlimit(RLIMIT_STACK, &rlim);
+    if (sizeof(void *) > 4 &&                 // do not test 32-bit systems,
+        status == 0 && rlim.rlim_cur > STK) { // or small initial stack size
+#pragma omp parallel reduction(+:val)
+        {
+            val += foo(m, omp_get_thread_num());
+        }
+    } else {
+        printf("too small stack size limit (needs about 8MB), skipping test...\n");
+        val = 0.2;
+    }
+    if (val > 0.1) {
+        printf("passed\n");
+    } else {
+        printf("failed, val = %f\n", val);
+        return 1;
+    }
+#endif // _WIN32
+    return 0;
+}