diff --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt --- a/openmp/runtime/CMakeLists.txt +++ b/openmp/runtime/CMakeLists.txt @@ -81,7 +81,7 @@ endif() endif() -libomp_check_variable(LIBOMP_ARCH 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64) +libomp_check_variable(LIBOMP_ARCH wasm32 32e x86_64 32 i386 arm ppc64 ppc64le aarch64 aarch64_a64fx mic mips mips64 riscv64) set(LIBOMP_LIB_TYPE normal CACHE STRING "Performance,Profiling,Stubs library (normal/profile/stubs)") diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -695,7 +695,7 @@ } #endif // KMP_MIC #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \ - KMP_ARCH_RISCV64) + KMP_ARCH_RISCV64 || KMP_ARCH_WASM) // Nothing to see here move along #elif KMP_ARCH_PPC64 // Nothing needed here (we have a real MB above). diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -356,7 +356,7 @@ // They come in two flavors: 64-bit unsigned, and either 32-bit signed // (IA-32 architecture) or 64-bit signed (Intel(R) 64). 
-#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS +#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM #define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 #define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 #define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -70,7 +70,7 @@ #error Unknown compiler #endif -#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) +#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD) && !KMP_ARCH_WASM #define KMP_AFFINITY_SUPPORTED 1 #if KMP_OS_WINDOWS && KMP_ARCH_X86_64 #define KMP_GROUP_AFFINITY 1 @@ -164,7 +164,7 @@ #define KMP_UINT64_SPEC "llu" #endif /* KMP_OS_UNIX */ -#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS +#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 @@ -173,7 +173,7 @@ #error "Can't determine size_t printf format specifier." #endif -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_WASM #define KMP_SIZE_T_MAX (0xFFFFFFFF) #else #define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) @@ -1102,7 +1102,7 @@ KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), \ (kmp_int64)(b), (kmp_int64)(c)) -#if KMP_ARCH_X86 || KMP_ARCH_MIPS +#if KMP_ARCH_X86 || KMP_ARCH_MIPS || KMP_ARCH_WASM // What about ARM? 
#define TCR_PTR(a) ((void *)TCR_4(a)) #define TCW_PTR(a, b) TCW_4((a), (b)) @@ -1244,6 +1244,9 @@ extern void *__kmp_lookup_symbol(const char *name); #define KMP_DLSYM(name) __kmp_lookup_symbol(name) #define KMP_DLSYM_NEXT(name) nullptr +#elif KMP_ARCH_WASM +#define KMP_DLSYM(name) nullptr +#define KMP_DLSYM_NEXT(name) nullptr #else #define KMP_DLSYM(name) dlsym(RTLD_DEFAULT, name) #define KMP_DLSYM_NEXT(name) dlsym(RTLD_NEXT, name) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -39,7 +39,7 @@ #if (defined __linux) #undef KMP_OS_LINUX #define KMP_OS_LINUX 1 -#elif (defined __linux__) +#elif (defined __linux__) || defined(__EMSCRIPTEN__) #undef KMP_OS_LINUX #define KMP_OS_LINUX 1 #else @@ -174,6 +174,10 @@ #define KMP_ARCH_ARM 1 #endif +#if defined(__wasm32__) +#define KMP_ARCH_WASM 1 +#endif + #if defined(__MIC__) || defined(__MIC2__) #define KMP_MIC 1 #if __MIC2__ || __KNC__ @@ -190,7 +194,8 @@ #endif /* Specify 32 bit architectures here */ -#define KMP_32_BIT_ARCH (KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS) +#define KMP_32_BIT_ARCH \ + (KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM) // Platforms which support Intel(R) Many Integrated Core Architecture #define KMP_MIC_SUPPORTED \ @@ -199,7 +204,7 @@ // TODO: Fixme - This is clever, but really fugly #if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64 + \ - KMP_ARCH_RISCV64) + KMP_ARCH_RISCV64 + KMP_ARCH_WASM) #error Unknown or unsupported architecture #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -1745,7 +1745,7 @@ if (nthreads == 1) { /* josh todo: hypothetical question: what do we do for OS X*? 
*/ #if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_WASM) void *args[argc]; #else void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); @@ -8752,7 +8752,8 @@ #endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD -#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS +#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS || \ + KMP_ARCH_WASM #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -1756,7 +1756,27 @@ #if KMP_OS_LINUX # if KMP_ARCH_ARM .section .note.GNU-stack,"",%progbits -# else +# elif !KMP_ARCH_WASM .section .note.GNU-stack,"",@progbits # endif #endif + +#if KMP_ARCH_WASM +.data +.global .gomp_critical_user_ +.global .gomp_critical_user_.var +.global .gomp_critical_user_.reduction.var +.global __kmp_unnamed_critical_addr +.gomp_critical_user_: +.zero 4 +.size .gomp_critical_user_, 4 +.gomp_critical_user_.var: +.zero 4 +.size .gomp_critical_user_.var, 4 +.gomp_critical_user_.reduction.var: +.zero 4 +.size .gomp_critical_user_.reduction.var, 4 +__kmp_unnamed_critical_addr: + .4byte .gomp_critical_user_ + .size __kmp_unnamed_critical_addr, 4 +#endif diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -266,7 +266,7 @@ #endif // KMP_USE_FUTEX -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS) +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_WASM) && (!KMP_ASM_INTRINS) /* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to use compare_and_store for these routines */ @@ -326,7 
+326,7 @@ return old_value; } -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_WASM kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) { kmp_int8 old_value, new_value; @@ -2439,6 +2439,34 @@ #endif // USE_LOAD_BALANCE + +typedef void (*microtask_t0)(int *, int *); +typedef void (*microtask_t1)(int *, int *, void*); +typedef void (*microtask_t2)(int *, int *, void*, void*); +typedef void (*microtask_t3)(int *, int *, void*, void*, void*); +typedef void (*microtask_t4)(int *, int *, void*, void*, void*, void*); +typedef void (*microtask_t5)(int *, int *, void*, void*, void*, void*, void*); +typedef void (*microtask_t6)(int *, int *, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t7)(int *, int *, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t8)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t9)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t10)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t11)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t12)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t13)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t14)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t15)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t16)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t17)(int *, int 
*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t18)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t19)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t20)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t21)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t22)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t23)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t24)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); +typedef void (*microtask_t25)(int *, int *, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*, void*); + #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \ KMP_ARCH_PPC64 || KMP_ARCH_RISCV64) @@ -2462,67 +2490,117 @@ fflush(stderr); exit(-1); case 0: - (*pkfn)(&gtid, &tid); + (*(microtask_t0)pkfn)(&gtid, &tid); break; case 1: - (*pkfn)(&gtid, &tid,
p_argv[0]); + (*(microtask_t1)pkfn)(&gtid, &tid, p_argv[0]); break; case 2: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]); + (*(microtask_t2)pkfn)(&gtid, &tid, p_argv[0], p_argv[1]); break; case 3: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]); + (*(microtask_t3)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]); break; case 4: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]); + (*(microtask_t4)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]); break; case 5: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]); + (*(microtask_t5)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]); break; case 6: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t6)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5]); break; case 7: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t7)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6]); break; case 8: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t8)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7]); break; case 9: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t9)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], p_argv[8]); break; case 10: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t10)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]); break; case 11: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t11)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]); break; case
12: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t12)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], p_argv[11]); break; case 13: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t13)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], p_argv[11], p_argv[12]); break; case 14: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t14)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], p_argv[11], p_argv[12], p_argv[13]); break; case 15: - (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + (*(microtask_t15)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], p_argv[11], p_argv[12], p_argv[13], p_argv[14]); break; + case 16: + (*(microtask_t16)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15]); + break; + case 17: + (*(microtask_t17)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16]); + break; + case 18: + (*(microtask_t18)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16], p_argv[17]); + break; + case 19: + (*(microtask_t19)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5],
p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16], p_argv[17], p_argv[18]); + break; + case 20: + (*(microtask_t20)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16], p_argv[17], p_argv[18], p_argv[19]); + break; + case 21: + (*(microtask_t21)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16], p_argv[17], p_argv[18], p_argv[19], p_argv[20]); + break; + case 22: + (*(microtask_t22)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16], p_argv[17], p_argv[18], p_argv[19], p_argv[20], p_argv[21]); + break; + case 23: + (*(microtask_t23)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16], p_argv[17], p_argv[18], p_argv[19], p_argv[20], p_argv[21], p_argv[22]); + break; + case 24: + (*(microtask_t24)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16], p_argv[17], p_argv[18], p_argv[19], p_argv[20], p_argv[21], p_argv[22], p_argv[23]); + break; + case 25: + (*(microtask_t25)pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4], + p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10], + p_argv[11], p_argv[12], p_argv[13], p_argv[14], p_argv[15], p_argv[16], p_argv[17],
p_argv[18], p_argv[19], p_argv[20], p_argv[21], p_argv[22], p_argv[23], p_argv[24]); + break; } return 1;