Index: runtime/cmake/LibompMicroTests.cmake =================================================================== --- runtime/cmake/LibompMicroTests.cmake +++ runtime/cmake/LibompMicroTests.cmake @@ -198,6 +198,7 @@ elseif(${INTEL64}) libomp_append(libomp_expected_library_deps libc.so.6) libomp_append(libomp_expected_library_deps ld-linux-x86-64.so.2) + libomp_append(libomp_expected_library_deps ld-linux-x32.so.2) elseif(${ARM}) libomp_append(libomp_expected_library_deps libc.so.6) libomp_append(libomp_expected_library_deps libffi.so.6) Index: runtime/src/kmp.h =================================================================== --- runtime/src/kmp.h +++ runtime/src/kmp.h @@ -93,7 +93,7 @@ #endif #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #include #endif @@ -468,7 +468,7 @@ #define KMP_FAST_REDUCTION_BARRIER 1 #undef KMP_FAST_REDUCTION_CORE_DUO -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #define KMP_FAST_REDUCTION_CORE_DUO 1 #endif @@ -721,7 +721,7 @@ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 affinity_top_method_apicid, affinity_top_method_x2apicid, -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too #if KMP_GROUP_AFFINITY affinity_top_method_group, @@ -875,7 +875,7 @@ #define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_X86_X32 #define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) #elif KMP_ARCH_X86_64 #define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) @@ -931,7 +931,7 @@ #else #define KMP_BLOCKTIME(team, tid) \ (get__bt_set(team, tid) ? 
get__blocktime(team, tid) : __kmp_dflt_blocktime) -#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) // HW TSC is used to reduce overhead (clock tick instead of nanosecond). extern kmp_uint64 __kmp_ticks_per_msec; #if KMP_COMPILER_ICC @@ -994,7 +994,7 @@ /* Minimum number of threads before switch to TLS gtid (experimentally determined) */ /* josh TODO: what about OS X* tuning? */ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #define KMP_TLS_GTID_MIN 5 #else #define KMP_TLS_GTID_MIN INT_MAX @@ -1042,7 +1042,7 @@ #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 typedef struct kmp_cpuid { kmp_uint32 eax; kmp_uint32 ebx; @@ -1187,7 +1187,7 @@ long nivcsw; /* the number of times a context switch was forced */ } kmp_sys_info_t; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 typedef struct kmp_cpuinfo { int initialized; // If 0, other fields are not initialized. int signature; // CPUID(1).EAX @@ -2529,7 +2529,7 @@ // t_inline_argv. Historically, we have supported at least 96 bytes. Using a // larger value for more space between the master write/worker read section and // read/write by all section seems to buy more performance on EPCC PARALLEL. 
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #define KMP_INLINE_ARGV_BYTES \ (4 * CACHE_LINE - \ ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \ @@ -2580,12 +2580,12 @@ ompt_lw_taskteam_t *ompt_serialized_team_info; #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 kmp_int8 t_fp_control_saved; kmp_int8 t_pad2b; kmp_int16 t_x87_fpu_control_word; // FP control regs kmp_uint32 t_mxcsr; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES]; @@ -2609,7 +2609,7 @@ // omp_set_num_threads() call // Read/write by workers as well -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra // padding serves to fix the performance of epcc 'parallel' and 'barrier' when @@ -2751,7 +2751,7 @@ placement info */ extern int __kmp_storage_map_verbose_specified; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 extern kmp_cpuinfo_t __kmp_cpuinfo; #endif @@ -2906,11 +2906,11 @@ #endif extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */ extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */ -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism 
enabled by default via @@ -3224,7 +3224,7 @@ extern void __kmp_expand_host_name(char *buffer, size_t size); extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern); -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 extern void __kmp_initialize_system_tick(void); /* Initialize timer tick value */ #endif @@ -3373,7 +3373,7 @@ enum fork_context_e fork_context, kmp_int32 argc, microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_AARCH64) && KMP_OS_LINUX va_list *ap #else va_list ap @@ -3485,7 +3485,7 @@ // Assembly routines that have no compiler intrinsic replacement // -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 extern void __kmp_query_cpuid(kmp_cpuinfo_t *p); @@ -3497,7 +3497,7 @@ extern void __kmp_clear_x87_fpu_status_word(); #define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc, void *argv[] Index: runtime/src/kmp_affinity.h =================================================================== --- runtime/src/kmp_affinity.h +++ runtime/src/kmp_affinity.h @@ -201,6 +201,18 @@ #elif __NR_sched_getaffinity != 204 #error Wrong code for getaffinity system call. #endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_X86_X32 +#define __X32_SYSCALL_BIT 0x40000000 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__X32_SYSCALL_BIT + 203) +#elif __NR_sched_setaffinity != (__X32_SYSCALL_BIT + 203) +#error Wrong code for setaffinity system call. 
+#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity (__X32_SYSCALL_BIT + 204) +#elif __NR_sched_getaffinity != (__X32_SYSCALL_BIT + 204) +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ #elif KMP_ARCH_PPC64 #ifndef __NR_sched_setaffinity #define __NR_sched_setaffinity 222 Index: runtime/src/kmp_affinity.cpp =================================================================== --- runtime/src/kmp_affinity.cpp +++ runtime/src/kmp_affinity.cpp @@ -934,7 +934,7 @@ #endif /* KMP_GROUP_AFFINITY */ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 static int __kmp_cpuid_mask_width(int count) { int r = 0; @@ -1858,7 +1858,7 @@ return depth; } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ #define osIdIndex 0 #define threadIdIndex 1 @@ -2038,7 +2038,7 @@ goto dup_field; #endif threadInfo[num_avail][osIdIndex] = val; -#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) char path[256]; KMP_SNPRINTF( path, sizeof(path), @@ -4068,7 +4068,7 @@ } #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 if (depth < 0) { if (__kmp_affinity_verbose) { @@ -4101,7 +4101,7 @@ } } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ #if KMP_OS_LINUX @@ -4184,7 +4184,7 @@ // used, then we abort if that method fails. The exception is group affinity, // which might have been implicitly set. 
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) { if (__kmp_affinity_verbose) { @@ -4214,7 +4214,7 @@ } } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) { const char *filename; Index: runtime/src/kmp_atomic.h =================================================================== --- runtime/src/kmp_atomic.h +++ runtime/src/kmp_atomic.h @@ -692,7 +692,7 @@ // OpenMP 4.0: x = expr binop x for non-commutative operations. // Supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 void __kmpc_atomic_fixed1_sub_rev(ident_t *id_ref, int gtid, char *lhs, char rhs); @@ -792,7 +792,7 @@ #endif #endif // KMP_HAVE_QUAD -#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #endif // OMP_40_ENABLED @@ -999,7 +999,7 @@ void (*f)(void *, void *, void *)); // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // Below routines for atomic READ are listed char __kmpc_atomic_fixed1_rd(ident_t *id_ref, int gtid, char *loc); @@ -1763,7 +1763,7 @@ #endif // OMP_40_ENABLED -#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 /* ------------------------------------------------------------------------ */ Index: runtime/src/kmp_atomic.cpp =================================================================== --- runtime/src/kmp_atomic.cpp +++ runtime/src/kmp_atomic.cpp @@ -816,7 +816,7 @@ // end of the first part of the workaround for C78287 #endif // USE_CMPXCHG_FIX -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || 
KMP_ARCH_X86_X32 // ------------------------------------------------------------------------ // X86 or X86_64: no alignment problems ==================================== @@ -889,7 +889,7 @@ } // end of the second part of the workaround for C78287 #endif // USE_CMPXCHG_FIX -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ // Routines for ATOMIC 4-byte operands addition and subtraction ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, @@ -1030,7 +1030,7 @@ OP_CRITICAL(= *lhs OP, LCK_ID) \ } -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ------------------------------------------------------------------------ // X86 or X86_64: no alignment problems =================================== @@ -1053,7 +1053,7 @@ OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \ } \ } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl @@ -1129,7 +1129,7 @@ } \ } -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ------------------------------------------------------------------------- // X86 or X86_64: no alignment problems ==================================== @@ -1158,7 +1158,7 @@ } \ } \ } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_max @@ -1206,7 +1206,7 @@ } // ------------------------------------------------------------------------ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ------------------------------------------------------------------------ // X86 or X86_64: no alignment problems =================================== #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, 
TYPE, BITS, OP, LCK_ID, MASK, \ @@ -1230,7 +1230,7 @@ OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \ } \ } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv @@ -1349,7 +1349,7 @@ // OpenMP 4.0: x = expr binop x for non-commutative operations. // Supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ------------------------------------------------------------------------ // Operation on *lhs, rhs bound by critical section @@ -1553,7 +1553,7 @@ #endif #endif -#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. #endif // OMP_40_ENABLED @@ -1586,7 +1586,7 @@ } // ------------------------------------------------------------------------- -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ------------------------------------------------------------------------- // X86 or X86_64: no alignment problems ==================================== #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ @@ -1610,10 +1610,10 @@ OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ } \ } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ // ------------------------------------------------------------------------- -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ------------------------------------------------------------------------- #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ RTYPE, LCK_ID, MASK, GOMP_FLAG) \ @@ -1627,7 +1627,7 @@ OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 
OP_CRITICAL_REV(OP, LCK_ID) \ } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ // RHS=float8 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, @@ -1753,7 +1753,7 @@ ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_fp -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // Reverse operations ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp @@ -1805,11 +1805,11 @@ 1) // __kmpc_atomic_float10_sub_rev_fp ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_rev_fp -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ------------------------------------------------------------------------ // X86 or X86_64: no alignment problems ==================================== #if USE_CMPXCHG_FIX @@ -1843,7 +1843,7 @@ OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ } \ } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8 @@ -1855,7 +1855,7 @@ 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ------------------------------------------------------------------------ // Atomic READ routines @@ -3326,7 +3326,7 @@ #endif // OMP_40_ENABLED -#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #undef 
OP_CRITICAL @@ -3385,7 +3385,7 @@ if ( #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) FALSE /* must use lock */ -#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 +#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 TRUE /* no alignment problems */ #else !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ @@ -3434,7 +3434,7 @@ if ( // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. // Gomp compatibility is broken if this routine is called for floats. -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 TRUE /* no alignment problems */ #else !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ @@ -3484,7 +3484,7 @@ #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) FALSE /* must use lock */ -#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 +#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 TRUE /* no alignment problems */ #else !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ Index: runtime/src/kmp_csupport.cpp =================================================================== --- runtime/src/kmp_csupport.cpp +++ runtime/src/kmp_csupport.cpp @@ -300,7 +300,7 @@ VOLATILE_CAST(microtask_t) microtask, // "wrapped" task VOLATILE_CAST(launch_t) __kmp_invoke_task_func, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX &ap #else ap @@ -388,7 +388,7 @@ VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX &ap #else ap @@ -530,13 +530,13 @@ /* return to the parallel section */ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 if 
(__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { __kmp_clear_x87_fpu_status_word(); __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); __kmp_load_mxcsr(&serial_team->t.t_mxcsr); } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ this_thr->th.th_team = serial_team->t.t_parent; this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; @@ -597,7 +597,7 @@ /* need explicit __mf() here since use volatile instead in library */ KMP_MB(); /* Flush all pending memory write invalidates. */ -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) #if KMP_MIC // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. // We shouldn't need it, though, since the ABI rules require that @@ -1210,7 +1210,7 @@ #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) #else #define KMP_CPUINFO_RTM 0 @@ -2678,7 +2678,7 @@ if ((__kmp_user_lock_kind == lk_tas) && (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { #if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) // "fast" path implemented to fix customer performance issue #if USE_ITT_BUILD __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); @@ -2772,7 +2772,7 @@ (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= OMP_NEST_LOCK_T_SIZE)) { #if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) // "fast" path implemented to fix customer performance issue kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; #if USE_ITT_BUILD Index: runtime/src/kmp_global.cpp 
=================================================================== --- runtime/src/kmp_global.cpp +++ runtime/src/kmp_global.cpp @@ -16,7 +16,7 @@ kmp_key_t __kmp_gtid_threadprivate_key; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 kmp_cpuinfo_t __kmp_cpuinfo = {0}; // Not initialized #endif @@ -177,11 +177,11 @@ #endif /* KMP_TDATA_GTID */ int __kmp_tls_gtid_min = INT_MAX; int __kmp_foreign_tp = TRUE; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 int __kmp_inherit_fp_control = TRUE; kmp_int16 __kmp_init_x87_fpu_control_word = 0; kmp_uint32 __kmp_init_mxcsr = 0; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ #ifdef USE_LOAD_BALANCE double __kmp_load_balance_interval = 1.0; Index: runtime/src/kmp_gsupport.cpp =================================================================== --- runtime/src/kmp_gsupport.cpp +++ runtime/src/kmp_gsupport.cpp @@ -376,7 +376,7 @@ rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper, __kmp_invoke_task_func, -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX &ap #else ap Index: runtime/src/kmp_itt.h =================================================================== --- runtime/src/kmp_itt.h +++ runtime/src/kmp_itt.h @@ -156,11 +156,11 @@ therefore uninteresting when collecting traces for architecture simulation. 
*/ #ifndef INCLUDE_SSC_MARKS -#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64) +#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32)) #endif -/* Linux 64 only for now */ -#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64) +/* Linux 64 and Linux 64/x32 only for now */ +#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32)) // Portable (at least for gcc and icc) code to insert the necessary instructions // to set %ebx and execute the unlikely no-op. #if defined(__INTEL_COMPILER) Index: runtime/src/kmp_lock.h =================================================================== --- runtime/src/kmp_lock.h +++ runtime/src/kmp_lock.h @@ -161,7 +161,7 @@ #ifndef KMP_USE_FUTEX #define KMP_USE_FUTEX \ (KMP_OS_LINUX && !KMP_OS_CNK && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) #endif #if KMP_USE_FUTEX @@ -630,7 +630,7 @@ kmp_int32 gtid); #if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) #define __kmp_acquire_user_lock_with_checks(lck, gtid) \ if (__kmp_user_lock_kind == lk_tas) { \ @@ -684,7 +684,7 @@ kmp_int32 gtid); #if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) #include "kmp_i18n.h" /* AC: KMP_FATAL definition */ extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ @@ -748,7 +748,7 @@ extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid); -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) #define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth) \ if 
(__kmp_user_lock_kind == lk_tas) { \ @@ -806,7 +806,7 @@ extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, kmp_int32 gtid); -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid) { if (__kmp_user_lock_kind == lk_tas) { @@ -1052,7 +1052,7 @@ // Shortcuts #define KMP_USE_INLINED_TAS \ - (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1 + (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM)) && 1 #define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0 // List of lock definitions; all nested locks are indirect locks. Index: runtime/src/kmp_lock.cpp =================================================================== --- runtime/src/kmp_lock.cpp +++ runtime/src/kmp_lock.cpp @@ -2736,7 +2736,7 @@ } // Time stamp counter -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #define __kmp_tsc() __kmp_hardware_timestamp() // Runtime's default backoff parameters kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; Index: runtime/src/kmp_os.h =================================================================== --- runtime/src/kmp_os.h +++ runtime/src/kmp_os.h @@ -153,7 +153,7 @@ #error "Can't determine size_t printf format specifier." 
#endif -#if KMP_ARCH_X86 +#if KMP_ARCH_X86 || KMP_ARCH_X86_X32 #define KMP_SIZE_T_MAX (0xFFFFFFFF) #else #define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) Index: runtime/src/kmp_platform.h =================================================================== --- runtime/src/kmp_platform.h +++ runtime/src/kmp_platform.h @@ -74,6 +74,7 @@ #define KMP_ARCH_X86 0 #define KMP_ARCH_X86_64 0 +#define KMP_ARCH_X86_X32 0 #define KMP_ARCH_AARCH64 0 #define KMP_ARCH_PPC64_BE 0 #define KMP_ARCH_PPC64_LE 0 @@ -93,8 +94,13 @@ #if KMP_OS_UNIX #if defined __x86_64 +#if defined __ILP32__ +#undef KMP_ARCH_X86_X32 +#define KMP_ARCH_X86_X32 1 +#else #undef KMP_ARCH_X86_64 #define KMP_ARCH_X86_64 1 +#endif #elif defined __i386 #undef KMP_ARCH_X86 #define KMP_ARCH_X86 1 @@ -176,12 +182,12 @@ // Platforms which support Intel(R) Many Integrated Core Architecture #define KMP_MIC_SUPPORTED \ - ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS)) + ((KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) && (KMP_OS_LINUX || KMP_OS_WINDOWS)) // TODO: Fixme - This is clever, but really fugly #if (1 != \ - KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ - KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64) + KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_X86_X32 + KMP_ARCH_ARM + \ + KMP_ARCH_PPC64 + KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64) #error Unknown or unsupported architecture #endif Index: runtime/src/kmp_runtime.cpp =================================================================== --- runtime/src/kmp_runtime.cpp +++ runtime/src/kmp_runtime.cpp @@ -1081,7 +1081,7 @@ KMP_MB(); } -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // Propagate any changes to the floating point control registers out to the team // We try to avoid unnecessary writes to the relevant cache line in the team // structure, so we don't make changes unless they are needed. 
@@ -1141,7 +1141,7 @@ #else #define propagateFPControl(x) ((void)0) #define updateHWFPControl(x) ((void)0) -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc); // forward declaration @@ -1401,7 +1401,7 @@ enum fork_context_e call_context, // Intel, GNU, ... kmp_int32 argc, microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX va_list *ap #else va_list ap @@ -1518,7 +1518,7 @@ argv = (void **)parent_team->t.t_argv; for (i = argc - 1; i >= 0; --i) /* TODO: revert workaround for Intel(R) 64 tracker #96 */ -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX *argv++ = va_arg(*ap, void *); #else *argv++ = va_arg(ap, void *); @@ -1721,11 +1721,11 @@ if (nthreads == 1) { /* josh todo: hypothetical question: what do we do for OS X*? 
*/ #if KMP_OS_LINUX && \ - (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) void *args[argc]; #else void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); -#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ +#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || \ KMP_ARCH_AARCH64) */ KA_TRACE(20, @@ -1813,7 +1813,7 @@ if (ap) { for (i = argc - 1; i >= 0; --i) // TODO: revert workaround for Intel(R) 64 tracker #96 -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX *argv++ = va_arg(*ap, void *); #else *argv++ = va_arg(ap, void *); @@ -1837,7 +1837,7 @@ argv = args; for (i = argc - 1; i >= 0; --i) // TODO: revert workaround for Intel(R) 64 tracker #96 -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX *argv++ = va_arg(*ap, void *); #else *argv++ = va_arg(ap, void *); @@ -2145,7 +2145,7 @@ #endif /* OMP_40_ENABLED */ for (i = argc - 1; i >= 0; --i) { // TODO: revert workaround for Intel(R) 64 tracker #96 -#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX +#if (KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX void *new_argv = va_arg(*ap, void *); #else void *new_argv = va_arg(ap, void *); @@ -4445,11 +4445,11 @@ // TODO???: team->t.t_max_active_levels = new_max_active_levels; team->t.t_sched.sched = new_icvs->sched.sched; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 team->t.t_fp_control_saved = FALSE; /* not needed */ team->t.t_x87_fpu_control_word = 0; /* not needed */ team->t.t_mxcsr = 0; /* not needed */ -#endif /* KMP_ARCH_X86 || 
KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ team->t.t_construct = 0; @@ -6246,7 +6246,7 @@ double dtime; long ltime; } time; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 __kmp_initialize_system_tick(); #endif __kmp_read_system_time(&time.dtime); @@ -6821,13 +6821,13 @@ KA_TRACE(10, ("__kmp_parallel_initialize: enter\n")); KMP_ASSERT(KMP_UBER_GTID(gtid)); -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // Save the FP control regs. // Worker threads will set theirs to these values at thread startup. __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); __kmp_store_mxcsr(&__kmp_init_mxcsr); __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ #if KMP_OS_UNIX #if KMP_HANDLE_SIGNALS @@ -7655,9 +7655,9 @@ #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || // KMP_OS_DARWIN -#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS +#elif KMP_ARCH_X86 || KMP_ARCH_X86_X32 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS #if KMP_OS_LINUX || KMP_OS_WINDOWS // basic tuning Index: runtime/src/kmp_settings.cpp =================================================================== --- runtime/src/kmp_settings.cpp +++ runtime/src/kmp_settings.cpp @@ -686,7 +686,7 @@ // ----------------------------------------------------------------------------- // KMP_INHERIT_FP_CONTROL -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 static void __kmp_stg_parse_inherit_fp_control(char const *name, char const *value, void *data) { @@ -700,7 +700,7 @@ #endif /* KMP_DEBUG */ } // __kmp_stg_print_inherit_fp_control -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || 
KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ // ----------------------------------------------------------------------------- // KMP_LIBRARY, OMP_WAIT_POLICY @@ -2900,7 +2900,7 @@ __kmp_affinity_top_method = affinity_top_method_hwloc; } #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 else if (__kmp_str_match("x2apic id", 9, value) || __kmp_str_match("x2apic_id", 9, value) || __kmp_str_match("x2apic-id", 9, value) || @@ -2948,7 +2948,7 @@ __kmp_str_match("leaf4", 5, value)) { __kmp_affinity_top_method = affinity_top_method_apicid; } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ else if (__kmp_str_match("/proc/cpuinfo", 2, value) || __kmp_str_match("cpuinfo", 5, value)) { __kmp_affinity_top_method = affinity_top_method_cpuinfo; @@ -2978,7 +2978,7 @@ value = "all"; break; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 case affinity_top_method_x2apicid: value = "x2APIC id"; break; @@ -2986,7 +2986,7 @@ case affinity_top_method_apicid: value = "APIC id"; break; -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ #if KMP_USE_HWLOC case affinity_top_method_hwloc: @@ -4469,10 +4469,10 @@ __kmp_stg_print_handle_signals, NULL, 0, 0}, #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 {"KMP_INHERIT_FP_CONTROL", __kmp_stg_parse_inherit_fp_control, __kmp_stg_print_inherit_fp_control, NULL, 0, 0}, -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ #ifdef KMP_GOMP_COMPAT {"GOMP_STACKSIZE", __kmp_stg_parse_stacksize, NULL, NULL, 0, 0}, Index: runtime/src/kmp_stats.cpp =================================================================== --- runtime/src/kmp_stats.cpp +++ runtime/src/kmp_stats.cpp @@ -561,7 +561,7 @@ fprintf(statsOut, "# Time of run: %s\n", 
&buffer[0]); if (gethostname(&hostName[0], sizeof(hostName)) == 0) fprintf(statsOut, "# Hostname: %s\n", &hostName[0]); -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 fprintf(statsOut, "# CPU: %s\n", &__kmp_cpuinfo.name[0]); fprintf(statsOut, "# Family: %d, Model: %d, Stepping: %d\n", __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping); Index: runtime/src/kmp_stats_timing.cpp =================================================================== --- runtime/src/kmp_stats_timing.cpp +++ runtime/src/kmp_stats_timing.cpp @@ -29,7 +29,7 @@ // pretty bad assumption of 1GHz clock for MIC return 1 / ((double)1000 * 1.e6); } -#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 +#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 #include // Extract the value from the CPUID information double tsc_tick_count::tick_time() { Index: runtime/src/kmp_taskq.cpp =================================================================== --- runtime/src/kmp_taskq.cpp +++ runtime/src/kmp_taskq.cpp @@ -1918,7 +1918,7 @@ __kmp_pop_workshare(global_tid, ct_taskq, loc); if (in_parallel) { -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags), TQF_ALL_TASKS_QUEUED); #else @@ -1949,7 +1949,7 @@ /* No synchronization needed for serial context */ queue->tq_flags |= TQF_IS_LAST_TASK; } else { -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 KMP_TEST_THEN_OR32(RCAST(volatile kmp_uint32 *, &queue->tq_flags), TQF_IS_LAST_TASK); #else Index: runtime/src/kmp_utility.cpp =================================================================== --- runtime/src/kmp_utility.cpp +++ runtime/src/kmp_utility.cpp @@ -19,7 +19,7 @@ static const char *unknown = "unknown"; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 /* NOTE: If called before serial_initialize (i.e. 
from runtime_initialize), then the debugging package has not been initialized yet, and only "0" will print @@ -288,7 +288,7 @@ } } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ void __kmp_expand_host_name(char *buffer, size_t size) { KMP_DEBUG_ASSERT(size >= sizeof(unknown)); Index: runtime/src/z_Linux_asm.S =================================================================== --- runtime/src/z_Linux_asm.S +++ runtime/src/z_Linux_asm.S @@ -18,7 +18,7 @@ #include "kmp_config.h" -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 # if KMP_MIC // the 'delay r16/r32/r64' should be used instead of the 'pause'. @@ -213,7 +213,7 @@ #endif /* KMP_GOMP_COMPAT */ -#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 +#if (KMP_ARCH_X86 || KMP_ARCH_X86_X32) && !KMP_ARCH_PPC64 // ----------------------------------------------------------------------- // microtasking routines specifically written for IA-32 architecture @@ -671,7 +671,7 @@ #endif /* KMP_ARCH_X86 */ -#if KMP_ARCH_X86_64 +#if KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // ----------------------------------------------------------------------- // microtasking routines specifically written for IA-32 architecture and @@ -1361,7 +1361,7 @@ // ----------------------------------------------------------------------- -#endif /* KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ // ' #if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 Index: runtime/src/z_Linux_util.cpp =================================================================== --- runtime/src/z_Linux_util.cpp +++ runtime/src/z_Linux_util.cpp @@ -302,7 +302,7 @@ #endif // KMP_USE_FUTEX -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS) +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) && (!KMP_ASM_INTRINS) /* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to use compare_and_store for these routines */ @@ -418,7 +418,7 @@ 
return old_value; } -#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */ +#endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) && (! KMP_ASM_INTRINS) */ void __kmp_terminate_thread(int gtid) { int status; @@ -531,12 +531,12 @@ KMP_CHECK_SYSFAIL("pthread_setcancelstate", status); #endif -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // Set FP control regs to be a copy of the parallel initialization thread's. __kmp_clear_x87_fpu_status_word(); __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); __kmp_load_mxcsr(&__kmp_init_mxcsr); -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ #ifdef KMP_BLOCK_SIGNALS status = sigfillset(&new_set); @@ -1821,11 +1821,11 @@ return; } -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32) if (!__kmp_cpuinfo.initialized) { __kmp_query_cpuid(&__kmp_cpuinfo); } -#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 */ __kmp_xproc = __kmp_get_xproc(); @@ -1932,7 +1932,7 @@ return KMP_NSEC_PER_SEC * t.tv_sec + 1000 * t.tv_usec; } -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 /* Measure clock ticks per millisecond */ void __kmp_initialize_system_tick() { kmp_uint64 delay = 100000; // 50~100 usec on most machines. 
@@ -2282,7 +2282,7 @@ #endif // USE_LOAD_BALANCE -#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \ +#if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 || KMP_MIC || \ ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || KMP_ARCH_PPC64) // we really only need the case with 1 argument, because CLANG always build Index: runtime/test/ompt/callback.h =================================================================== --- runtime/test/ompt/callback.h +++ runtime/test/ompt/callback.h @@ -105,7 +105,7 @@ define_ompt_label(id) \ print_possible_return_addresses(get_ompt_label_address(id)) -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_X86_X32 // On X86 the NOP instruction is 1 byte long. In addition, the comiler inserts // a MOV instruction for non-void runtime functions which is 3 bytes long. #define print_possible_return_addresses(addr) \