Index: runtime/src/CMakeLists.txt =================================================================== --- runtime/src/CMakeLists.txt +++ runtime/src/CMakeLists.txt @@ -93,7 +93,7 @@ # Unix specific files libomp_append(LIBOMP_CXXFILES z_Linux_util.cpp) libomp_append(LIBOMP_CXXFILES kmp_gsupport.cpp) - libomp_append(LIBOMP_ASMFILES z_Linux_asm.s) # Unix assembly file + libomp_append(LIBOMP_ASMFILES z_Linux_asm.S) # Unix assembly file endif() libomp_append(LIBOMP_CFILES thirdparty/ittnotify/ittnotify_static.c LIBOMP_USE_ITT_NOTIFY) libomp_append(LIBOMP_CXXFILES kmp_debugger.cpp LIBOMP_USE_DEBUGGER) Index: runtime/src/z_Linux_asm.S =================================================================== --- runtime/src/z_Linux_asm.S +++ runtime/src/z_Linux_asm.S @@ -1,4 +1,4 @@ -// z_Linux_asm.s: - microtasking routines specifically +// z_Linux_asm.S: - microtasking routines specifically // written for Intel platforms running Linux* OS // @@ -681,7 +681,7 @@ // -- Machine type P // mark_description "Intel Corporation"; .ident "Intel Corporation" -// -- .file "z_Linux_asm.s" +// -- .file "z_Linux_asm.S" .data ALIGN 4 Index: runtime/src/z_Linux_asm.s =================================================================== --- runtime/src/z_Linux_asm.s +++ runtime/src/z_Linux_asm.s @@ -1,1730 +0,0 @@ -// z_Linux_asm.s: - microtasking routines specifically -// written for Intel platforms running Linux* OS - -// -////===----------------------------------------------------------------------===// -//// -//// The LLVM Compiler Infrastructure -//// -//// This file is dual licensed under the MIT and the University of Illinois Open -//// Source Licenses. See LICENSE.txt for details. -//// -////===----------------------------------------------------------------------===// -// - -// ----------------------------------------------------------------------- -// macros -// ----------------------------------------------------------------------- - -#include "kmp_config.h" - -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - -# if KMP_MIC -// the 'delay r16/r32/r64' should be used instead of the 'pause'. -// The delay operation has the effect of removing the current thread from -// the round-robin HT mechanism, and therefore speeds up the issue rate of -// the other threads on the same core. -// -// A value of 0 works fine for <= 2 threads per core, but causes the EPCC -// barrier time to increase greatly for 3 or more threads per core. -// -// A value of 100 works pretty well for up to 4 threads per core, but isn't -// quite as fast as 0 for 2 threads per core. -// -// We need to check what happens for oversubscription / > 4 threads per core. -// It is possible that we need to pass the delay value in as a parameter -// that the caller determines based on the total # threads / # cores. -// -//.macro pause_op -// mov $100, %rax -// delay %rax -//.endm -# else -# define pause_op .byte 0xf3,0x90 -# endif // KMP_MIC - -# if KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols -# define KMP_LABEL(x) L_##x // form the name of label -.macro KMP_CFI_DEF_OFFSET -.endmacro -.macro KMP_CFI_OFFSET -.endmacro -.macro KMP_CFI_REGISTER -.endmacro -.macro KMP_CFI_DEF -.endmacro -.macro ALIGN - .align $0 -.endmacro -.macro DEBUG_INFO -/* Not sure what .size does in icc, not sure if we need to do something - similar for OS X*. -*/ -.endmacro -.macro PROC - ALIGN 4 - .globl KMP_PREFIX_UNDERSCORE($0) -KMP_PREFIX_UNDERSCORE($0): -.endmacro -# else // KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols -// Format labels so that they don't override function names in gdb's backtraces -// MIC assembler doesn't accept .L syntax, the L works fine there (as well as -// on OS X*) -# if KMP_MIC -# define KMP_LABEL(x) L_##x // local label -# else -# define KMP_LABEL(x) .L_##x // local label hidden from backtraces -# endif // KMP_MIC -.macro ALIGN size - .align 1<<(\size) -.endm -.macro DEBUG_INFO proc - .cfi_endproc -// Not sure why we need .type and .size for the functions - .align 16 - .type \proc,@function - .size \proc,.-\proc -.endm -.macro PROC proc - ALIGN 4 - .globl KMP_PREFIX_UNDERSCORE(\proc) -KMP_PREFIX_UNDERSCORE(\proc): - .cfi_startproc -.endm -.macro KMP_CFI_DEF_OFFSET sz - .cfi_def_cfa_offset \sz -.endm -.macro KMP_CFI_OFFSET reg, sz - .cfi_offset \reg,\sz -.endm -.macro KMP_CFI_REGISTER reg - .cfi_def_cfa_register \reg -.endm -.macro KMP_CFI_DEF reg, sz - .cfi_def_cfa \reg,\sz -.endm -# endif // KMP_OS_DARWIN -#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 - -#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 - -# if KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols -# define KMP_LABEL(x) L_##x // form the name of label - -.macro ALIGN - .align $0 -.endmacro - -.macro DEBUG_INFO -/* Not sure what .size does in icc, not sure if we need to do something - similar for OS X*. -*/ -.endmacro - -.macro PROC - ALIGN 4 - .globl KMP_PREFIX_UNDERSCORE($0) -KMP_PREFIX_UNDERSCORE($0): -.endmacro -# else // KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols -// Format labels so that they don't override function names in gdb's backtraces -# define KMP_LABEL(x) .L_##x // local label hidden from backtraces - -.macro ALIGN size - .align 1<<(\size) -.endm - -.macro DEBUG_INFO proc - .cfi_endproc -// Not sure why we need .type and .size for the functions - ALIGN 2 - .type \proc,@function - .size \proc,.-\proc -.endm - -.macro PROC proc - ALIGN 2 - .globl KMP_PREFIX_UNDERSCORE(\proc) -KMP_PREFIX_UNDERSCORE(\proc): - .cfi_startproc -.endm -# endif // KMP_OS_DARWIN - -#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 - -// ----------------------------------------------------------------------- -// data -// ----------------------------------------------------------------------- - -#ifdef KMP_GOMP_COMPAT - -// Support for unnamed common blocks. -// -// Because the symbol ".gomp_critical_user_" contains a ".", we have to -// put this stuff in assembly. - -# if KMP_ARCH_X86 -# if KMP_OS_DARWIN - .data - .comm .gomp_critical_user_,32 - .data - .globl ___kmp_unnamed_critical_addr -___kmp_unnamed_critical_addr: - .long .gomp_critical_user_ -# else /* Linux* OS */ - .data - .comm .gomp_critical_user_,32,8 - .data - ALIGN 4 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .4byte .gomp_critical_user_ - .type __kmp_unnamed_critical_addr,@object - .size __kmp_unnamed_critical_addr,4 -# endif /* KMP_OS_DARWIN */ -# endif /* KMP_ARCH_X86 */ - -# if KMP_ARCH_X86_64 -# if KMP_OS_DARWIN - .data - .comm .gomp_critical_user_,32 - .data - .globl ___kmp_unnamed_critical_addr -___kmp_unnamed_critical_addr: - .quad .gomp_critical_user_ -# else /* Linux* OS */ - .data - .comm .gomp_critical_user_,32,8 - .data - ALIGN 8 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .8byte .gomp_critical_user_ - .type __kmp_unnamed_critical_addr,@object - .size __kmp_unnamed_critical_addr,8 -# endif /* KMP_OS_DARWIN */ -# endif /* KMP_ARCH_X86_64 */ - -#endif /* KMP_GOMP_COMPAT */ - - -#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 - -// ----------------------------------------------------------------------- -// microtasking routines specifically written for IA-32 architecture -// running Linux* OS -// ----------------------------------------------------------------------- - - .ident "Intel Corporation" - .data - ALIGN 4 -// void -// __kmp_x86_pause( void ); - - .text - PROC __kmp_x86_pause - - pause_op - ret - - DEBUG_INFO __kmp_x86_pause - -// void -// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); - - PROC __kmp_x86_cpuid - - pushl %ebp - movl %esp,%ebp - pushl %edi - pushl %ebx - pushl %ecx - pushl %edx - - movl 8(%ebp), %eax - movl 12(%ebp), %ecx - cpuid // Query the CPUID for the current processor - - movl 16(%ebp), %edi - movl %eax, 0(%edi) - movl %ebx, 4(%edi) - movl %ecx, 8(%edi) - movl %edx, 12(%edi) - - popl %edx - popl %ecx - popl %ebx - popl %edi - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_x86_cpuid - - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// kmp_int32 -// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); - - PROC __kmp_test_then_add32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - lock - xaddl %eax,(%ecx) - ret - - DEBUG_INFO __kmp_test_then_add32 - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed8 -// -// kmp_int32 -// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// -// return: %al - PROC __kmp_xchg_fixed8 - - movl 4(%esp), %ecx // "p" - movb 8(%esp), %al // "d" - - lock - xchgb %al,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed8 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed16 -// -// kmp_int16 -// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// return: %ax - PROC __kmp_xchg_fixed16 - - movl 4(%esp), %ecx // "p" - movw 8(%esp), %ax // "d" - - lock - xchgw %ax,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed16 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed32 -// -// kmp_int32 -// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: 4(%esp) -// d: 8(%esp) -// -// return: %eax - PROC __kmp_xchg_fixed32 - - movl 4(%esp), %ecx // "p" - movl 8(%esp), %eax // "d" - - lock - xchgl %eax,(%ecx) - ret - - DEBUG_INFO __kmp_xchg_fixed32 - - -// kmp_int8 -// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); - PROC __kmp_compare_and_store8 - - movl 4(%esp), %ecx - movb 8(%esp), %al - movb 12(%esp), %dl - lock - cmpxchgb %dl,(%ecx) - sete %al // if %al == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO __kmp_compare_and_store8 - -// kmp_int16 -// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); - PROC __kmp_compare_and_store16 - - movl 4(%esp), %ecx - movw 8(%esp), %ax - movw 12(%esp), %dx - lock - cmpxchgw %dx,(%ecx) - sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO __kmp_compare_and_store16 - -// kmp_int32 -// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); - PROC __kmp_compare_and_store32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - lock - cmpxchgl %edx,(%ecx) - sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - ret - - DEBUG_INFO __kmp_compare_and_store32 - -// kmp_int32 -// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s ); - PROC __kmp_compare_and_store64 - - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %edi - movl 8(%ebp), %edi - movl 12(%ebp), %eax // "cv" low order word - movl 16(%ebp), %edx // "cv" high order word - movl 20(%ebp), %ebx // "sv" low order word - movl 24(%ebp), %ecx // "sv" high order word - lock - cmpxchg8b (%edi) - sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction - popl %edi - popl %ebx - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_compare_and_store64 - -// kmp_int8 -// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); - PROC __kmp_compare_and_store_ret8 - - movl 4(%esp), %ecx - movb 8(%esp), %al - movb 12(%esp), %dl - lock - cmpxchgb %dl,(%ecx) - ret - - DEBUG_INFO __kmp_compare_and_store_ret8 - -// kmp_int16 -// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, -// kmp_int16 sv); - PROC __kmp_compare_and_store_ret16 - - movl 4(%esp), %ecx - movw 8(%esp), %ax - movw 12(%esp), %dx - lock - cmpxchgw %dx,(%ecx) - ret - - DEBUG_INFO __kmp_compare_and_store_ret16 - -// kmp_int32 -// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, -// kmp_int32 sv); - PROC __kmp_compare_and_store_ret32 - - movl 4(%esp), %ecx - movl 8(%esp), %eax - movl 12(%esp), %edx - lock - cmpxchgl %edx,(%ecx) - ret - - DEBUG_INFO __kmp_compare_and_store_ret32 - -// kmp_int64 -// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, -// kmp_int64 sv); - PROC __kmp_compare_and_store_ret64 - - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %edi - movl 8(%ebp), %edi - movl 12(%ebp), %eax // "cv" low order word - movl 16(%ebp), %edx // "cv" high order word - movl 20(%ebp), %ebx // "sv" low order word - movl 24(%ebp), %ecx // "sv" high order word - lock - cmpxchg8b (%edi) - popl %edi - popl %ebx - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_compare_and_store_ret64 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_real32 -// -// kmp_real32 -// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); -// -// parameters: -// addr: 4(%esp) -// data: 8(%esp) -// -// return: %eax - PROC __kmp_xchg_real32 - - pushl %ebp - movl %esp, %ebp - subl $4, %esp - pushl %esi - - movl 4(%ebp), %esi - flds (%esi) - // load - fsts -4(%ebp) - // store old value - - movl 8(%ebp), %eax - - lock - xchgl %eax, (%esi) - - flds -4(%ebp) - // return old value - - popl %esi - movl %ebp, %esp - popl %ebp - ret - - DEBUG_INFO __kmp_xchg_real32 - -# endif /* !KMP_ASM_INTRINS */ - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_load_x87_fpu_control_word -// -// void -// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: 4(%esp) - PROC __kmp_load_x87_fpu_control_word - - movl 4(%esp), %eax - fldcw (%eax) - ret - - DEBUG_INFO __kmp_load_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_store_x87_fpu_control_word -// -// void -// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: 4(%esp) - PROC __kmp_store_x87_fpu_control_word - - movl 4(%esp), %eax - fstcw (%eax) - ret - - DEBUG_INFO __kmp_store_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_clear_x87_fpu_status_word -// -// void -// __kmp_clear_x87_fpu_status_word(); - PROC __kmp_clear_x87_fpu_status_word - - fnclex - ret - - DEBUG_INFO __kmp_clear_x87_fpu_status_word - - -//------------------------------------------------------------------------ -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & gtid, argv[0], ... ); -// return 1; -// } - -// -- Begin __kmp_invoke_microtask -// mark_begin; - PROC __kmp_invoke_microtask - - pushl %ebp - KMP_CFI_DEF_OFFSET 8 - KMP_CFI_OFFSET ebp,-8 - movl %esp,%ebp // establish the base pointer for this routine. - KMP_CFI_REGISTER ebp - subl $8,%esp // allocate space for two local variables. - // These varibales are: - // argv: -4(%ebp) - // temp: -8(%ebp) - // - pushl %ebx // save %ebx to use during this routine - // -#if OMPT_SUPPORT - movl 28(%ebp),%ebx // get exit_frame address - movl %ebp,(%ebx) // save exit_frame -#endif - - movl 20(%ebp),%ebx // Stack alignment - # args - addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) - shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 - movl %esp,%eax // - subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this - movl %eax,%ebx // Save to %ebx - andl $0xFFFFFF80,%eax // mask off 7 bits - subl %eax,%ebx // Amount to subtract from %esp - subl %ebx,%esp // Prepare the stack ptr -- - // now it will be aligned on 128-byte boundary at the call - - movl 24(%ebp),%eax // copy from p_argv[] - movl %eax,-4(%ebp) // into the local variable *argv. - - movl 20(%ebp),%ebx // argc is 20(%ebp) - shll $2,%ebx - -KMP_LABEL(invoke_2): - cmpl $0,%ebx - jg KMP_LABEL(invoke_4) - jmp KMP_LABEL(invoke_3) - ALIGN 2 -KMP_LABEL(invoke_4): - movl -4(%ebp),%eax - subl $4,%ebx // decrement argc. - addl %ebx,%eax // index into argv. - movl (%eax),%edx - pushl %edx - - jmp KMP_LABEL(invoke_2) - ALIGN 2 -KMP_LABEL(invoke_3): - leal 16(%ebp),%eax // push & tid - pushl %eax - - leal 12(%ebp),%eax // push & gtid - pushl %eax - - movl 8(%ebp),%ebx - call *%ebx // call (*pkfn)(); - - movl $1,%eax // return 1; - - movl -12(%ebp),%ebx // restore %ebx - leave - KMP_CFI_DEF esp,4 - ret - - DEBUG_INFO __kmp_invoke_microtask -// -- End __kmp_invoke_microtask - - -// kmp_uint64 -// __kmp_hardware_timestamp(void) - PROC __kmp_hardware_timestamp - rdtsc - ret - - DEBUG_INFO __kmp_hardware_timestamp -// -- End __kmp_hardware_timestamp - -#endif /* KMP_ARCH_X86 */ - - -#if KMP_ARCH_X86_64 - -// ----------------------------------------------------------------------- -// microtasking routines specifically written for IA-32 architecture and -// Intel(R) 64 running Linux* OS -// ----------------------------------------------------------------------- - -// -- Machine type P -// mark_description "Intel Corporation"; - .ident "Intel Corporation" -// -- .file "z_Linux_asm.s" - .data - ALIGN 4 - -// To prevent getting our code into .data section .text added to every routine -// definition for x86_64. -//------------------------------------------------------------------------ -// FUNCTION __kmp_x86_cpuid -// -// void -// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); -// -// parameters: -// mode: %edi -// mode2: %esi -// cpuid_buffer: %rdx - .text - PROC __kmp_x86_cpuid - - pushq %rbp - movq %rsp,%rbp - pushq %rbx // callee-save register - - movl %esi, %ecx // "mode2" - movl %edi, %eax // "mode" - movq %rdx, %rsi // cpuid_buffer - cpuid // Query the CPUID for the current processor - - movl %eax, 0(%rsi) // store results into buffer - movl %ebx, 4(%rsi) - movl %ecx, 8(%rsi) - movl %edx, 12(%rsi) - - popq %rbx // callee-save register - movq %rbp, %rsp - popq %rbp - ret - - DEBUG_INFO __kmp_x86_cpuid - - - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// FUNCTION __kmp_test_then_add32 -// -// kmp_int32 -// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: %rdi -// d: %esi -// -// return: %eax - .text - PROC __kmp_test_then_add32 - - movl %esi, %eax // "d" - lock - xaddl %eax,(%rdi) - ret - - DEBUG_INFO __kmp_test_then_add32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_test_then_add64 -// -// kmp_int64 -// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); -// -// parameters: -// p: %rdi -// d: %rsi -// return: %rax - .text - PROC __kmp_test_then_add64 - - movq %rsi, %rax // "d" - lock - xaddq %rax,(%rdi) - ret - - DEBUG_INFO __kmp_test_then_add64 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed8 -// -// kmp_int32 -// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -// -// parameters: -// p: %rdi -// d: %sil -// -// return: %al - .text - PROC __kmp_xchg_fixed8 - - movb %sil, %al // "d" - - lock - xchgb %al,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed8 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed16 -// -// kmp_int16 -// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -// -// parameters: -// p: %rdi -// d: %si -// return: %ax - .text - PROC __kmp_xchg_fixed16 - - movw %si, %ax // "d" - - lock - xchgw %ax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed16 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed32 -// -// kmp_int32 -// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -// -// parameters: -// p: %rdi -// d: %esi -// -// return: %eax - .text - PROC __kmp_xchg_fixed32 - - movl %esi, %eax // "d" - - lock - xchgl %eax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_fixed64 -// -// kmp_int64 -// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); -// -// parameters: -// p: %rdi -// d: %rsi -// return: %rax - .text - PROC __kmp_xchg_fixed64 - - movq %rsi, %rax // "d" - - lock - xchgq %rax,(%rdi) - ret - - DEBUG_INFO __kmp_xchg_fixed64 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store8 -// -// kmp_int8 -// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - .text - PROC __kmp_compare_and_store8 - - movb %sil, %al // "cv" - lock - cmpxchgb %dl,(%rdi) - sete %al // if %al == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store8 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store16 -// -// kmp_int16 -// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// -// parameters: -// p: %rdi -// cv: %si -// sv: %dx -// -// return: %eax - .text - PROC __kmp_compare_and_store16 - - movw %si, %ax // "cv" - lock - cmpxchgw %dx,(%rdi) - sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store16 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store32 -// -// kmp_int32 -// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - .text - PROC __kmp_compare_and_store32 - - movl %esi, %eax // "cv" - lock - cmpxchgl %edx,(%rdi) - sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store64 -// -// kmp_int32 -// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// -// parameters: -// p: %rdi -// cv: %rsi -// sv: %rdx -// return: %eax - .text - PROC __kmp_compare_and_store64 - - movq %rsi, %rax // "cv" - lock - cmpxchgq %rdx,(%rdi) - sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0 - andq $1, %rax // sign extend previous instruction for return value - ret - - DEBUG_INFO __kmp_compare_and_store64 - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store_ret8 -// -// kmp_int8 -// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - .text - PROC __kmp_compare_and_store_ret8 - - movb %sil, %al // "cv" - lock - cmpxchgb %dl,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret8 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store_ret16 -// -// kmp_int16 -// __kmp_compare_and_store16_ret( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// -// parameters: -// p: %rdi -// cv: %si -// sv: %dx -// -// return: %eax - .text - PROC __kmp_compare_and_store_ret16 - - movw %si, %ax // "cv" - lock - cmpxchgw %dx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret16 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store_ret32 -// -// kmp_int32 -// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// -// parameters: -// p: %rdi -// cv: %esi -// sv: %edx -// -// return: %eax - .text - PROC __kmp_compare_and_store_ret32 - - movl %esi, %eax // "cv" - lock - cmpxchgl %edx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_compare_and_store_ret64 -// -// kmp_int64 -// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// -// parameters: -// p: %rdi -// cv: %rsi -// sv: %rdx -// return: %eax - .text - PROC __kmp_compare_and_store_ret64 - - movq %rsi, %rax // "cv" - lock - cmpxchgq %rdx,(%rdi) - ret - - DEBUG_INFO __kmp_compare_and_store_ret64 - -# endif /* !KMP_ASM_INTRINS */ - - -# if !KMP_MIC - -# if !KMP_ASM_INTRINS - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_real32 -// -// kmp_real32 -// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); -// -// parameters: -// addr: %rdi -// data: %xmm0 (lower 4 bytes) -// -// return: %xmm0 (lower 4 bytes) - .text - PROC __kmp_xchg_real32 - - movd %xmm0, %eax // load "data" to eax - - lock - xchgl %eax, (%rdi) - - movd %eax, %xmm0 // load old value into return register - - ret - - DEBUG_INFO __kmp_xchg_real32 - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_xchg_real64 -// -// kmp_real64 -// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); -// -// parameters: -// addr: %rdi -// data: %xmm0 (lower 8 bytes) -// return: %xmm0 (lower 8 bytes) - .text - PROC __kmp_xchg_real64 - - movd %xmm0, %rax // load "data" to rax - - lock - xchgq %rax, (%rdi) - - movd %rax, %xmm0 // load old value into return register - ret - - DEBUG_INFO __kmp_xchg_real64 - - -# endif /* !KMP_MIC */ - -# endif /* !KMP_ASM_INTRINS */ - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_load_x87_fpu_control_word -// -// void -// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: %rdi - .text - PROC __kmp_load_x87_fpu_control_word - - fldcw (%rdi) - ret - - DEBUG_INFO __kmp_load_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_store_x87_fpu_control_word -// -// void -// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); -// -// parameters: -// p: %rdi - .text - PROC __kmp_store_x87_fpu_control_word - - fstcw (%rdi) - ret - - DEBUG_INFO __kmp_store_x87_fpu_control_word - - -//------------------------------------------------------------------------ -// FUNCTION __kmp_clear_x87_fpu_status_word -// -// void -// __kmp_clear_x87_fpu_status_word(); - .text - PROC __kmp_clear_x87_fpu_status_word - -#if KMP_MIC -// TODO: remove the workaround for problem with fnclex instruction (no CQ known) - fstenv -32(%rsp) // store FP env - andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW - fldenv -32(%rsp) // load FP env back - ret -#else - fnclex - ret -#endif - - DEBUG_INFO __kmp_clear_x87_fpu_status_word - - -//------------------------------------------------------------------------ -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... ); -// return 1; -// } -// -// note: at call to pkfn must have %rsp 128-byte aligned for compiler -// -// parameters: -// %rdi: pkfn -// %esi: gtid -// %edx: tid -// %ecx: argc -// %r8: p_argv -// %r9: &exit_frame -// -// locals: -// __gtid: gtid parm pushed on stack so can pass >id to pkfn -// __tid: tid parm pushed on stack so can pass &tid to pkfn -// -// reg temps: -// %rax: used all over the place -// %rdx: used in stack pointer alignment calculation -// %r11: used to traverse p_argv array -// %rsi: used as temporary for stack parameters -// used as temporary for number of pkfn parms to push -// %rbx: used to hold pkfn address, and zero constant, callee-save -// -// return: %eax (always 1/TRUE) -__gtid = -16 -__tid = -24 - -// -- Begin __kmp_invoke_microtask -// mark_begin; - .text - PROC __kmp_invoke_microtask - - pushq %rbp // save base pointer - KMP_CFI_DEF_OFFSET 16 - KMP_CFI_OFFSET rbp,-16 - movq %rsp,%rbp // establish the base pointer for this routine. - KMP_CFI_REGISTER rbp - -#if OMPT_SUPPORT - movq %rbp, (%r9) // save exit_frame -#endif - - pushq %rbx // %rbx is callee-saved register - pushq %rsi // Put gtid on stack so can pass &tgid to pkfn - pushq %rdx // Put tid on stack so can pass &tid to pkfn - - movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax - movq $0, %rbx // constant for cmovs later - subq $4, %rax // subtract four args passed in registers to pkfn -#if KMP_MIC - js KMP_LABEL(kmp_0) // jump to movq - jmp KMP_LABEL(kmp_0_exit) // jump ahead -KMP_LABEL(kmp_0): - movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) -KMP_LABEL(kmp_0_exit): -#else - cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) -#endif // KMP_MIC - - movq %rax, %rsi // save max(0, argc-4) -> %rsi for later - shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 - - movq %rsp, %rdx // - subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- - // without align, stack ptr would be this - movq %rdx, %rax // Save to %rax - - andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align) - subq %rax, %rdx // Amount to subtract from %rsp - subq %rdx, %rsp // Prepare the stack ptr -- - // now %rsp will align to 128-byte boundary at call site - - // setup pkfn parameter reg and stack - movq %rcx, %rax // argc -> %rax - cmpq $0, %rsi - je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push - shlq $3, %rcx // argc*8 -> %rcx - movq %r8, %rdx // p_argv -> %rdx - addq %rcx, %rdx // &p_argv[argc] -> %rdx - - movq %rsi, %rcx // max (0, argc-4) -> %rcx - -KMP_LABEL(kmp_invoke_push_parms): - // push nth - 7th parms to pkfn on stack - subq $8, %rdx // decrement p_argv pointer to previous parm - movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi - pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order) - subl $1, %ecx - -// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e -// if the name of the label that is an operand of this jecxz starts with a dot ("."); -// Apple's linker does not support 1-byte length relocation; -// Resolution: replace all .labelX entries with L_labelX. - - jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left - jmp KMP_LABEL(kmp_invoke_push_parms) - ALIGN 3 -KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. - // order here is important to avoid trashing - // registers used for both input and output parms! - movq %rdi, %rbx // pkfn -> %rbx - leaq __gtid(%rbp), %rdi // >id -> %rdi (store 1st parm to pkfn) - leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) - - movq %r8, %r11 // p_argv -> %r11 - -#if KMP_MIC - cmpq $4, %rax // argc >= 4? - jns KMP_LABEL(kmp_4) // jump to movq - jmp KMP_LABEL(kmp_4_exit) // jump ahead -KMP_LABEL(kmp_4): - movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) -KMP_LABEL(kmp_4_exit): - - cmpq $3, %rax // argc >= 3? - jns KMP_LABEL(kmp_3) // jump to movq - jmp KMP_LABEL(kmp_3_exit) // jump ahead -KMP_LABEL(kmp_3): - movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) -KMP_LABEL(kmp_3_exit): - - cmpq $2, %rax // argc >= 2? - jns KMP_LABEL(kmp_2) // jump to movq - jmp KMP_LABEL(kmp_2_exit) // jump ahead -KMP_LABEL(kmp_2): - movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) -KMP_LABEL(kmp_2_exit): - - cmpq $1, %rax // argc >= 1? - jns KMP_LABEL(kmp_1) // jump to movq - jmp KMP_LABEL(kmp_1_exit) // jump ahead -KMP_LABEL(kmp_1): - movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) -KMP_LABEL(kmp_1_exit): -#else - cmpq $4, %rax // argc >= 4? - cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) - - cmpq $3, %rax // argc >= 3? - cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) - - cmpq $2, %rax // argc >= 2? - cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) - - cmpq $1, %rax // argc >= 1? - cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) -#endif // KMP_MIC - - call *%rbx // call (*pkfn)(); - movq $1, %rax // move 1 into return register; - - movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified - movq %rbp, %rsp // restore stack pointer - popq %rbp // restore frame pointer - KMP_CFI_DEF rsp,8 - ret - - DEBUG_INFO __kmp_invoke_microtask -// -- End __kmp_invoke_microtask - -// kmp_uint64 -// __kmp_hardware_timestamp(void) - .text - PROC __kmp_hardware_timestamp - rdtsc - shlq $32, %rdx - orq %rdx, %rax - ret - - DEBUG_INFO __kmp_hardware_timestamp -// -- End __kmp_hardware_timestamp - -//------------------------------------------------------------------------ -// FUNCTION __kmp_bsr32 -// -// int -// __kmp_bsr32( int ); - .text - PROC __kmp_bsr32 - - bsr %edi,%eax - ret - - DEBUG_INFO __kmp_bsr32 - - -// ----------------------------------------------------------------------- -#endif /* KMP_ARCH_X86_64 */ - -// ' -#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 - -//------------------------------------------------------------------------ -// -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... ); -// return 1; -// } -// -// parameters: -// x0: pkfn -// w1: gtid -// w2: tid -// w3: argc -// x4: p_argv -// x5: &exit_frame -// -// locals: -// __gtid: gtid parm pushed on stack so can pass >id to pkfn -// __tid: tid parm pushed on stack so can pass &tid to pkfn -// -// reg temps: -// x8: used to hold pkfn address -// w9: used as temporary for number of pkfn parms -// x10: used to traverse p_argv array -// x11: used as temporary for stack placement calculation -// x12: used as temporary for stack parameters -// x19: used to preserve exit_frame_ptr, callee-save -// -// return: w0 (always 1/TRUE) -// - -__gtid = 4 -__tid = 8 - -// -- Begin __kmp_invoke_microtask -// mark_begin; - .text - PROC __kmp_invoke_microtask - - stp x29, x30, [sp, #-16]! -# if OMPT_SUPPORT - stp x19, x20, [sp, #-16]! -# endif - mov x29, sp - - orr w9, wzr, #1 - add w9, w9, w3, lsr #1 - sub sp, sp, w9, lsl #4 - mov x11, sp - - mov x8, x0 - str w1, [x29, #-__gtid] - str w2, [x29, #-__tid] - mov w9, w3 - mov x10, x4 -# if OMPT_SUPPORT - mov x19, x5 - str x29, [x19] -# endif - - sub x0, x29, #__gtid - sub x1, x29, #__tid - - cbz w9, KMP_LABEL(kmp_1) - ldr x2, [x10] - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x3, [x10, #8]! - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x4, [x10, #8]! - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x5, [x10, #8]! - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x6, [x10, #8]! - - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x7, [x10, #8]! - -KMP_LABEL(kmp_0): - sub w9, w9, #1 - cbz w9, KMP_LABEL(kmp_1) - ldr x12, [x10, #8]! - str x12, [x11], #8 - b KMP_LABEL(kmp_0) -KMP_LABEL(kmp_1): - blr x8 - orr w0, wzr, #1 - mov sp, x29 -# if OMPT_SUPPORT - str xzr, [x19] - ldp x19, x20, [sp], #16 -# endif - ldp x29, x30, [sp], #16 - ret - - DEBUG_INFO __kmp_invoke_microtask -// -- End __kmp_invoke_microtask - -#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */ - -#if KMP_ARCH_PPC64 - -//------------------------------------------------------------------------ -// -// typedef void (*microtask_t)( int *gtid, int *tid, ... ); -// -// int -// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), -// int gtid, int tid, -// int argc, void *p_argv[] ) { -// (*pkfn)( & gtid, & tid, argv[0], ... ); -// return 1; -// } -// -// parameters: -// r3: pkfn -// r4: gtid -// r5: tid -// r6: argc -// r7: p_argv -// r8: &exit_frame -// -// return: r3 (always 1/TRUE) -// - .text -# if KMP_ARCH_PPC64_LE - .abiversion 2 -# endif - .globl __kmp_invoke_microtask - -# if KMP_ARCH_PPC64_LE - .p2align 4 -# else - .p2align 2 -# endif - - .type __kmp_invoke_microtask,@function - -# if KMP_ARCH_PPC64_LE -__kmp_invoke_microtask: -.Lfunc_begin0: -.Lfunc_gep0: - addis 2, 12, .TOC.-.Lfunc_gep0@ha - addi 2, 2, .TOC.-.Lfunc_gep0@l -.Lfunc_lep0: - .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 -# else - .section .opd,"aw",@progbits -__kmp_invoke_microtask: - .p2align 3 - .quad .Lfunc_begin0 - .quad .TOC.@tocbase - .quad 0 - .text -.Lfunc_begin0: -# endif - -// -- Begin __kmp_invoke_microtask -// mark_begin; - -// We need to allocate a stack frame large enough to hold all of the parameters -// on the stack for the microtask plus what this function needs. That's 48 -// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the -// parameters to the microtask, plus 8 bytes to store the values of r4 and r5, -// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes -// to save r30 to hold a copy of r8. - - .cfi_startproc - mflr 0 - std 31, -8(1) - std 0, 16(1) - -// This is unusual because normally we'd set r31 equal to r1 after the stack -// frame is established. In this case, however, we need to dynamically compute -// the stack frame size, and so we keep a direct copy of r1 to access our -// register save areas and restore the r1 value before returning. - mr 31, 1 - .cfi_def_cfa_register r31 - .cfi_offset r31, -8 - .cfi_offset lr, 16 - -// Compute the size necessary for the local stack frame. -# if KMP_ARCH_PPC64_LE - li 12, 72 -# else - li 12, 88 -# endif - sldi 0, 6, 3 - add 12, 0, 12 - neg 12, 12 - -// We need to make sure that the stack frame stays aligned (to 16 bytes, except -// under the BG/Q CNK, where it must be to 32 bytes). -# if KMP_OS_CNK - li 0, -32 -# else - li 0, -16 -# endif - and 12, 0, 12 - -// Establish the local stack frame. - stdux 1, 1, 12 - -# if OMPT_SUPPORT - .cfi_offset r30, -16 - std 30, -16(31) - std 1, 0(8) - mr 30, 8 -# endif - -// Store gtid and tid to the stack because they're passed by reference to the microtask. - stw 4, -20(31) - stw 5, -24(31) - - mr 12, 6 - mr 4, 7 - - cmpwi 0, 12, 1 - blt 0, .Lcall - - ld 5, 0(4) - - cmpwi 0, 12, 2 - blt 0, .Lcall - - ld 6, 8(4) - - cmpwi 0, 12, 3 - blt 0, .Lcall - - ld 7, 16(4) - - cmpwi 0, 12, 4 - blt 0, .Lcall - - ld 8, 24(4) - - cmpwi 0, 12, 5 - blt 0, .Lcall - - ld 9, 32(4) - - cmpwi 0, 12, 6 - blt 0, .Lcall - - ld 10, 40(4) - - cmpwi 0, 12, 7 - blt 0, .Lcall - -// There are more than 6 microtask parameters, so we need to store the -// remainder to the stack. - addi 12, 12, -6 - mtctr 12 - -// These are set to 8 bytes before the first desired store address (we're using -// pre-increment loads and stores in the loop below). The parameter save area -// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and -// 32 + 8*8 == 96 bytes above r1 for ELFv2. - addi 4, 4, 40 -# if KMP_ARCH_PPC64_LE - addi 12, 1, 88 -# else - addi 12, 1, 104 -# endif - -.Lnext: - ldu 0, 8(4) - stdu 0, 8(12) - bdnz .Lnext - -.Lcall: -# if KMP_ARCH_PPC64_LE - std 2, 24(1) - mr 12, 3 -#else - std 2, 40(1) -// For ELFv1, we need to load the actual function address from the function descriptor. - ld 12, 0(3) - ld 2, 8(3) - ld 11, 16(3) -#endif - - addi 3, 31, -20 - addi 4, 31, -24 - - mtctr 12 - bctrl -# if KMP_ARCH_PPC64_LE - ld 2, 24(1) -# else - ld 2, 40(1) -# endif - -# if OMPT_SUPPORT - li 3, 0 - std 3, 0(30) -# endif - - li 3, 1 - -# if OMPT_SUPPORT - ld 30, -16(31) -# endif - - mr 1, 31 - ld 0, 16(1) - ld 31, -8(1) - mtlr 0 - blr - - .long 0 - .quad 0 -.Lfunc_end0: - .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 - .cfi_endproc - -// -- End __kmp_invoke_microtask - -#endif /* KMP_ARCH_PPC64 */ - -#if KMP_ARCH_ARM || KMP_ARCH_MIPS - .data - .comm .gomp_critical_user_,32,8 - .data - .align 4 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .4byte .gomp_critical_user_ - .size __kmp_unnamed_critical_addr,4 -#endif /* KMP_ARCH_ARM */ - -#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 - .data - .comm .gomp_critical_user_,32,8 - .data - .align 8 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: - .8byte .gomp_critical_user_ - .size __kmp_unnamed_critical_addr,8 -#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */ - -#if KMP_OS_LINUX -# if KMP_ARCH_ARM -.section .note.GNU-stack,"",%progbits -# else -.section .note.GNU-stack,"",@progbits -# endif -#endif