diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S
--- a/libunwind/src/UnwindRegistersRestore.S
+++ b/libunwind/src/UnwindRegistersRestore.S
@@ -180,6 +180,209 @@
   PPC64_LR(31)
 
 #if defined(__VSX__)
+#if defined(__LITTLE_ENDIAN__) && defined(__linux__)
+
+  TEST_ISA3(isa3restore)
+
+  // restore VS registers
+  // (note that this also restores floating point registers and V registers,
+  // because part of VS is mapped to these registers)
+
+  addi  4, 3, PPC64_OFFS_FP
+
+// load VS register
+#define PPC64_LVS(n)       \
+  lxvd2x  n, 0, 4         ;\
+  xxswapd n, n            ;\
+  addi    4, 4, 16
+
+#define PPC64_LVS_ISA3(n)  \
+  lxv     n, (16 * n)(4)
+
+  // restore the first 32 VS regs (and also all floating point regs)
+  PPC64_LVS(0)
+  PPC64_LVS(1)
+  PPC64_LVS(2)
+  PPC64_LVS(3)
+  PPC64_LVS(4)
+  PPC64_LVS(5)
+  PPC64_LVS(6)
+  PPC64_LVS(7)
+  PPC64_LVS(8)
+  PPC64_LVS(9)
+  PPC64_LVS(10)
+  PPC64_LVS(11)
+  PPC64_LVS(12)
+  PPC64_LVS(13)
+  PPC64_LVS(14)
+  PPC64_LVS(15)
+  PPC64_LVS(16)
+  PPC64_LVS(17)
+  PPC64_LVS(18)
+  PPC64_LVS(19)
+  PPC64_LVS(20)
+  PPC64_LVS(21)
+  PPC64_LVS(22)
+  PPC64_LVS(23)
+  PPC64_LVS(24)
+  PPC64_LVS(25)
+  PPC64_LVS(26)
+  PPC64_LVS(27)
+  PPC64_LVS(28)
+  PPC64_LVS(29)
+  PPC64_LVS(30)
+  PPC64_LVS(31)
+
+#define PPC64_CLVS_RESTORE(n)            \
+  addi    4, 3, PPC64_OFFS_FP + n * 16  ;\
+  lxvd2x  n, 0, 4                       ;\
+  xxswapd n, n
+
+  // use VRSAVE to conditionally restore the remaining VS regs, that are
+  // where the V regs are mapped. In the AIX ABI, VRSAVE is not used.
+  ld    5, PPC64_OFFS_VRSAVE(3)   // test VRsave
+  cmpwi 5, 0
+  beq   Lnovec
+
+// conditionally load VS
+#define PPC64_CLVSl(n)                    \
+  andis. 0, 5, (1 PPC_LEFT_SHIFT(47-n))  ;\
+  beq    Ldone##n                        ;\
+  PPC64_CLVS_RESTORE(n)                  ;\
+Ldone##n:
+
+#define PPC64_CLVSh(n)                    \
+  andi.  0, 5, (1 PPC_LEFT_SHIFT(63-n))  ;\
+  beq    Ldone##n                        ;\
+  PPC64_CLVS_RESTORE(n)                  ;\
+Ldone##n:
+
+  PPC64_CLVSl(32)
+  PPC64_CLVSl(33)
+  PPC64_CLVSl(34)
+  PPC64_CLVSl(35)
+  PPC64_CLVSl(36)
+  PPC64_CLVSl(37)
+  PPC64_CLVSl(38)
+  PPC64_CLVSl(39)
+  PPC64_CLVSl(40)
+  PPC64_CLVSl(41)
+  PPC64_CLVSl(42)
+  PPC64_CLVSl(43)
+  PPC64_CLVSl(44)
+  PPC64_CLVSl(45)
+  PPC64_CLVSl(46)
+  PPC64_CLVSl(47)
+  PPC64_CLVSh(48)
+  PPC64_CLVSh(49)
+  PPC64_CLVSh(50)
+  PPC64_CLVSh(51)
+  PPC64_CLVSh(52)
+  PPC64_CLVSh(53)
+  PPC64_CLVSh(54)
+  PPC64_CLVSh(55)
+  PPC64_CLVSh(56)
+  PPC64_CLVSh(57)
+  PPC64_CLVSh(58)
+  PPC64_CLVSh(59)
+  PPC64_CLVSh(60)
+  PPC64_CLVSh(61)
+  PPC64_CLVSh(62)
+  PPC64_CLVSh(63)
+  b Lnovec
+
+isa3restore:
+  addi  4, 3, PPC64_OFFS_FP
+  // restore the first 32 VS regs (and also all floating point regs)
+  PPC64_LVS_ISA3(0)
+  PPC64_LVS_ISA3(1)
+  PPC64_LVS_ISA3(2)
+  PPC64_LVS_ISA3(3)
+  PPC64_LVS_ISA3(4)
+  PPC64_LVS_ISA3(5)
+  PPC64_LVS_ISA3(6)
+  PPC64_LVS_ISA3(7)
+  PPC64_LVS_ISA3(8)
+  PPC64_LVS_ISA3(9)
+  PPC64_LVS_ISA3(10)
+  PPC64_LVS_ISA3(11)
+  PPC64_LVS_ISA3(12)
+  PPC64_LVS_ISA3(13)
+  PPC64_LVS_ISA3(14)
+  PPC64_LVS_ISA3(15)
+  PPC64_LVS_ISA3(16)
+  PPC64_LVS_ISA3(17)
+  PPC64_LVS_ISA3(18)
+  PPC64_LVS_ISA3(19)
+  PPC64_LVS_ISA3(20)
+  PPC64_LVS_ISA3(21)
+  PPC64_LVS_ISA3(22)
+  PPC64_LVS_ISA3(23)
+  PPC64_LVS_ISA3(24)
+  PPC64_LVS_ISA3(25)
+  PPC64_LVS_ISA3(26)
+  PPC64_LVS_ISA3(27)
+  PPC64_LVS_ISA3(28)
+  PPC64_LVS_ISA3(29)
+  PPC64_LVS_ISA3(30)
+  PPC64_LVS_ISA3(31)
+
+#define PPC64_CLVS_RESTORE_ISA3(n)  \
+  lxv n, (n * 16)(4)
+
+  // use VRSAVE to conditionally restore the remaining VS regs, that are
+  // where the V regs are mapped. In the AIX ABI, VRSAVE is not used.
+  ld    5, PPC64_OFFS_VRSAVE(3)   // test VRsave
+  cmpwi 5, 0
+  beq   Lnovec
+
+// conditionally load VS
+#define PPC64_CLVSl_isa3(n)               \
+  andis. 0, 5, (1 PPC_LEFT_SHIFT(47-n))  ;\
+  beq    Ldone_isa3_##n                  ;\
+  PPC64_CLVS_RESTORE_ISA3(n)             ;\
+Ldone_isa3_##n:
+
+#define PPC64_CLVSh_isa3(n)               \
+  andi.  0, 5, (1 PPC_LEFT_SHIFT(63-n))  ;\
+  beq    Ldone_isa3_##n                  ;\
+  PPC64_CLVS_RESTORE_ISA3(n)             ;\
+Ldone_isa3_##n:
+
+  PPC64_CLVSl_isa3(32)
+  PPC64_CLVSl_isa3(33)
+  PPC64_CLVSl_isa3(34)
+  PPC64_CLVSl_isa3(35)
+  PPC64_CLVSl_isa3(36)
+  PPC64_CLVSl_isa3(37)
+  PPC64_CLVSl_isa3(38)
+  PPC64_CLVSl_isa3(39)
+  PPC64_CLVSl_isa3(40)
+  PPC64_CLVSl_isa3(41)
+  PPC64_CLVSl_isa3(42)
+  PPC64_CLVSl_isa3(43)
+  PPC64_CLVSl_isa3(44)
+  PPC64_CLVSl_isa3(45)
+  PPC64_CLVSl_isa3(46)
+  PPC64_CLVSl_isa3(47)
+  PPC64_CLVSh_isa3(48)
+  PPC64_CLVSh_isa3(49)
+  PPC64_CLVSh_isa3(50)
+  PPC64_CLVSh_isa3(51)
+  PPC64_CLVSh_isa3(52)
+  PPC64_CLVSh_isa3(53)
+  PPC64_CLVSh_isa3(54)
+  PPC64_CLVSh_isa3(55)
+  PPC64_CLVSh_isa3(56)
+  PPC64_CLVSh_isa3(57)
+  PPC64_CLVSh_isa3(58)
+  PPC64_CLVSh_isa3(59)
+  PPC64_CLVSh_isa3(60)
+  PPC64_CLVSh_isa3(61)
+  PPC64_CLVSh_isa3(62)
+  PPC64_CLVSh_isa3(63)
+
+#else // __LITTLE_ENDIAN__ && __linux__
 
 // restore VS registers
 // (note that this also restores floating point registers and V registers,
@@ -290,6 +493,7 @@
   PPC64_CLVSh(62)
   PPC64_CLVSh(63)
 
+#endif // __LITTLE_ENDIAN__ && __linux__
 #else
 
 // load FP register
@@ -1294,4 +1498,4 @@
 #endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */
 
 NO_EXEC_STACK_DIRECTIVE
-
+GLIBC_VERSION_SYM
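Note on the conditional restore above: VRSAVE is a 32-bit mask in which
software records which V registers are live, and PowerPC numbers its bits
from the most significant end, so v0 is tracked by bit 0 (the MSB). Since
VS32..VS63 alias v0..v31, PPC64_CLVSl/PPC64_CLVSh split the test across the
two 16-bit halves of the mask with andis. and andi.. A minimal C sketch of
the same test, stated in conventional LSB-0 bit numbering (vr_is_live is an
illustrative name, not part of this patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Returns whether the V register aliased by VS register vs
       (32 <= vs <= 63) is marked live in VRSAVE.  v_k corresponds to
       bit (31 - k) in LSB-0 terms; this is exactly what andis. with
       (1 << (47-n)) and andi. with (1 << (63-n)) compute for the upper
       and lower 16-bit halves. */
    static bool vr_is_live(uint32_t vrsave, int vs) {
      int k = vs - 32; /* v0..v31 */
      return (vrsave >> (31 - k)) & 1;
    }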
diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S
--- a/libunwind/src/UnwindRegistersSave.S
+++ b/libunwind/src/UnwindRegistersSave.S
@@ -395,6 +395,161 @@
   std   0, PPC64_OFFS_VRSAVE(3)
 
 #if defined(__VSX__)
+#if defined(__LITTLE_ENDIAN__) && defined(__linux__)
+
+  TEST_ISA3(isa3save)
+
+  // save VS registers
+  // (note that this also saves floating point registers and V registers,
+  // because part of VS is mapped to these registers)
+
+  addi  4, 3, PPC64_OFFS_FP
+
+// store VS register: swap to big-endian doubleword order for stxvd2x, then
+// swap back so unw_getcontext leaves the caller's register state unchanged
+#define PPC64_STVS(n)       \
+  xxswapd n, n             ;\
+  stxvd2x n, 0, 4          ;\
+  xxswapd n, n             ;\
+  addi    4, 4, 16
+#define PPC64_STVS_ISA3(n)  \
+  stxv    n, (16 * n)(4)
+
+  PPC64_STVS(0)
+  PPC64_STVS(1)
+  PPC64_STVS(2)
+  PPC64_STVS(3)
+  PPC64_STVS(4)
+  PPC64_STVS(5)
+  PPC64_STVS(6)
+  PPC64_STVS(7)
+  PPC64_STVS(8)
+  PPC64_STVS(9)
+  PPC64_STVS(10)
+  PPC64_STVS(11)
+  PPC64_STVS(12)
+  PPC64_STVS(13)
+  PPC64_STVS(14)
+  PPC64_STVS(15)
+  PPC64_STVS(16)
+  PPC64_STVS(17)
+  PPC64_STVS(18)
+  PPC64_STVS(19)
+  PPC64_STVS(20)
+  PPC64_STVS(21)
+  PPC64_STVS(22)
+  PPC64_STVS(23)
+  PPC64_STVS(24)
+  PPC64_STVS(25)
+  PPC64_STVS(26)
+  PPC64_STVS(27)
+  PPC64_STVS(28)
+  PPC64_STVS(29)
+  PPC64_STVS(30)
+  PPC64_STVS(31)
+  PPC64_STVS(32)
+  PPC64_STVS(33)
+  PPC64_STVS(34)
+  PPC64_STVS(35)
+  PPC64_STVS(36)
+  PPC64_STVS(37)
+  PPC64_STVS(38)
+  PPC64_STVS(39)
+  PPC64_STVS(40)
+  PPC64_STVS(41)
+  PPC64_STVS(42)
+  PPC64_STVS(43)
+  PPC64_STVS(44)
+  PPC64_STVS(45)
+  PPC64_STVS(46)
+  PPC64_STVS(47)
+  PPC64_STVS(48)
+  PPC64_STVS(49)
+  PPC64_STVS(50)
+  PPC64_STVS(51)
+  PPC64_STVS(52)
+  PPC64_STVS(53)
+  PPC64_STVS(54)
+  PPC64_STVS(55)
+  PPC64_STVS(56)
+  PPC64_STVS(57)
+  PPC64_STVS(58)
+  PPC64_STVS(59)
+  PPC64_STVS(60)
+  PPC64_STVS(61)
+  PPC64_STVS(62)
+  PPC64_STVS(63)
+  b isa3savedone
+
+isa3save:
+  addi  4, 3, PPC64_OFFS_FP
+  PPC64_STVS_ISA3(0)
+  PPC64_STVS_ISA3(1)
+  PPC64_STVS_ISA3(2)
+  PPC64_STVS_ISA3(3)
+  PPC64_STVS_ISA3(4)
+  PPC64_STVS_ISA3(5)
+  PPC64_STVS_ISA3(6)
+  PPC64_STVS_ISA3(7)
+  PPC64_STVS_ISA3(8)
+  PPC64_STVS_ISA3(9)
+  PPC64_STVS_ISA3(10)
+  PPC64_STVS_ISA3(11)
+  PPC64_STVS_ISA3(12)
+  PPC64_STVS_ISA3(13)
+  PPC64_STVS_ISA3(14)
+  PPC64_STVS_ISA3(15)
+  PPC64_STVS_ISA3(16)
+  PPC64_STVS_ISA3(17)
+  PPC64_STVS_ISA3(18)
+  PPC64_STVS_ISA3(19)
+  PPC64_STVS_ISA3(20)
+  PPC64_STVS_ISA3(21)
+  PPC64_STVS_ISA3(22)
+  PPC64_STVS_ISA3(23)
+  PPC64_STVS_ISA3(24)
+  PPC64_STVS_ISA3(25)
+  PPC64_STVS_ISA3(26)
+  PPC64_STVS_ISA3(27)
+  PPC64_STVS_ISA3(28)
+  PPC64_STVS_ISA3(29)
+  PPC64_STVS_ISA3(30)
+  PPC64_STVS_ISA3(31)
+  PPC64_STVS_ISA3(32)
+  PPC64_STVS_ISA3(33)
+  PPC64_STVS_ISA3(34)
+  PPC64_STVS_ISA3(35)
+  PPC64_STVS_ISA3(36)
+  PPC64_STVS_ISA3(37)
+  PPC64_STVS_ISA3(38)
+  PPC64_STVS_ISA3(39)
+  PPC64_STVS_ISA3(40)
+  PPC64_STVS_ISA3(41)
+  PPC64_STVS_ISA3(42)
+  PPC64_STVS_ISA3(43)
+  PPC64_STVS_ISA3(44)
+  PPC64_STVS_ISA3(45)
+  PPC64_STVS_ISA3(46)
+  PPC64_STVS_ISA3(47)
+  PPC64_STVS_ISA3(48)
+  PPC64_STVS_ISA3(49)
+  PPC64_STVS_ISA3(50)
+  PPC64_STVS_ISA3(51)
+  PPC64_STVS_ISA3(52)
+  PPC64_STVS_ISA3(53)
+  PPC64_STVS_ISA3(54)
+  PPC64_STVS_ISA3(55)
+  PPC64_STVS_ISA3(56)
+  PPC64_STVS_ISA3(57)
+  PPC64_STVS_ISA3(58)
+  PPC64_STVS_ISA3(59)
+  PPC64_STVS_ISA3(60)
+  PPC64_STVS_ISA3(61)
+  PPC64_STVS_ISA3(62)
+  PPC64_STVS_ISA3(63)
+isa3savedone:
+
+#else // __LITTLE_ENDIAN__ && __linux__
 
 // save VS registers
 // (note that this also saves floating point registers and V registers,
 // because part of VS is mapped to these registers)
@@ -471,6 +626,7 @@
   PPC64_STVS(62)
   PPC64_STVS(63)
 
+#endif // __LITTLE_ENDIAN__ && __linux__
 #else
 
 // store FP register
@@ -1229,3 +1385,4 @@
 #endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */
 
 NO_EXEC_STACK_DIRECTIVE
+GLIBC_VERSION_SYM
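Note on the two store paths above: on little-endian targets stxvd2x stores
the two 64-bit doublewords of a VSX register in big-endian doubleword
order, whereas the ISA 3.0 stxv stores the full 16 bytes in true
little-endian layout. The xxswapd before stxvd2x therefore makes the
pre-ISA 3.0 path write the same context image that stxv (and the lxv-based
restore path) uses; the second xxswapd undoes the swap because
unw_getcontext must return with the caller's registers intact. A small C
model of the two 16-byte images (buffer names are made up for
illustration):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
      /* the two doublewords of one VSX register */
      uint64_t dw[2] = {0x1111111111111111ULL, 0x2222222222222222ULL};
      uint8_t stxv_img[16], stxvd2x_img[16];

      memcpy(stxv_img, dw, 16);           /* stxv: LE layout as-is      */
      memcpy(stxvd2x_img, &dw[1], 8);     /* stxvd2x on LE: doublewords */
      memcpy(stxvd2x_img + 8, &dw[0], 8); /* land in swapped order      */

      /* swapping the doublewords first (xxswapd) makes them identical */
      printf("images differ: %d\n", memcmp(stxv_img, stxvd2x_img, 16) != 0);
      return 0;
    }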
diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h
--- a/libunwind/src/assembly.h
+++ b/libunwind/src/assembly.h
@@ -23,6 +23,24 @@
 #endif
 
 #if defined(__powerpc64__)
+#if defined(__LITTLE_ENDIAN__) && defined(__linux__)
+#include <features.h>   // for __GLIBC__ / __GLIBC_MINOR__
+
+#define HWCAP2_OFF -28776
+#define PPC_FEATURE2_ARCH_3_00_HI 0x80
+// Test the HWCAP2 bit 0x00800000 (PPC_FEATURE2_ARCH_3_00), read from the
+// glibc TCB through r13. This allows the unwinder to use the ISA 3.0
+// instructions lxv and stxv for restoring and saving VSX registers,
+// respectively, avoiding the need for doubleword swaps.
+#if (__GLIBC__ >= 2) && (__GLIBC_MINOR__ >= 23)
+#define TEST_ISA3(label)                    \
+  lwz    4, HWCAP2_OFF(13)                 ;\
+  andis. 4, 4, PPC_FEATURE2_ARCH_3_00_HI   ;\
+  bne    0, label
+#else
+#define TEST_ISA3(label)
+#endif
+
+#endif
 #define SEPARATOR ;
 #define PPC64_OFFS_SRR0   0
 #define PPC64_OFFS_CR     272
@@ -164,6 +182,19 @@
 #else
 #define NO_EXEC_STACK_DIRECTIVE
 #endif
+#if defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) &&                    \
+    defined(__linux__) && (__GLIBC__ >= 2) && (__GLIBC_MINOR__ >= 23)
+// Since we test a bit in HWCAP2 on little-endian PPC systems, we need to
+// link and run against a glibc that provides that bit in the TCB. glibc
+// 2.23 and above do, and they advertise it by defining the symbol
+// referenced below. All currently supported distros on PPC64 LE ship at
+// least that glibc version. This undefined reference ensures a user
+// doesn't inadvertently run against an older glibc that does not provide
+// HWCAP2.
+#define GLIBC_VERSION_SYM .quad __parse_hwcap_and_convert_at_platform
+#else
+#define GLIBC_VERSION_SYM
+#endif
 
 #elif defined(_WIN32)
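Note on TEST_ISA3: the macro reads HWCAP2 out of the glibc TCB at a fixed
offset from r13 (the thread pointer on PPC64) because this assembly runs in
contexts where calling into libc is not an option; glibc >= 2.23 guarantees
the field exists, which is what the __parse_hwcap_and_convert_at_platform
link-time anchor enforces. Ordinary C code would query the same bit through
getauxval(3). A stand-alone sketch of the equivalent check, for reference
only (not part of the patch):

    #include <stdio.h>
    #include <sys/auxv.h>   /* getauxval, AT_HWCAP2 */

    /* PPC_FEATURE2_ARCH_3_00 comes from <asm/cputable.h> on powerpc
       Linux; defined here as a fallback with its documented value. */
    #ifndef PPC_FEATURE2_ARCH_3_00
    #define PPC_FEATURE2_ARCH_3_00 0x00800000
    #endif

    int main(void) {
      if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00)
        puts("ISA 3.0 available: lxv/stxv can be used directly");
      else
        puts("pre-ISA 3.0: fall back to lxvd2x/stxvd2x + xxswapd");
      return 0;
    }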