Index: compiler-rt/trunk/lib/builtins/absvdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/absvdi2.c +++ compiler-rt/trunk/lib/builtins/absvdi2.c @@ -17,12 +17,10 @@ /* Effects: aborts if abs(x) < 0 */ -COMPILER_RT_ABI di_int -__absvdi2(di_int a) -{ - const int N = (int)(sizeof(di_int) * CHAR_BIT); - if (a == ((di_int)1 << (N-1))) - compilerrt_abort(); - const di_int t = a >> (N - 1); - return (a ^ t) - t; +COMPILER_RT_ABI di_int __absvdi2(di_int a) { + const int N = (int)(sizeof(di_int) * CHAR_BIT); + if (a == ((di_int)1 << (N - 1))) + compilerrt_abort(); + const di_int t = a >> (N - 1); + return (a ^ t) - t; } Index: compiler-rt/trunk/lib/builtins/absvsi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/absvsi2.c +++ compiler-rt/trunk/lib/builtins/absvsi2.c @@ -17,12 +17,10 @@ /* Effects: aborts if abs(x) < 0 */ -COMPILER_RT_ABI si_int -__absvsi2(si_int a) -{ - const int N = (int)(sizeof(si_int) * CHAR_BIT); - if (a == (1 << (N-1))) - compilerrt_abort(); - const si_int t = a >> (N - 1); - return (a ^ t) - t; +COMPILER_RT_ABI si_int __absvsi2(si_int a) { + const int N = (int)(sizeof(si_int) * CHAR_BIT); + if (a == (1 << (N - 1))) + compilerrt_abort(); + const si_int t = a >> (N - 1); + return (a ^ t) - t; } Index: compiler-rt/trunk/lib/builtins/absvti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/absvti2.c +++ compiler-rt/trunk/lib/builtins/absvti2.c @@ -19,15 +19,12 @@ /* Effects: aborts if abs(x) < 0 */ -COMPILER_RT_ABI ti_int -__absvti2(ti_int a) -{ - const int N = (int)(sizeof(ti_int) * CHAR_BIT); - if (a == ((ti_int)1 << (N-1))) - compilerrt_abort(); - const ti_int s = a >> (N - 1); - return (a ^ s) - s; +COMPILER_RT_ABI ti_int __absvti2(ti_int a) { + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + if (a == ((ti_int)1 << (N - 1))) + compilerrt_abort(); + const ti_int s = a >> (N - 1); 
+ return (a ^ s) - s; } #endif /* CRT_HAS_128BIT */ - Index: compiler-rt/trunk/lib/builtins/adddf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/adddf3.c +++ compiler-rt/trunk/lib/builtins/adddf3.c @@ -14,15 +14,11 @@ #define DOUBLE_PRECISION #include "fp_add_impl.inc" -COMPILER_RT_ABI double __adddf3(double a, double b){ - return __addXf3__(a, b); -} +COMPILER_RT_ABI double __adddf3(double a, double b) { return __addXf3__(a, b); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI double __aeabi_dadd(double a, double b) { - return __adddf3(a, b); -} +AEABI_RTABI double __aeabi_dadd(double a, double b) { return __adddf3(a, b); } #else AEABI_RTABI double __aeabi_dadd(double a, double b) COMPILER_RT_ALIAS(__adddf3); #endif Index: compiler-rt/trunk/lib/builtins/addsf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/addsf3.c +++ compiler-rt/trunk/lib/builtins/addsf3.c @@ -14,15 +14,11 @@ #define SINGLE_PRECISION #include "fp_add_impl.inc" -COMPILER_RT_ABI float __addsf3(float a, float b) { - return __addXf3__(a, b); -} +COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI float __aeabi_fadd(float a, float b) { - return __addsf3(a, b); -} +AEABI_RTABI float __aeabi_fadd(float a, float b) { return __addsf3(a, b); } #else AEABI_RTABI float __aeabi_fadd(float a, float b) COMPILER_RT_ALIAS(__addsf3); #endif Index: compiler-rt/trunk/lib/builtins/addtf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/addtf3.c +++ compiler-rt/trunk/lib/builtins/addtf3.c @@ -17,8 +17,8 @@ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) #include "fp_add_impl.inc" -COMPILER_RT_ABI long double __addtf3(long double a, long double b){ - return __addXf3__(a, b); +COMPILER_RT_ABI long double 
__addtf3(long double a, long double b) { + return __addXf3__(a, b); } #endif Index: compiler-rt/trunk/lib/builtins/addvdi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/addvdi3.c +++ compiler-rt/trunk/lib/builtins/addvdi3.c @@ -17,19 +17,14 @@ /* Effects: aborts if a + b overflows */ -COMPILER_RT_ABI di_int -__addvdi3(di_int a, di_int b) -{ - di_int s = (du_int) a + (du_int) b; - if (b >= 0) - { - if (s < a) - compilerrt_abort(); - } - else - { - if (s >= a) - compilerrt_abort(); - } - return s; +COMPILER_RT_ABI di_int __addvdi3(di_int a, di_int b) { + di_int s = (du_int)a + (du_int)b; + if (b >= 0) { + if (s < a) + compilerrt_abort(); + } else { + if (s >= a) + compilerrt_abort(); + } + return s; } Index: compiler-rt/trunk/lib/builtins/addvsi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/addvsi3.c +++ compiler-rt/trunk/lib/builtins/addvsi3.c @@ -17,19 +17,14 @@ /* Effects: aborts if a + b overflows */ -COMPILER_RT_ABI si_int -__addvsi3(si_int a, si_int b) -{ - si_int s = (su_int) a + (su_int) b; - if (b >= 0) - { - if (s < a) - compilerrt_abort(); - } - else - { - if (s >= a) - compilerrt_abort(); - } - return s; +COMPILER_RT_ABI si_int __addvsi3(si_int a, si_int b) { + si_int s = (su_int)a + (su_int)b; + if (b >= 0) { + if (s < a) + compilerrt_abort(); + } else { + if (s >= a) + compilerrt_abort(); + } + return s; } Index: compiler-rt/trunk/lib/builtins/addvti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/addvti3.c +++ compiler-rt/trunk/lib/builtins/addvti3.c @@ -19,21 +19,16 @@ /* Effects: aborts if a + b overflows */ -COMPILER_RT_ABI ti_int -__addvti3(ti_int a, ti_int b) -{ - ti_int s = (tu_int) a + (tu_int) b; - if (b >= 0) - { - if (s < a) - compilerrt_abort(); - } - else - { - if (s >= a) - compilerrt_abort(); - } - return s; +COMPILER_RT_ABI ti_int __addvti3(ti_int a, ti_int b) { + 
ti_int s = (tu_int)a + (tu_int)b; + if (b >= 0) { + if (s < a) + compilerrt_abort(); + } else { + if (s >= a) + compilerrt_abort(); + } + return s; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/apple_versioning.c =================================================================== --- compiler-rt/trunk/lib/builtins/apple_versioning.c +++ compiler-rt/trunk/lib/builtins/apple_versioning.c @@ -7,38 +7,36 @@ * ===----------------------------------------------------------------------=== */ - #if __APPLE__ - #include - - #if __IPHONE_OS_VERSION_MIN_REQUIRED - #define NOT_HERE_BEFORE_10_6(sym) - #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ - extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \ - extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \ - extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \ - extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp50 = 0; - #else - #define NOT_HERE_BEFORE_10_6(sym) \ - extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ - extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp5 = 0; - #define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ - extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \ - extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \ - extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp6 = 0; - #endif 
+#include +#if __IPHONE_OS_VERSION_MIN_REQUIRED +#define NOT_HERE_BEFORE_10_6(sym) +#define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ + extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \ + extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \ + extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \ + extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp50 = 0; +#else +#define NOT_HERE_BEFORE_10_6(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; +#define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ + extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \ + extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; +#endif -/* Symbols in libSystem.dylib in 10.6 and later, +/* Symbols in libSystem.dylib in 10.6 and later, * but are in libgcc_s.dylib in earlier versions */ @@ -142,7 +140,6 @@ NOT_HERE_BEFORE_10_6(__umoddi3) NOT_HERE_BEFORE_10_6(__umodti3) - #if __ppc__ NOT_HERE_BEFORE_10_6(__gcc_qadd) NOT_HERE_BEFORE_10_6(__gcc_qdiv) @@ -200,24 +197,23 @@ NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_4) NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_8) - #if __arm__ && __DYNAMIC__ - #define NOT_HERE_UNTIL_AFTER_4_3(sym) \ - extern const char sym##_tmp1 
__asm("$ld$hide$os3.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \ - extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp2 = 0; \ - extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \ - extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ - extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ - extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \ - extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym ); \ - __attribute__((visibility("default"))) const char sym##_tmp7 = 0; - +#define NOT_HERE_UNTIL_AFTER_4_3(sym) \ + extern const char sym##_tmp1 __asm("$ld$hide$os3.0$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \ + extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp2 = 0; \ + extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \ + extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \ + extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp7 = 0; + NOT_HERE_UNTIL_AFTER_4_3(__absvdi2) NOT_HERE_UNTIL_AFTER_4_3(__absvsi2) 
NOT_HERE_UNTIL_AFTER_4_3(__adddf3) @@ -338,10 +334,6 @@ NOT_HERE_UNTIL_AFTER_4_3(__udivmodsi4) #endif // __arm__ && __DYNAMIC__ - - - - #else /* !__APPLE__ */ extern int avoid_empty_file; Index: compiler-rt/trunk/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c =================================================================== --- compiler-rt/trunk/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c +++ compiler-rt/trunk/lib/builtins/arm/aeabi_cdcmpeq_check_nan.c @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include #include "../int_lib.h" +#include -AEABI_RTABI __attribute__((visibility("hidden"))) -int __aeabi_cdcmpeq_check_nan(double a, double b) { - return __builtin_isnan(a) || __builtin_isnan(b); +AEABI_RTABI __attribute__((visibility("hidden"))) int +__aeabi_cdcmpeq_check_nan(double a, double b) { + return __builtin_isnan(a) || __builtin_isnan(b); } Index: compiler-rt/trunk/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c =================================================================== --- compiler-rt/trunk/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c +++ compiler-rt/trunk/lib/builtins/arm/aeabi_cfcmpeq_check_nan.c @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include #include "../int_lib.h" +#include -AEABI_RTABI __attribute__((visibility("hidden"))) -int __aeabi_cfcmpeq_check_nan(float a, float b) { - return __builtin_isnan(a) || __builtin_isnan(b); +AEABI_RTABI __attribute__((visibility("hidden"))) int +__aeabi_cfcmpeq_check_nan(float a, float b) { + return __builtin_isnan(a) || __builtin_isnan(b); } Index: compiler-rt/trunk/lib/builtins/arm/aeabi_div0.c =================================================================== --- compiler-rt/trunk/lib/builtins/arm/aeabi_div0.c +++ compiler-rt/trunk/lib/builtins/arm/aeabi_div0.c @@ -36,9 +36,8 @@ return return_value; } -AEABI_RTABI long long __attribute__((weak)) __attribute__((visibility("hidden"))) 
-__aeabi_ldiv0(long long return_value) { +AEABI_RTABI long long __attribute__((weak)) +__attribute__((visibility("hidden"))) __aeabi_ldiv0(long long return_value) { return return_value; } #endif - Index: compiler-rt/trunk/lib/builtins/arm/aeabi_drsub.c =================================================================== --- compiler-rt/trunk/lib/builtins/arm/aeabi_drsub.c +++ compiler-rt/trunk/lib/builtins/arm/aeabi_drsub.c @@ -9,10 +9,6 @@ #define DOUBLE_PRECISION #include "../fp_lib.h" -AEABI_RTABI fp_t -__aeabi_dsub(fp_t, fp_t); +AEABI_RTABI fp_t __aeabi_dsub(fp_t, fp_t); -AEABI_RTABI fp_t -__aeabi_drsub(fp_t a, fp_t b) { - return __aeabi_dsub(b, a); -} +AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) { return __aeabi_dsub(b, a); } Index: compiler-rt/trunk/lib/builtins/arm/aeabi_frsub.c =================================================================== --- compiler-rt/trunk/lib/builtins/arm/aeabi_frsub.c +++ compiler-rt/trunk/lib/builtins/arm/aeabi_frsub.c @@ -9,10 +9,6 @@ #define SINGLE_PRECISION #include "../fp_lib.h" -AEABI_RTABI fp_t -__aeabi_fsub(fp_t, fp_t); +AEABI_RTABI fp_t __aeabi_fsub(fp_t, fp_t); -AEABI_RTABI fp_t -__aeabi_frsub(fp_t a, fp_t b) { - return __aeabi_fsub(b, a); -} +AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) { return __aeabi_fsub(b, a); } Index: compiler-rt/trunk/lib/builtins/arm/sync-ops.h =================================================================== --- compiler-rt/trunk/lib/builtins/arm/sync-ops.h +++ compiler-rt/trunk/lib/builtins/arm/sync-ops.h @@ -14,50 +14,48 @@ #include "../assembly.h" -#define SYNC_OP_4(op) \ - .p2align 2 ; \ - .thumb ; \ - .syntax unified ; \ - DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ - dmb ; \ - mov r12, r0 ; \ - LOCAL_LABEL(tryatomic_ ## op): \ - ldrex r0, [r12] ; \ - op(r2, r0, r1) ; \ - strex r3, r2, [r12] ; \ - cmp r3, #0 ; \ - bne LOCAL_LABEL(tryatomic_ ## op) ; \ - dmb ; \ - bx lr +#define SYNC_OP_4(op) \ + .p2align 2; \ + .thumb; \ + .syntax unified; \ + 
DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_##op) \ + dmb; \ + mov r12, r0; \ + LOCAL_LABEL(tryatomic_##op) : ldrex r0, [r12]; \ + op(r2, r0, r1); \ + strex r3, r2, [r12]; \ + cmp r3, #0; \ + bne LOCAL_LABEL(tryatomic_##op); \ + dmb; \ + bx lr -#define SYNC_OP_8(op) \ - .p2align 2 ; \ - .thumb ; \ - .syntax unified ; \ - DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \ - push {r4, r5, r6, lr} ; \ - dmb ; \ - mov r12, r0 ; \ - LOCAL_LABEL(tryatomic_ ## op): \ - ldrexd r0, r1, [r12] ; \ - op(r4, r5, r0, r1, r2, r3) ; \ - strexd r6, r4, r5, [r12] ; \ - cmp r6, #0 ; \ - bne LOCAL_LABEL(tryatomic_ ## op) ; \ - dmb ; \ - pop {r4, r5, r6, pc} +#define SYNC_OP_8(op) \ + .p2align 2; \ + .thumb; \ + .syntax unified; \ + DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_##op) \ + push{r4, r5, r6, lr}; \ + dmb; \ + mov r12, r0; \ + LOCAL_LABEL(tryatomic_##op) : ldrexd r0, r1, [r12]; \ + op(r4, r5, r0, r1, r2, r3); \ + strexd r6, r4, r5, [r12]; \ + cmp r6, #0; \ + bne LOCAL_LABEL(tryatomic_##op); \ + dmb; \ + pop { r4, r5, r6, pc } -#define MINMAX_4(rD, rN, rM, cmp_kind) \ - cmp rN, rM ; \ - mov rD, rM ; \ - it cmp_kind ; \ - mov##cmp_kind rD, rN +#define MINMAX_4(rD, rN, rM, cmp_kind) \ + cmp rN, rM; \ + mov rD, rM; \ + it cmp_kind; \ + mov##cmp_kind rD, rN -#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \ - cmp rN_LO, rM_LO ; \ - sbcs rN_HI, rM_HI ; \ - mov rD_LO, rM_LO ; \ - mov rD_HI, rM_HI ; \ - itt cmp_kind ; \ - mov##cmp_kind rD_LO, rN_LO ; \ - mov##cmp_kind rD_HI, rN_HI +#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \ + cmp rN_LO, rM_LO; \ + sbcs rN_HI, rM_HI; \ + mov rD_LO, rM_LO; \ + mov rD_HI, rM_HI; \ + itt cmp_kind; \ + mov##cmp_kind rD_LO, rN_LO; \ + mov##cmp_kind rD_HI, rN_HI Index: compiler-rt/trunk/lib/builtins/ashldi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/ashldi3.c +++ compiler-rt/trunk/lib/builtins/ashldi3.c @@ -17,28 +17,26 @@ /* 
Precondition: 0 <= b < bits_in_dword */ -COMPILER_RT_ABI di_int -__ashldi3(di_int a, si_int b) -{ - const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); - dwords input; - dwords result; - input.all = a; - if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ - { - result.s.low = 0; - result.s.high = input.s.low << (b - bits_in_word); - } - else /* 0 <= b < bits_in_word */ - { - if (b == 0) - return a; - result.s.low = input.s.low << b; - result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b)); - } - return result.all; +COMPILER_RT_ABI di_int __ashldi3(di_int a, si_int b) { + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_word); + } else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b)); + } + return result.all; } #if defined(__ARM_EABI__) -AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) COMPILER_RT_ALIAS(__ashldi3); +AEABI_RTABI di_int __aeabi_llsl(di_int a, si_int b) + COMPILER_RT_ALIAS(__ashldi3); #endif Index: compiler-rt/trunk/lib/builtins/ashlti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/ashlti3.c +++ compiler-rt/trunk/lib/builtins/ashlti3.c @@ -19,26 +19,23 @@ /* Precondition: 0 <= b < bits_in_tword */ -COMPILER_RT_ABI ti_int -__ashlti3(ti_int a, si_int b) -{ - const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); - twords input; - twords result; - input.all = a; - if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ - { - result.s.low = 0; - result.s.high = input.s.low << (b - bits_in_dword); - } - else /* 0 <= b < bits_in_dword */ - { - if (b == 0) - return a; - result.s.low = input.s.low << b; - result.s.high = (input.s.high << b) | 
(input.s.low >> (bits_in_dword - b)); - } - return result.all; +COMPILER_RT_ABI ti_int __ashlti3(ti_int a, si_int b) { + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_dword); + } else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_dword - b)); + } + return result.all; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/ashrdi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/ashrdi3.c +++ compiler-rt/trunk/lib/builtins/ashrdi3.c @@ -17,29 +17,27 @@ /* Precondition: 0 <= b < bits_in_dword */ -COMPILER_RT_ABI di_int -__ashrdi3(di_int a, si_int b) -{ - const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); - dwords input; - dwords result; - input.all = a; - if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ - { - /* result.s.high = input.s.high < 0 ? -1 : 0 */ - result.s.high = input.s.high >> (bits_in_word - 1); - result.s.low = input.s.high >> (b - bits_in_word); - } - else /* 0 <= b < bits_in_word */ - { - if (b == 0) - return a; - result.s.high = input.s.high >> b; - result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); - } - return result.all; +COMPILER_RT_ABI di_int __ashrdi3(di_int a, si_int b) { + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + /* result.s.high = input.s.high < 0 ? 
-1 : 0 */ + result.s.high = input.s.high >> (bits_in_word - 1); + result.s.low = input.s.high >> (b - bits_in_word); + } else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; } #if defined(__ARM_EABI__) -AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) COMPILER_RT_ALIAS(__ashrdi3); +AEABI_RTABI di_int __aeabi_lasr(di_int a, si_int b) + COMPILER_RT_ALIAS(__ashrdi3); #endif Index: compiler-rt/trunk/lib/builtins/ashrti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/ashrti3.c +++ compiler-rt/trunk/lib/builtins/ashrti3.c @@ -19,27 +19,24 @@ /* Precondition: 0 <= b < bits_in_tword */ -COMPILER_RT_ABI ti_int -__ashrti3(ti_int a, si_int b) -{ - const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); - twords input; - twords result; - input.all = a; - if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ - { - /* result.s.high = input.s.high < 0 ? -1 : 0 */ - result.s.high = input.s.high >> (bits_in_dword - 1); - result.s.low = input.s.high >> (b - bits_in_dword); - } - else /* 0 <= b < bits_in_dword */ - { - if (b == 0) - return a; - result.s.high = input.s.high >> b; - result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); - } - return result.all; +COMPILER_RT_ABI ti_int __ashrti3(ti_int a, si_int b) { + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + /* result.s.high = input.s.high < 0 ? 
-1 : 0 */ + result.s.high = input.s.high >> (bits_in_dword - 1); + result.s.low = input.s.high >> (b - bits_in_dword); + } else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/atomic.c =================================================================== --- compiler-rt/trunk/lib/builtins/atomic.c +++ compiler-rt/trunk/lib/builtins/atomic.c @@ -9,13 +9,13 @@ * atomic.c defines a set of functions for performing atomic accesses on * arbitrary-sized memory locations. This design uses locks that should * be fast in the uncontended case, for two reasons: - * + * * 1) This code must work with C programs that do not link to anything * (including pthreads) and so it should not depend on any pthread * functions. * 2) Atomic operations, rather than explicit mutexes, are most commonly used * on code where contended operations are rate. - * + * * To avoid needing a per-object lock, this code allocates an array of * locks and hashes the object pointers to find the one that it should use. * For operations that must be atomic on two locations, the lower lock is @@ -34,13 +34,14 @@ #pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load) #pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store) #pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange) -#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME(__atomic_compare_exchange) +#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME( \ + __atomic_compare_exchange) /// Number of locks. This allocates one page on 32-bit platforms, two on /// 64-bit. This can be specified externally if a different trade between /// memory usage and contention probability is required for a given platform. 
#ifndef SPINLOCK_COUNT -#define SPINLOCK_COUNT (1<<10) +#define SPINLOCK_COUNT (1 << 10) #endif static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1; @@ -51,38 +52,35 @@ //////////////////////////////////////////////////////////////////////////////// #ifdef __FreeBSD__ #include -#include #include +#include #include typedef struct _usem Lock; __inline static void unlock(Lock *l) { - __c11_atomic_store((_Atomic(uint32_t)*)&l->_count, 1, __ATOMIC_RELEASE); + __c11_atomic_store((_Atomic(uint32_t) *)&l->_count, 1, __ATOMIC_RELEASE); __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); if (l->_has_waiters) - _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0); + _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0); } __inline static void lock(Lock *l) { uint32_t old = 1; - while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t)*)&l->_count, &old, - 0, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { + while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t) *)&l->_count, + &old, 0, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) { _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0); old = 1; } } /// locks for atomic operations -static Lock locks[SPINLOCK_COUNT] = { [0 ... SPINLOCK_COUNT-1] = {0,1,0} }; +static Lock locks[SPINLOCK_COUNT] = {[0 ... SPINLOCK_COUNT - 1] = {0, 1, 0}}; #elif defined(__APPLE__) #include typedef OSSpinLock Lock; -__inline static void unlock(Lock *l) { - OSSpinLockUnlock(l); -} +__inline static void unlock(Lock *l) { OSSpinLockUnlock(l); } /// Locks a lock. In the current implementation, this is potentially /// unbounded in the contended case. 
-__inline static void lock(Lock *l) { - OSSpinLockLock(l); -} +__inline static void lock(Lock *l) { OSSpinLockLock(l); } static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0 #else @@ -96,20 +94,19 @@ __inline static void lock(Lock *l) { uintptr_t old = 0; while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE, - __ATOMIC_RELAXED)) + __ATOMIC_RELAXED)) old = 0; } /// locks for atomic operations static Lock locks[SPINLOCK_COUNT]; #endif - -/// Returns a lock to use for a given pointer. +/// Returns a lock to use for a given pointer. static __inline Lock *lock_for_pointer(void *ptr) { intptr_t hash = (intptr_t)ptr; // Disregard the lowest 4 bits. We want all values that may be part of the // same memory operation to hash to the same value and therefore use the same - // lock. + // lock. hash >>= 4; // Use the next bits as the basis for the hash intptr_t low = hash & SPINLOCK_MASK; @@ -132,45 +129,44 @@ /// Macro that calls the compiler-generated lock-free versions of functions /// when they exist. -#define LOCK_FREE_CASES() \ - do {\ - switch (size) {\ - case 1:\ - if (IS_LOCK_FREE_1) {\ - LOCK_FREE_ACTION(uint8_t);\ - }\ - break; \ - case 2:\ - if (IS_LOCK_FREE_2) {\ - LOCK_FREE_ACTION(uint16_t);\ - }\ - break; \ - case 4:\ - if (IS_LOCK_FREE_4) {\ - LOCK_FREE_ACTION(uint32_t);\ - }\ - break; \ - case 8:\ - if (IS_LOCK_FREE_8) {\ - LOCK_FREE_ACTION(uint64_t);\ - }\ - break; \ - case 16:\ - if (IS_LOCK_FREE_16) {\ - /* FIXME: __uint128_t isn't available on 32 bit platforms. 
- LOCK_FREE_ACTION(__uint128_t);*/\ - }\ - break; \ - }\ +#define LOCK_FREE_CASES() \ + do { \ + switch (size) { \ + case 1: \ + if (IS_LOCK_FREE_1) { \ + LOCK_FREE_ACTION(uint8_t); \ + } \ + break; \ + case 2: \ + if (IS_LOCK_FREE_2) { \ + LOCK_FREE_ACTION(uint16_t); \ + } \ + break; \ + case 4: \ + if (IS_LOCK_FREE_4) { \ + LOCK_FREE_ACTION(uint32_t); \ + } \ + break; \ + case 8: \ + if (IS_LOCK_FREE_8) { \ + LOCK_FREE_ACTION(uint64_t); \ + } \ + break; \ + case 16: \ + if (IS_LOCK_FREE_16) { \ + /* FIXME: __uint128_t isn't available on 32 bit platforms. \ + LOCK_FREE_ACTION(__uint128_t);*/ \ + } \ + break; \ + } \ } while (0) - /// An atomic load operation. This is atomic with respect to the source /// pointer only. void __atomic_load_c(int size, void *src, void *dest, int model) { -#define LOCK_FREE_ACTION(type) \ - *((type*)dest) = __c11_atomic_load((_Atomic(type)*)src, model);\ - return; +#define LOCK_FREE_ACTION(type) \ + *((type *)dest) = __c11_atomic_load((_Atomic(type) *)src, model); \ + return; LOCK_FREE_CASES(); #undef LOCK_FREE_ACTION Lock *l = lock_for_pointer(src); @@ -182,9 +178,9 @@ /// An atomic store operation. This is atomic with respect to the destination /// pointer only. void __atomic_store_c(int size, void *dest, void *src, int model) { -#define LOCK_FREE_ACTION(type) \ - __c11_atomic_store((_Atomic(type)*)dest, *(type*)src, model);\ - return; +#define LOCK_FREE_ACTION(type) \ + __c11_atomic_store((_Atomic(type) *)dest, *(type *)src, model); \ + return; LOCK_FREE_CASES(); #undef LOCK_FREE_ACTION Lock *l = lock_for_pointer(dest); @@ -197,12 +193,13 @@ /// to the value at *expected, then this copies value at *desired to *ptr. If /// they are not, then this stores the current value from *ptr in *expected. /// -/// This function returns 1 if the exchange takes place or 0 if it fails. +/// This function returns 1 if the exchange takes place or 0 if it fails. 
int __atomic_compare_exchange_c(int size, void *ptr, void *expected, - void *desired, int success, int failure) { -#define LOCK_FREE_ACTION(type) \ - return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, (type*)expected,\ - *(type*)desired, success, failure) + void *desired, int success, int failure) { +#define LOCK_FREE_ACTION(type) \ + return __c11_atomic_compare_exchange_strong( \ + (_Atomic(type) *)ptr, (type *)expected, *(type *)desired, success, \ + failure) LOCK_FREE_CASES(); #undef LOCK_FREE_ACTION Lock *l = lock_for_pointer(ptr); @@ -220,10 +217,10 @@ /// Performs an atomic exchange operation between two pointers. This is atomic /// with respect to the target address. void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) { -#define LOCK_FREE_ACTION(type) \ - *(type*)old = __c11_atomic_exchange((_Atomic(type)*)ptr, *(type*)val,\ - model);\ - return; +#define LOCK_FREE_ACTION(type) \ + *(type *)old = \ + __c11_atomic_exchange((_Atomic(type) *)ptr, *(type *)val, model); \ + return; LOCK_FREE_CASES(); #undef LOCK_FREE_ACTION Lock *l = lock_for_pointer(ptr); @@ -238,96 +235,96 @@ // specialised versions of the above functions. 
//////////////////////////////////////////////////////////////////////////////// #ifdef __SIZEOF_INT128__ -#define OPTIMISED_CASES\ - OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\ - OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\ - OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\ - OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t)\ +#define OPTIMISED_CASES \ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t) \ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t) \ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) \ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) \ OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t) #else -#define OPTIMISED_CASES\ - OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t)\ - OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t)\ - OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t)\ +#define OPTIMISED_CASES \ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t) \ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t) \ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) \ OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) #endif -#define OPTIMISED_CASE(n, lockfree, type)\ -type __atomic_load_##n(type *src, int model) {\ - if (lockfree)\ - return __c11_atomic_load((_Atomic(type)*)src, model);\ - Lock *l = lock_for_pointer(src);\ - lock(l);\ - type val = *src;\ - unlock(l);\ - return val;\ -} +#define OPTIMISED_CASE(n, lockfree, type) \ + type __atomic_load_##n(type *src, int model) { \ + if (lockfree) \ + return __c11_atomic_load((_Atomic(type) *)src, model); \ + Lock *l = lock_for_pointer(src); \ + lock(l); \ + type val = *src; \ + unlock(l); \ + return val; \ + } OPTIMISED_CASES #undef OPTIMISED_CASE -#define OPTIMISED_CASE(n, lockfree, type)\ -void __atomic_store_##n(type *dest, type val, int model) {\ - if (lockfree) {\ - __c11_atomic_store((_Atomic(type)*)dest, val, model);\ - return;\ - }\ - Lock *l = lock_for_pointer(dest);\ - lock(l);\ - *dest = val;\ - unlock(l);\ - return;\ -} +#define OPTIMISED_CASE(n, lockfree, type) \ + void __atomic_store_##n(type *dest, type val, int model) { \ + if (lockfree) { \ + 
__c11_atomic_store((_Atomic(type) *)dest, val, model); \ + return; \ + } \ + Lock *l = lock_for_pointer(dest); \ + lock(l); \ + *dest = val; \ + unlock(l); \ + return; \ + } OPTIMISED_CASES #undef OPTIMISED_CASE -#define OPTIMISED_CASE(n, lockfree, type)\ -type __atomic_exchange_##n(type *dest, type val, int model) {\ - if (lockfree)\ - return __c11_atomic_exchange((_Atomic(type)*)dest, val, model);\ - Lock *l = lock_for_pointer(dest);\ - lock(l);\ - type tmp = *dest;\ - *dest = val;\ - unlock(l);\ - return tmp;\ -} +#define OPTIMISED_CASE(n, lockfree, type) \ + type __atomic_exchange_##n(type *dest, type val, int model) { \ + if (lockfree) \ + return __c11_atomic_exchange((_Atomic(type) *)dest, val, model); \ + Lock *l = lock_for_pointer(dest); \ + lock(l); \ + type tmp = *dest; \ + *dest = val; \ + unlock(l); \ + return tmp; \ + } OPTIMISED_CASES #undef OPTIMISED_CASE -#define OPTIMISED_CASE(n, lockfree, type)\ -int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired,\ - int success, int failure) {\ - if (lockfree)\ - return __c11_atomic_compare_exchange_strong((_Atomic(type)*)ptr, expected, desired,\ - success, failure);\ - Lock *l = lock_for_pointer(ptr);\ - lock(l);\ - if (*ptr == *expected) {\ - *ptr = desired;\ - unlock(l);\ - return 1;\ - }\ - *expected = *ptr;\ - unlock(l);\ - return 0;\ -} +#define OPTIMISED_CASE(n, lockfree, type) \ + int __atomic_compare_exchange_##n(type *ptr, type *expected, type desired, \ + int success, int failure) { \ + if (lockfree) \ + return __c11_atomic_compare_exchange_strong( \ + (_Atomic(type) *)ptr, expected, desired, success, failure); \ + Lock *l = lock_for_pointer(ptr); \ + lock(l); \ + if (*ptr == *expected) { \ + *ptr = desired; \ + unlock(l); \ + return 1; \ + } \ + *expected = *ptr; \ + unlock(l); \ + return 0; \ + } OPTIMISED_CASES #undef OPTIMISED_CASE //////////////////////////////////////////////////////////////////////////////// // Atomic read-modify-write operations for integers of various 
sizes. //////////////////////////////////////////////////////////////////////////////// -#define ATOMIC_RMW(n, lockfree, type, opname, op) \ -type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) {\ - if (lockfree) \ - return __c11_atomic_fetch_##opname((_Atomic(type)*)ptr, val, model);\ - Lock *l = lock_for_pointer(ptr);\ - lock(l);\ - type tmp = *ptr;\ - *ptr = tmp op val;\ - unlock(l);\ - return tmp;\ -} +#define ATOMIC_RMW(n, lockfree, type, opname, op) \ + type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) { \ + if (lockfree) \ + return __c11_atomic_fetch_##opname((_Atomic(type) *)ptr, val, model); \ + Lock *l = lock_for_pointer(ptr); \ + lock(l); \ + type tmp = *ptr; \ + *ptr = tmp op val; \ + unlock(l); \ + return tmp; \ + } #define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +) OPTIMISED_CASES Index: compiler-rt/trunk/lib/builtins/bswapsi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/bswapsi2.c +++ compiler-rt/trunk/lib/builtins/bswapsi2.c @@ -14,9 +14,8 @@ #include "int_lib.h" COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) { - return ( - (((u)&0xff000000) >> 24) | - (((u)&0x00ff0000) >> 8) | - (((u)&0x0000ff00) << 8) | - (((u)&0x000000ff) << 24)); + return ((((u)&0xff000000) >> 24) | + (((u)&0x00ff0000) >> 8) | + (((u)&0x0000ff00) << 8) | + (((u)&0x000000ff) << 24)); } Index: compiler-rt/trunk/lib/builtins/clear_cache.c =================================================================== --- compiler-rt/trunk/lib/builtins/clear_cache.c +++ compiler-rt/trunk/lib/builtins/clear_cache.c @@ -12,7 +12,7 @@ #include #if __APPLE__ - #include +#include #endif #if defined(_WIN32) @@ -24,73 +24,71 @@ #endif #if defined(__FreeBSD__) && defined(__arm__) - #include - #include +#include +#include #endif #if defined(__NetBSD__) && defined(__arm__) - #include +#include #endif #if defined(__OpenBSD__) && defined(__mips__) - #include - #include +#include 
+#include #endif #if defined(__linux__) && defined(__mips__) - #include - #include - #include - #if defined(__ANDROID__) && defined(__LP64__) - /* - * clear_mips_cache - Invalidates instruction cache for Mips. - */ - static void clear_mips_cache(const void* Addr, size_t Size) { - __asm__ volatile ( - ".set push\n" - ".set noreorder\n" - ".set noat\n" - "beq %[Size], $zero, 20f\n" /* If size == 0, branch around. */ - "nop\n" - "daddu %[Size], %[Addr], %[Size]\n" /* Calculate end address + 1 */ - "rdhwr $v0, $1\n" /* Get step size for SYNCI. - $1 is $HW_SYNCI_Step */ - "beq $v0, $zero, 20f\n" /* If no caches require - synchronization, branch - around. */ - "nop\n" - "10:\n" - "synci 0(%[Addr])\n" /* Synchronize all caches around - address. */ - "daddu %[Addr], %[Addr], $v0\n" /* Add step size. */ - "sltu $at, %[Addr], %[Size]\n" /* Compare current with end - address. */ - "bne $at, $zero, 10b\n" /* Branch if more to do. */ - "nop\n" - "sync\n" /* Clear memory hazards. */ - "20:\n" - "bal 30f\n" - "nop\n" - "30:\n" - "daddiu $ra, $ra, 12\n" /* $ra has a value of $pc here. - Add offset of 12 to point to the - instruction after the last nop. - */ - "jr.hb $ra\n" /* Return, clearing instruction - hazards. */ - "nop\n" - ".set pop\n" - : [Addr] "+r"(Addr), [Size] "+r"(Size) - :: "at", "ra", "v0", "memory" - ); - } - #endif +#include +#include +#include +#if defined(__ANDROID__) && defined(__LP64__) +/* + * clear_mips_cache - Invalidates instruction cache for Mips. + */ +static void clear_mips_cache(const void *Addr, size_t Size) { + __asm__ volatile( + ".set push\n" + ".set noreorder\n" + ".set noat\n" + "beq %[Size], $zero, 20f\n" /* If size == 0, branch around. */ + "nop\n" + "daddu %[Size], %[Addr], %[Size]\n" /* Calculate end address + 1 */ + "rdhwr $v0, $1\n" /* Get step size for SYNCI. + $1 is $HW_SYNCI_Step */ + "beq $v0, $zero, 20f\n" /* If no caches require + synchronization, branch + around. 
*/ + "nop\n" + "10:\n" + "synci 0(%[Addr])\n" /* Synchronize all caches around + address. */ + "daddu %[Addr], %[Addr], $v0\n" /* Add step size. */ + "sltu $at, %[Addr], %[Size]\n" /* Compare current with end + address. */ + "bne $at, $zero, 10b\n" /* Branch if more to do. */ + "nop\n" + "sync\n" /* Clear memory hazards. */ + "20:\n" + "bal 30f\n" + "nop\n" + "30:\n" + "daddiu $ra, $ra, 12\n" /* $ra has a value of $pc here. + Add offset of 12 to point to the + instruction after the last nop. + */ + "jr.hb $ra\n" /* Return, clearing instruction + hazards. */ + "nop\n" + ".set pop\n" + : [ Addr ] "+r"(Addr), [ Size ] "+r"(Size)::"at", "ra", "v0", "memory"); +} +#endif #endif /* - * The compiler generates calls to __clear_cache() when creating + * The compiler generates calls to __clear_cache() when creating * trampoline functions on the stack for use with nested functions. - * It is expected to invalidate the instruction cache for the + * It is expected to invalidate the instruction cache for the * specified range. */ @@ -101,56 +99,55 @@ * so there is nothing to do */ #elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__)) - FlushInstructionCache(GetCurrentProcess(), start, end - start); + FlushInstructionCache(GetCurrentProcess(), start, end - start); #elif defined(__arm__) && !defined(__APPLE__) - #if defined(__FreeBSD__) || defined(__NetBSD__) - struct arm_sync_icache_args arg; +#if defined(__FreeBSD__) || defined(__NetBSD__) + struct arm_sync_icache_args arg; - arg.addr = (uintptr_t)start; - arg.len = (uintptr_t)end - (uintptr_t)start; + arg.addr = (uintptr_t)start; + arg.len = (uintptr_t)end - (uintptr_t)start; - sysarch(ARM_SYNC_ICACHE, &arg); - #elif defined(__linux__) - /* - * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but - * it also brought many other unused defines, as well as a dependency on - * kernel headers to be installed. 
- * - * This value is stable at least since Linux 3.13 and should remain so for - * compatibility reasons, warranting it's re-definition here. - */ - #define __ARM_NR_cacheflush 0x0f0002 - register int start_reg __asm("r0") = (int) (intptr_t) start; - const register int end_reg __asm("r1") = (int) (intptr_t) end; - const register int flags __asm("r2") = 0; - const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush; - __asm __volatile("svc 0x0" - : "=r"(start_reg) - : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), - "r"(flags)); - assert(start_reg == 0 && "Cache flush syscall failed."); - #else - compilerrt_abort(); - #endif + sysarch(ARM_SYNC_ICACHE, &arg); +#elif defined(__linux__) +/* + * We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but + * it also brought many other unused defines, as well as a dependency on + * kernel headers to be installed. + * + * This value is stable at least since Linux 3.13 and should remain so for + * compatibility reasons, warranting it's re-definition here. + */ +#define __ARM_NR_cacheflush 0x0f0002 + register int start_reg __asm("r0") = (int)(intptr_t)start; + const register int end_reg __asm("r1") = (int)(intptr_t)end; + const register int flags __asm("r2") = 0; + const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush; + __asm __volatile("svc 0x0" + : "=r"(start_reg) + : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags)); + assert(start_reg == 0 && "Cache flush syscall failed."); +#else + compilerrt_abort(); +#endif #elif defined(__linux__) && defined(__mips__) - const uintptr_t start_int = (uintptr_t) start; - const uintptr_t end_int = (uintptr_t) end; - #if defined(__ANDROID__) && defined(__LP64__) - // Call synci implementation for short address range. 
- const uintptr_t address_range_limit = 256; - if ((end_int - start_int) <= address_range_limit) { - clear_mips_cache(start, (end_int - start_int)); - } else { - syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); - } - #else - syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); - #endif + const uintptr_t start_int = (uintptr_t)start; + const uintptr_t end_int = (uintptr_t)end; +#if defined(__ANDROID__) && defined(__LP64__) + // Call synci implementation for short address range. + const uintptr_t address_range_limit = 256; + if ((end_int - start_int) <= address_range_limit) { + clear_mips_cache(start, (end_int - start_int)); + } else { + syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); + } +#else + syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE); +#endif #elif defined(__mips__) && defined(__OpenBSD__) cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE); #elif defined(__aarch64__) && !defined(__APPLE__) - uint64_t xstart = (uint64_t)(uintptr_t) start; - uint64_t xend = (uint64_t)(uintptr_t) end; + uint64_t xstart = (uint64_t)(uintptr_t)start; + uint64_t xend = (uint64_t)(uintptr_t)end; uint64_t addr; // Get Cache Type Info @@ -164,15 +161,15 @@ const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15); for (addr = xstart & ~(dcache_line_size - 1); addr < xend; addr += dcache_line_size) - __asm __volatile("dc cvau, %0" :: "r"(addr)); + __asm __volatile("dc cvau, %0" ::"r"(addr)); __asm __volatile("dsb ish"); const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15); for (addr = xstart & ~(icache_line_size - 1); addr < xend; addr += icache_line_size) - __asm __volatile("ic ivau, %0" :: "r"(addr)); + __asm __volatile("ic ivau, %0" ::"r"(addr)); __asm __volatile("isb sy"); -#elif defined (__powerpc64__) +#elif defined(__powerpc64__) const size_t line_size = 32; const size_t len = (uintptr_t)end - (uintptr_t)start; @@ -188,11 +185,11 @@ __asm__ volatile("icbi 0, %0" : : "r"(line)); __asm__ 
volatile("isync"); #else - #if __APPLE__ - /* On Darwin, sys_icache_invalidate() provides this functionality */ - sys_icache_invalidate(start, end-start); - #else - compilerrt_abort(); - #endif +#if __APPLE__ + /* On Darwin, sys_icache_invalidate() provides this functionality */ + sys_icache_invalidate(start, end - start); +#else + compilerrt_abort(); +#endif #endif } Index: compiler-rt/trunk/lib/builtins/clzdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/clzdi2.c +++ compiler-rt/trunk/lib/builtins/clzdi2.c @@ -16,8 +16,7 @@ /* Returns: the number of leading 0-bits */ #if !defined(__clang__) && \ - ((defined(__sparc__) && defined(__arch64__)) || \ - defined(__mips64) || \ + ((defined(__sparc__) && defined(__arch64__)) || defined(__mips64) || \ (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) /* On 64-bit architectures with neither a native clz instruction nor a native * ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than @@ -28,12 +27,10 @@ /* Precondition: a != 0 */ -COMPILER_RT_ABI si_int -__clzdi2(di_int a) -{ - dwords x; - x.all = a; - const si_int f = -(x.s.high == 0); - return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) + - (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +COMPILER_RT_ABI si_int __clzdi2(di_int a) { + dwords x; + x.all = a; + const si_int f = -(x.s.high == 0); + return __builtin_clz((x.s.high & ~f) | (x.s.low & f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); } Index: compiler-rt/trunk/lib/builtins/clzsi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/clzsi2.c +++ compiler-rt/trunk/lib/builtins/clzsi2.c @@ -17,36 +17,34 @@ /* Precondition: a != 0 */ -COMPILER_RT_ABI si_int -__clzsi2(si_int a) -{ - su_int x = (su_int)a; - si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */ - x >>= 16 - t; /* x = [0 - 0xFFFF] */ - su_int r = t; /* r = [0, 16] */ - /* return r + clz(x) */ - t = ((x & 
0xFF00) == 0) << 3; - x >>= 8 - t; /* x = [0 - 0xFF] */ - r += t; /* r = [0, 8, 16, 24] */ - /* return r + clz(x) */ - t = ((x & 0xF0) == 0) << 2; - x >>= 4 - t; /* x = [0 - 0xF] */ - r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ - /* return r + clz(x) */ - t = ((x & 0xC) == 0) << 1; - x >>= 2 - t; /* x = [0 - 3] */ - r += t; /* r = [0 - 30] and is even */ - /* return r + clz(x) */ -/* switch (x) - * { - * case 0: - * return r + 2; - * case 1: - * return r + 1; - * case 2: - * case 3: - * return r; - * } - */ - return r + ((2 - x) & -((x & 2) == 0)); +COMPILER_RT_ABI si_int __clzsi2(si_int a) { + su_int x = (su_int)a; + si_int t = ((x & 0xFFFF0000) == 0) << 4; /* if (x is small) t = 16 else 0 */ + x >>= 16 - t; /* x = [0 - 0xFFFF] */ + su_int r = t; /* r = [0, 16] */ + /* return r + clz(x) */ + t = ((x & 0xFF00) == 0) << 3; + x >>= 8 - t; /* x = [0 - 0xFF] */ + r += t; /* r = [0, 8, 16, 24] */ + /* return r + clz(x) */ + t = ((x & 0xF0) == 0) << 2; + x >>= 4 - t; /* x = [0 - 0xF] */ + r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ + /* return r + clz(x) */ + t = ((x & 0xC) == 0) << 1; + x >>= 2 - t; /* x = [0 - 3] */ + r += t; /* r = [0 - 30] and is even */ + /* return r + clz(x) */ + /* switch (x) + * { + * case 0: + * return r + 2; + * case 1: + * return r + 1; + * case 2: + * case 3: + * return r; + * } + */ + return r + ((2 - x) & -((x & 2) == 0)); } Index: compiler-rt/trunk/lib/builtins/clzti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/clzti2.c +++ compiler-rt/trunk/lib/builtins/clzti2.c @@ -19,14 +19,12 @@ /* Precondition: a != 0 */ -COMPILER_RT_ABI si_int -__clzti2(ti_int a) -{ - twords x; - x.all = a; - const di_int f = -(x.s.high == 0); - return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) + - ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +COMPILER_RT_ABI si_int __clzti2(ti_int a) { + twords x; + x.all = a; + const di_int f = -(x.s.high == 0); + return __builtin_clzll((x.s.high & 
~f) | (x.s.low & f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/cmpdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/cmpdi2.c +++ compiler-rt/trunk/lib/builtins/cmpdi2.c @@ -14,37 +14,32 @@ #include "int_lib.h" /* Returns: if (a < b) returns 0 -* if (a == b) returns 1 -* if (a > b) returns 2 -*/ + * if (a == b) returns 1 + * if (a > b) returns 2 + */ -COMPILER_RT_ABI si_int -__cmpdi2(di_int a, di_int b) -{ - dwords x; - x.all = a; - dwords y; - y.all = b; - if (x.s.high < y.s.high) - return 0; - if (x.s.high > y.s.high) - return 2; - if (x.s.low < y.s.low) - return 0; - if (x.s.low > y.s.low) - return 2; - return 1; +COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) { + dwords x; + x.all = a; + dwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; } #ifdef __ARM_EABI__ /* Returns: if (a < b) returns -1 -* if (a == b) returns 0 -* if (a > b) returns 1 -*/ -COMPILER_RT_ABI si_int -__aeabi_lcmp(di_int a, di_int b) -{ - return __cmpdi2(a, b) - 1; + * if (a == b) returns 0 + * if (a > b) returns 1 + */ +COMPILER_RT_ABI si_int __aeabi_lcmp(di_int a, di_int b) { + return __cmpdi2(a, b) - 1; } #endif - Index: compiler-rt/trunk/lib/builtins/cmpti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/cmpti2.c +++ compiler-rt/trunk/lib/builtins/cmpti2.c @@ -20,22 +20,20 @@ * if (a > b) returns 2 */ -COMPILER_RT_ABI si_int -__cmpti2(ti_int a, ti_int b) -{ - twords x; - x.all = a; - twords y; - y.all = b; - if (x.s.high < y.s.high) - return 0; - if (x.s.high > y.s.high) - return 2; - if (x.s.low < y.s.low) - return 0; - if (x.s.low > y.s.low) - return 2; - return 1; +COMPILER_RT_ABI si_int __cmpti2(ti_int a, ti_int b) { + twords x; + x.all = a; + twords y; 
+ y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/comparedf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/comparedf2.c +++ compiler-rt/trunk/lib/builtins/comparedf2.c @@ -39,44 +39,46 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -enum LE_RESULT { - LE_LESS = -1, - LE_EQUAL = 0, - LE_GREATER = 1, - LE_UNORDERED = 1 -}; +enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERED = 1 }; + +COMPILER_RT_ABI enum LE_RESULT __ledf2(fp_t a, fp_t b) { -COMPILER_RT_ABI enum LE_RESULT -__ledf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - // If either a or b is NaN, they are unordered. - if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; - - // If a and b are both zeros, they are equal. - if ((aAbs | bAbs) == 0) return LE_EQUAL; - - // If at least one of a and b is positive, we get the same result comparing - // a and b as signed integers as we would with a floating-point compare. - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } - - // Otherwise, both are negative, so we need to flip the sense of the - // comparison to get the correct result. (This assumes a twos- or ones- - // complement integer representation; if integers are represented in a - // sign-magnitude representation, then this flip is incorrect). 
- else { - if (aInt > bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) + return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) + return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } + + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). 
+ else { + if (aInt > bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } } #if defined(__ELF__) @@ -85,67 +87,59 @@ #endif enum GE_RESULT { - GE_LESS = -1, - GE_EQUAL = 0, - GE_GREATER = 1, - GE_UNORDERED = -1 // Note: different from LE_UNORDERED + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED }; -COMPILER_RT_ABI enum GE_RESULT -__gedf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; - if ((aAbs | bAbs) == 0) return GE_EQUAL; - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } else { - if (aInt > bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } -} +COMPILER_RT_ABI enum GE_RESULT __gedf2(fp_t a, fp_t b) { -COMPILER_RT_ABI int -__unorddf2(fp_t a, fp_t b) { - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - return aAbs > infRep || bAbs > infRep; + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) + return GE_UNORDERED; + if ((aAbs | bAbs) == 0) + return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } else { + if (aInt > bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } +} + +COMPILER_RT_ABI int __unorddf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; } // The following are alternative names for the preceding routines. 
-COMPILER_RT_ABI enum LE_RESULT -__eqdf2(fp_t a, fp_t b) { - return __ledf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __eqdf2(fp_t a, fp_t b) { return __ledf2(a, b); } -COMPILER_RT_ABI enum LE_RESULT -__ltdf2(fp_t a, fp_t b) { - return __ledf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __ltdf2(fp_t a, fp_t b) { return __ledf2(a, b); } -COMPILER_RT_ABI enum LE_RESULT -__nedf2(fp_t a, fp_t b) { - return __ledf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __nedf2(fp_t a, fp_t b) { return __ledf2(a, b); } -COMPILER_RT_ABI enum GE_RESULT -__gtdf2(fp_t a, fp_t b) { - return __gedf2(a, b); -} +COMPILER_RT_ABI enum GE_RESULT __gtdf2(fp_t a, fp_t b) { return __gedf2(a, b); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { - return __unorddf2(a, b); -} +AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { return __unorddf2(a, b); } #else AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unorddf2); #endif Index: compiler-rt/trunk/lib/builtins/comparesf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/comparesf2.c +++ compiler-rt/trunk/lib/builtins/comparesf2.c @@ -39,44 +39,46 @@ #define SINGLE_PRECISION #include "fp_lib.h" -enum LE_RESULT { - LE_LESS = -1, - LE_EQUAL = 0, - LE_GREATER = 1, - LE_UNORDERED = 1 -}; +enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERED = 1 }; + +COMPILER_RT_ABI enum LE_RESULT __lesf2(fp_t a, fp_t b) { -COMPILER_RT_ABI enum LE_RESULT -__lesf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - // If either a or b is NaN, they are unordered. - if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; - - // If a and b are both zeros, they are equal. 
- if ((aAbs | bAbs) == 0) return LE_EQUAL; - - // If at least one of a and b is positive, we get the same result comparing - // a and b as signed integers as we would with a fp_ting-point compare. - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } - - // Otherwise, both are negative, so we need to flip the sense of the - // comparison to get the correct result. (This assumes a twos- or ones- - // complement integer representation; if integers are represented in a - // sign-magnitude representation, then this flip is incorrect). - else { - if (aInt > bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) + return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) + return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a fp_ting-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } + + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). 
+ else { + if (aInt > bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } } #if defined(__ELF__) @@ -85,67 +87,59 @@ #endif enum GE_RESULT { - GE_LESS = -1, - GE_EQUAL = 0, - GE_GREATER = 1, - GE_UNORDERED = -1 // Note: different from LE_UNORDERED + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED }; -COMPILER_RT_ABI enum GE_RESULT -__gesf2(fp_t a, fp_t b) { - - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; - if ((aAbs | bAbs) == 0) return GE_EQUAL; - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } else { - if (aInt > bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } -} +COMPILER_RT_ABI enum GE_RESULT __gesf2(fp_t a, fp_t b) { -COMPILER_RT_ABI int -__unordsf2(fp_t a, fp_t b) { - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - return aAbs > infRep || bAbs > infRep; + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) + return GE_UNORDERED; + if ((aAbs | bAbs) == 0) + return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } else { + if (aInt > bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } +} + +COMPILER_RT_ABI int __unordsf2(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; } // The following are alternative names for the preceding routines. 
-COMPILER_RT_ABI enum LE_RESULT -__eqsf2(fp_t a, fp_t b) { - return __lesf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __eqsf2(fp_t a, fp_t b) { return __lesf2(a, b); } -COMPILER_RT_ABI enum LE_RESULT -__ltsf2(fp_t a, fp_t b) { - return __lesf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __ltsf2(fp_t a, fp_t b) { return __lesf2(a, b); } -COMPILER_RT_ABI enum LE_RESULT -__nesf2(fp_t a, fp_t b) { - return __lesf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __nesf2(fp_t a, fp_t b) { return __lesf2(a, b); } -COMPILER_RT_ABI enum GE_RESULT -__gtsf2(fp_t a, fp_t b) { - return __gesf2(a, b); -} +COMPILER_RT_ABI enum GE_RESULT __gtsf2(fp_t a, fp_t b) { return __gesf2(a, b); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) { - return __unordsf2(a, b); -} +AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) { return __unordsf2(a, b); } #else AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) COMPILER_RT_ALIAS(__unordsf2); #endif Index: compiler-rt/trunk/lib/builtins/comparetf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/comparetf2.c +++ compiler-rt/trunk/lib/builtins/comparetf2.c @@ -40,42 +40,44 @@ #include "fp_lib.h" #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -enum LE_RESULT { - LE_LESS = -1, - LE_EQUAL = 0, - LE_GREATER = 1, - LE_UNORDERED = 1 -}; +enum LE_RESULT { LE_LESS = -1, LE_EQUAL = 0, LE_GREATER = 1, LE_UNORDERED = 1 }; COMPILER_RT_ABI enum LE_RESULT __letf2(fp_t a, fp_t b) { - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - // If either a or b is NaN, they are unordered. - if (aAbs > infRep || bAbs > infRep) return LE_UNORDERED; - - // If a and b are both zeros, they are equal. 
- if ((aAbs | bAbs) == 0) return LE_EQUAL; - - // If at least one of a and b is positive, we get the same result comparing - // a and b as signed integers as we would with a floating-point compare. - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } - else { - // Otherwise, both are negative, so we need to flip the sense of the - // comparison to get the correct result. (This assumes a twos- or ones- - // complement integer representation; if integers are represented in a - // sign-magnitude representation, then this flip is incorrect). - if (aInt > bInt) return LE_LESS; - else if (aInt == bInt) return LE_EQUAL; - else return LE_GREATER; - } + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) + return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) + return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } else { + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). 
+ if (aInt > bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } } #if defined(__ELF__) @@ -84,54 +86,54 @@ #endif enum GE_RESULT { - GE_LESS = -1, - GE_EQUAL = 0, - GE_GREATER = 1, - GE_UNORDERED = -1 // Note: different from LE_UNORDERED + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED }; COMPILER_RT_ABI enum GE_RESULT __getf2(fp_t a, fp_t b) { - const srep_t aInt = toRep(a); - const srep_t bInt = toRep(b); - const rep_t aAbs = aInt & absMask; - const rep_t bAbs = bInt & absMask; - - if (aAbs > infRep || bAbs > infRep) return GE_UNORDERED; - if ((aAbs | bAbs) == 0) return GE_EQUAL; - if ((aInt & bInt) >= 0) { - if (aInt < bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } else { - if (aInt > bInt) return GE_LESS; - else if (aInt == bInt) return GE_EQUAL; - else return GE_GREATER; - } + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) + return GE_UNORDERED; + if ((aAbs | bAbs) == 0) + return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } else { + if (aInt > bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } } COMPILER_RT_ABI int __unordtf2(fp_t a, fp_t b) { - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - return aAbs > infRep || bAbs > infRep; + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; } // The following are alternative names for the preceding routines. 
-COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) { - return __letf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __eqtf2(fp_t a, fp_t b) { return __letf2(a, b); } -COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) { - return __letf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __lttf2(fp_t a, fp_t b) { return __letf2(a, b); } -COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) { - return __letf2(a, b); -} +COMPILER_RT_ABI enum LE_RESULT __netf2(fp_t a, fp_t b) { return __letf2(a, b); } -COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) { - return __getf2(a, b); -} +COMPILER_RT_ABI enum GE_RESULT __gttf2(fp_t a, fp_t b) { return __getf2(a, b); } #endif Index: compiler-rt/trunk/lib/builtins/cpu_model.c =================================================================== --- compiler-rt/trunk/lib/builtins/cpu_model.c +++ compiler-rt/trunk/lib/builtins/cpu_model.c @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#if (defined(__i386__) || defined(_M_IX86) || \ - defined(__x86_64__) || defined(_M_X64)) && \ +#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ + defined(_M_X64)) && \ (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) #include @@ -267,11 +267,11 @@ } } -static void -getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, - unsigned Brand_id, unsigned Features, - unsigned Features2, unsigned *Type, - unsigned *Subtype) { +static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Brand_id, + unsigned Features, + unsigned Features2, unsigned *Type, + unsigned *Subtype) { if (Brand_id != 0) return; switch (Family) { @@ -297,7 +297,7 @@ case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. 
case 0x1f: - case 0x2e: // Nehalem EX + case 0x2e: // Nehalem EX *Type = INTEL_COREI7; // "nehalem" *Subtype = INTEL_COREI7_NEHALEM; break; @@ -315,7 +315,7 @@ *Subtype = INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: - case 0x3e: // Ivy Bridge EP + case 0x3e: // Ivy Bridge EP *Type = INTEL_COREI7; // "ivybridge" *Subtype = INTEL_COREI7_IVYBRIDGE; break; @@ -339,10 +339,10 @@ break; // Skylake: - case 0x4e: // Skylake mobile - case 0x5e: // Skylake desktop - case 0x8e: // Kaby Lake mobile - case 0x9e: // Kaby Lake desktop + case 0x4e: // Skylake mobile + case 0x5e: // Skylake desktop + case 0x8e: // Kaby Lake mobile + case 0x9e: // Kaby Lake desktop *Type = INTEL_COREI7; // "skylake" *Subtype = INTEL_COREI7_SKYLAKE; break; @@ -398,7 +398,7 @@ default: // Unknown family 6 CPU. break; - break; + break; } default: break; // Unknown. @@ -474,12 +474,12 @@ unsigned Features2 = 0; unsigned EAX, EBX; -#define setFeature(F) \ - do { \ - if (F < 32) \ - Features |= 1U << (F & 0x1f); \ - else if (F < 64) \ - Features2 |= 1U << ((F - 32) & 0x1f); \ +#define setFeature(F) \ + do { \ + if (F < 32) \ + Features |= 1U << (F & 0x1f); \ + else if (F < 64) \ + Features2 |= 1U << ((F - 32) & 0x1f); \ } while (0) if ((EDX >> 15) & 1) @@ -618,8 +618,7 @@ the priority set. However, it still runs after ifunc initializers and needs to be called explicitly there. */ -int CONSTRUCTOR_ATTRIBUTE -__cpu_indicator_init(void) { +int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { unsigned EAX, EBX, ECX, EDX; unsigned MaxLeaf = 5; unsigned Vendor; @@ -651,8 +650,7 @@ if (Vendor == SIG_INTEL) { /* Get CPU type. 
*/ getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, - Features2, - &(__cpu_model.__cpu_type), + Features2, &(__cpu_model.__cpu_type), &(__cpu_model.__cpu_subtype)); __cpu_model.__cpu_vendor = VENDOR_INTEL; } else if (Vendor == SIG_AMD) { Index: compiler-rt/trunk/lib/builtins/ctzdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/ctzdi2.c +++ compiler-rt/trunk/lib/builtins/ctzdi2.c @@ -16,8 +16,7 @@ /* Returns: the number of trailing 0-bits */ #if !defined(__clang__) && \ - ((defined(__sparc__) && defined(__arch64__)) || \ - defined(__mips64) || \ + ((defined(__sparc__) && defined(__arch64__)) || defined(__mips64) || \ (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) /* On 64-bit architectures with neither a native clz instruction nor a native * ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than @@ -28,12 +27,10 @@ /* Precondition: a != 0 */ -COMPILER_RT_ABI si_int -__ctzdi2(di_int a) -{ - dwords x; - x.all = a; - const si_int f = -(x.s.low == 0); - return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) + - (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); +COMPILER_RT_ABI si_int __ctzdi2(di_int a) { + dwords x; + x.all = a; + const si_int f = -(x.s.low == 0); + return __builtin_ctz((x.s.high & f) | (x.s.low & ~f)) + + (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); } Index: compiler-rt/trunk/lib/builtins/ctzsi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/ctzsi2.c +++ compiler-rt/trunk/lib/builtins/ctzsi2.c @@ -17,40 +17,39 @@ /* Precondition: a != 0 */ -COMPILER_RT_ABI si_int -__ctzsi2(si_int a) -{ - su_int x = (su_int)a; - si_int t = ((x & 0x0000FFFF) == 0) << 4; /* if (x has no small bits) t = 16 else 0 */ - x >>= t; /* x = [0 - 0xFFFF] + higher garbage bits */ - su_int r = t; /* r = [0, 16] */ - /* return r + ctz(x) */ - t = ((x & 0x00FF) == 0) << 3; - x >>= t; /* x = [0 - 0xFF] + higher garbage bits */ - r += t; /* r = 
[0, 8, 16, 24] */ - /* return r + ctz(x) */ - t = ((x & 0x0F) == 0) << 2; - x >>= t; /* x = [0 - 0xF] + higher garbage bits */ - r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ - /* return r + ctz(x) */ - t = ((x & 0x3) == 0) << 1; - x >>= t; - x &= 3; /* x = [0 - 3] */ - r += t; /* r = [0 - 30] and is even */ - /* return r + ctz(x) */ +COMPILER_RT_ABI si_int __ctzsi2(si_int a) { + su_int x = (su_int)a; + si_int t = ((x & 0x0000FFFF) == 0) + << 4; /* if (x has no small bits) t = 16 else 0 */ + x >>= t; /* x = [0 - 0xFFFF] + higher garbage bits */ + su_int r = t; /* r = [0, 16] */ + /* return r + ctz(x) */ + t = ((x & 0x00FF) == 0) << 3; + x >>= t; /* x = [0 - 0xFF] + higher garbage bits */ + r += t; /* r = [0, 8, 16, 24] */ + /* return r + ctz(x) */ + t = ((x & 0x0F) == 0) << 2; + x >>= t; /* x = [0 - 0xF] + higher garbage bits */ + r += t; /* r = [0, 4, 8, 12, 16, 20, 24, 28] */ + /* return r + ctz(x) */ + t = ((x & 0x3) == 0) << 1; + x >>= t; + x &= 3; /* x = [0 - 3] */ + r += t; /* r = [0 - 30] and is even */ + /* return r + ctz(x) */ -/* The branch-less return statement below is equivalent - * to the following switch statement: - * switch (x) - * { - * case 0: - * return r + 2; - * case 2: - * return r + 1; - * case 1: - * case 3: - * return r; - * } - */ - return r + ((2 - (x >> 1)) & -((x & 1) == 0)); + /* The branch-less return statement below is equivalent + * to the following switch statement: + * switch (x) + * { + * case 0: + * return r + 2; + * case 2: + * return r + 1; + * case 1: + * case 3: + * return r; + * } + */ + return r + ((2 - (x >> 1)) & -((x & 1) == 0)); } Index: compiler-rt/trunk/lib/builtins/ctzti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/ctzti2.c +++ compiler-rt/trunk/lib/builtins/ctzti2.c @@ -19,14 +19,12 @@ /* Precondition: a != 0 */ -COMPILER_RT_ABI si_int -__ctzti2(ti_int a) -{ - twords x; - x.all = a; - const di_int f = -(x.s.low == 0); - return __builtin_ctzll((x.s.high 
& f) | (x.s.low & ~f)) + - ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +COMPILER_RT_ABI si_int __ctzti2(ti_int a) { + twords x; + x.all = a; + const di_int f = -(x.s.low == 0); + return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/divdc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divdc3.c +++ compiler-rt/trunk/lib/builtins/divdc3.c @@ -18,44 +18,37 @@ /* Returns: the quotient of (a + ib) / (c + id) */ -COMPILER_RT_ABI Dcomplex -__divdc3(double __a, double __b, double __c, double __d) -{ - int __ilogbw = 0; - double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); - if (crt_isfinite(__logbw)) - { - __ilogbw = (int)__logbw; - __c = crt_scalbn(__c, -__ilogbw); - __d = crt_scalbn(__d, -__ilogbw); +COMPILER_RT_ABI Dcomplex __divdc3(double __a, double __b, double __c, + double __d) { + int __ilogbw = 0; + double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); + if (crt_isfinite(__logbw)) { + __ilogbw = (int)__logbw; + __c = crt_scalbn(__c, -__ilogbw); + __d = crt_scalbn(__d, -__ilogbw); + } + double __denom = __c * __c + __d * __d; + Dcomplex z; + COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = + crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { + if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) { + COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b; + } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) && + crt_isfinite(__d)) { + __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a); + __b = crt_copysign(crt_isinf(__b) ? 
1.0 : 0.0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) && + crt_isfinite(__b)) { + __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c); + __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d); + COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); } - double __denom = __c * __c + __d * __d; - Dcomplex z; - COMPLEX_REAL(z) = crt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw); - COMPLEX_IMAGINARY(z) = crt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) - { - COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a; - COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b; - } - else if ((crt_isinf(__a) || crt_isinf(__b)) && - crt_isfinite(__c) && crt_isfinite(__d)) - { - __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a); - __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b); - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); - } - else if (crt_isinf(__logbw) && __logbw > 0.0 && - crt_isfinite(__a) && crt_isfinite(__b)) - { - __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c); - __d = crt_copysign(crt_isinf(__d) ? 
1.0 : 0.0, __d); - COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); - } - } - return z; + } + return z; } Index: compiler-rt/trunk/lib/builtins/divdf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divdf3.c +++ compiler-rt/trunk/lib/builtins/divdf3.c @@ -18,186 +18,194 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -COMPILER_RT_ABI fp_t -__divdf3(fp_t a, fp_t b) { +COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. - if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) return fromRep(qnanRep); - // zero / anything else = +/- zero - else return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) return fromRep(infRep | quotientSign); - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. 
- if (aAbs < implicitBit) scale += normalize(&aSignificand); - if (bAbs < implicitBit) scale -= normalize(&bSignificand); + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) + return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else + return fromRep(aAbs | quotientSign); } - // Or in the implicit significand bit. (If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) - aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - - // Align the significand of b as a Q31 fixed-point number in the range - // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. 
- const uint32_t q31b = bSignificand >> 21; - uint32_t recip32 = UINT32_C(0x7504f333) - q31b; - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration, so after three iterations, we have about 28 binary - // digits of accuracy. - uint32_t correction32; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - correction32 = -((uint64_t)recip32 * q31b >> 32); - recip32 = (uint64_t)recip32 * correction32 >> 31; - - // recip32 might have overflowed to exactly zero in the preceding - // computation if the high word of b is exactly 1.0. This would sabotage - // the full-width final stage of the computation that follows, so we adjust - // recip32 downward by one bit. - recip32--; - - // We need to perform one more iteration to get us to 56 binary digits; - // The last iteration needs to happen with extra precision. - const uint32_t q63blo = bSignificand << 11; - uint64_t correction, reciprocal; - correction = -((uint64_t)recip32*q31b + ((uint64_t)recip32*q63blo >> 32)); - uint32_t cHi = correction >> 32; - uint32_t cLo = correction; - reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32); - - // We already adjusted the 32-bit estimate, now we need to adjust the final - // 64-bit reciprocal estimate downward to ensure that it is strictly smaller - // than the infinitely precise exact reciprocal. Because the computation - // of the Newton-Raphson step is truncating at every step, this adjustment - // is small; most of the work is already done. - reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-56, lies in the - // interval [0.5, 1.0), and is strictly smaller than the true reciprocal - // of b. 
Multiplying a by this reciprocal thus gives a numerical q = a/b - // in Q53 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0.5, 2.0) - // 3. the error in q is bounded away from 2^-53 (actually, we have a - // couple of bits to spare, but this is all we need). - - // We need a 64 x 64 multiply high to compute q, which isn't a basic - // operation in C, so we need to be a little bit fussy. - rep_t quotient, quotientLo; - wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). - // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - rep_t residual; - if (quotient < (implicitBit << 1)) { - residual = (aSignificand << 53) - quotient * bSignificand; - quotientExponent--; - } else { - quotient >>= 1; - residual = (aSignificand << 52) - quotient * bSignificand; - } - - const int writtenExponent = quotientExponent + exponentBias; - - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } - - else if (writtenExponent < 1) { - if (writtenExponent == 0) { - // Check whether the rounded result is normal. - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit. - rep_t absResult = quotient & significandMask; - // Round. - absResult += round; - if (absResult & ~significandMask) { - // The rounded result is normal; return it. - return fromRep(absResult | quotientSign); - } - } - // Flush denormals to zero.
In the future, it would be nice to add - // code to round them correctly. + // anything else / infinity = +/- 0 + if (bAbs == infRep) + return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) + return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); } - - else { - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit - rep_t absResult = quotient & significandMask; - // Insert the exponent - absResult |= (rep_t)writtenExponent << significandBits; - // Round - absResult += round; - // Insert the sign and return - const double result = fromRep(absResult | quotientSign); - return result; + // anything else / zero = +/- infinity + if (!bAbs) + return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. 
+ const uint32_t q31b = bSignificand >> 21; + uint32_t recip32 = UINT32_C(0x7504f333) - q31b; + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + uint32_t correction32; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + correction32 = -((uint64_t)recip32 * q31b >> 32); + recip32 = (uint64_t)recip32 * correction32 >> 31; + + // recip32 might have overflowed to exactly zero in the preceding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip32 downward by one bit. + recip32--; + + // We need to perform one more iteration to get us to 56 binary digits; + // The last iteration needs to happen with extra precision. + const uint32_t q63blo = bSignificand << 11; + uint64_t correction, reciprocal; + correction = -((uint64_t)recip32 * q31b + ((uint64_t)recip32 * q63blo >> 32)); + uint32_t cHi = correction >> 32; + uint32_t cLo = correction; + reciprocal = (uint64_t)recip32 * cHi + ((uint64_t)recip32 * cLo >> 32); + + // We already adjusted the 32-bit estimate, now we need to adjust the final + // 64-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. + reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-56, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. 
Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q53 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-53 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 64 x 64 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + rep_t quotient, quotientLo; + wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + if (quotient < (implicitBit << 1)) { + residual = (aSignificand << 53) - quotient * bSignificand; + quotientExponent--; + } else { + quotient >>= 1; + residual = (aSignificand << 52) - quotient * bSignificand; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } + + else if (writtenExponent < 1) { + if (writtenExponent == 0) { + // Check whether the rounded result is normal. + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit. + rep_t absResult = quotient & significandMask; + // Round. + absResult += round; + if (absResult & ~significandMask) { + // The rounded result is normal; return it. + return fromRep(absResult | quotientSign); + } } + // Flush denormals to zero.
In the future, it would be nice to add + // code to round them correctly. + return fromRep(quotientSign); + } + + else { + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + const double result = fromRep(absResult | quotientSign); + return result; + } } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { - return __divdf3(a, b); -} +AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { return __divdf3(a, b); } #else AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divdf3); #endif Index: compiler-rt/trunk/lib/builtins/divdi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divdi3.c +++ compiler-rt/trunk/lib/builtins/divdi3.c @@ -15,14 +15,13 @@ /* Returns: a / b */ -COMPILER_RT_ABI di_int -__divdi3(di_int a, di_int b) -{ - const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; - di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */ - di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? -1 : 0 */ - a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ - b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ - s_a ^= s_b; /*sign of quotient */ - return (__udivmoddi4(a, b, (du_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ +COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) { + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s_a = a >> bits_in_dword_m1; /* s_a = a < 0 ? -1 : 0 */ + di_int s_b = b >> bits_in_dword_m1; /* s_b = b < 0 ? 
-1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /*sign of quotient */ + return (__udivmoddi4(a, b, (du_int *)0) ^ s_a) - + s_a; /* negate if s_a == -1 */ } Index: compiler-rt/trunk/lib/builtins/divmoddi4.c =================================================================== --- compiler-rt/trunk/lib/builtins/divmoddi4.c +++ compiler-rt/trunk/lib/builtins/divmoddi4.c @@ -15,10 +15,8 @@ /* Returns: a / b, *rem = a % b */ -COMPILER_RT_ABI di_int -__divmoddi4(di_int a, di_int b, di_int* rem) -{ - di_int d = __divdi3(a,b); - *rem = a - (d*b); +COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int *rem) { + di_int d = __divdi3(a, b); + *rem = a - (d * b); return d; } Index: compiler-rt/trunk/lib/builtins/divmodsi4.c =================================================================== --- compiler-rt/trunk/lib/builtins/divmodsi4.c +++ compiler-rt/trunk/lib/builtins/divmodsi4.c @@ -15,12 +15,8 @@ /* Returns: a / b, *rem = a % b */ -COMPILER_RT_ABI si_int -__divmodsi4(si_int a, si_int b, si_int* rem) -{ - si_int d = __divsi3(a,b); - *rem = a - (d*b); - return d; +COMPILER_RT_ABI si_int __divmodsi4(si_int a, si_int b, si_int *rem) { + si_int d = __divsi3(a, b); + *rem = a - (d * b); + return d; } - - Index: compiler-rt/trunk/lib/builtins/divsc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divsc3.c +++ compiler-rt/trunk/lib/builtins/divsc3.c @@ -18,45 +18,37 @@ /* Returns: the quotient of (a + ib) / (c + id) */ -COMPILER_RT_ABI Fcomplex -__divsc3(float __a, float __b, float __c, float __d) -{ - int __ilogbw = 0; - float __logbw = - __compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); - if (crt_isfinite(__logbw)) - { - __ilogbw = (int)__logbw; - __c = crt_scalbnf(__c, -__ilogbw); - __d = crt_scalbnf(__d, -__ilogbw); +COMPILER_RT_ABI Fcomplex __divsc3(float __a, float __b, float __c, float __d) { + int __ilogbw = 0; + float 
__logbw = + __compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); + if (crt_isfinite(__logbw)) { + __ilogbw = (int)__logbw; + __c = crt_scalbnf(__c, -__ilogbw); + __d = crt_scalbnf(__d, -__ilogbw); + } + float __denom = __c * __c + __d * __d; + Fcomplex z; + COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = + crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { + if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) { + COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b; + } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) && + crt_isfinite(__d)) { + __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } else if (crt_isinf(__logbw) && __logbw > 0 && crt_isfinite(__a) && + crt_isfinite(__b)) { + __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d); + COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); } - float __denom = __c * __c + __d * __d; - Fcomplex z; - COMPLEX_REAL(z) = crt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw); - COMPLEX_IMAGINARY(z) = crt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) - { - COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a; - COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b; - } - else if ((crt_isinf(__a) || crt_isinf(__b)) && - crt_isfinite(__c) && crt_isfinite(__d)) - { - __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysignf(crt_isinf(__b) ? 
1 : 0, __b); - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); - } - else if (crt_isinf(__logbw) && __logbw > 0 && - crt_isfinite(__a) && crt_isfinite(__b)) - { - __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d); - COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); - } - } - return z; + } + return z; } Index: compiler-rt/trunk/lib/builtins/divsf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divsf3.c +++ compiler-rt/trunk/lib/builtins/divsf3.c @@ -18,170 +18,178 @@ #define SINGLE_PRECISION #include "fp_lib.h" -COMPILER_RT_ABI fp_t -__divsf3(fp_t a, fp_t b) { +COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. 
- if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) return fromRep(qnanRep); - // zero / anything else = +/- zero - else return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) return fromRep(infRep | quotientSign); - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) scale += normalize(&aSignificand); - if (bAbs < implicitBit) scale -= normalize(&bSignificand); + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) + return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else + return fromRep(aAbs | quotientSign); } - // Or in the implicit significand bit. (If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) - aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - - // Align the significand of b as a Q31 fixed-point number in the range - // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. - uint32_t q31b = bSignificand << 8; - uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration, so after three iterations, we have about 28 binary - // digits of accuracy. 
- uint32_t correction; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - correction = -((uint64_t)reciprocal * q31b >> 32); - reciprocal = (uint64_t)reciprocal * correction >> 31; - - // Exhaustive testing shows that the error in reciprocal after three steps - // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our - // expectations. We bump the reciprocal by a tiny value to force the error - // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to - // be specific). This also causes 1/1 to give a sensible approximation - // instead of zero (due to overflow). - reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-28, lies in the - // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller - // than the true reciprocal of b. Multiplying a by this reciprocal thus - // gives a numerical q = a/b in Q24 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) - // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes - // from the fact that we truncate the product, and the 2^27 term - // is the error in the reciprocal of b scaled by the maximum - // possible value of a. As a consequence of this error bound, - // either q or nextafter(q) is the correctly rounded - rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32; - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). - // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. 
- // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - rep_t residual; - if (quotient < (implicitBit << 1)) { - residual = (aSignificand << 24) - quotient * bSignificand; - quotientExponent--; - } else { - quotient >>= 1; - residual = (aSignificand << 23) - quotient * bSignificand; - } - - const int writtenExponent = quotientExponent + exponentBias; - - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } - - else if (writtenExponent < 1) { - if (writtenExponent == 0) { - // Check whether the rounded result is normal. - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit. - rep_t absResult = quotient & significandMask; - // Round. - absResult += round; - if (absResult & ~significandMask) { - // The rounded result is normal; return it. - return fromRep(absResult | quotientSign); - } - } - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. + // anything else / infinity = +/- 0 + if (bAbs == infRep) + return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) + return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); } - - else { - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit - rep_t absResult = quotient & significandMask; - // Insert the exponent - absResult |= (rep_t)writtenExponent << significandBits; - // Round - absResult += round; - // Insert the sign and return + // anything else / zero = +/- infinity + if (!bAbs) + return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. 
+ if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q31 fixed-point number in the range + // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. + uint32_t q31b = bSignificand << 8; + uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration, so after three iterations, we have about 28 binary + // digits of accuracy. + uint32_t correction; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + correction = -((uint64_t)reciprocal * q31b >> 32); + reciprocal = (uint64_t)reciprocal * correction >> 31; + + // Exhaustive testing shows that the error in reciprocal after three steps + // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our + // expectations. We bump the reciprocal by a tiny value to force the error + // to be strictly positive (in the range [0x1.4fdfp-37,0x1.287246p-29], to + // be specific). This also causes 1/1 to give a sensible approximation + // instead of zero (due to overflow). 
+ reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-28, lies in the + // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller + // than the true reciprocal of b. Multiplying a by this reciprocal thus + // gives a numerical q = a/b in Q24 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0) + // 3. the error in q is at most 2^-24 + 2^-27 -- the 2^24 term comes + // from the fact that we truncate the product, and the 2^27 term + // is the error in the reciprocal of b scaled by the maximum + // possible value of a. As a consequence of this error bound, + // either q or nextafter(q) is the correctly rounded + rep_t quotient = (uint64_t)reciprocal * (aSignificand << 1) >> 32; + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). + // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + if (quotient < (implicitBit << 1)) { + residual = (aSignificand << 24) - quotient * bSignificand; + quotientExponent--; + } else { + quotient >>= 1; + residual = (aSignificand << 23) - quotient * bSignificand; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } + + else if (writtenExponent < 1) { + if (writtenExponent == 0) { + // Check whether the rounded result is normal. + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit. 
+ rep_t absResult = quotient & significandMask; + // Round. + absResult += round; + if (absResult & ~significandMask) { + // The rounded result is normal; return it. return fromRep(absResult | quotientSign); + } } + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. + return fromRep(quotientSign); + } + + else { + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + return fromRep(absResult | quotientSign); + } } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { - return __divsf3(a, b); -} +AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { return __divsf3(a, b); } #else AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) COMPILER_RT_ALIAS(__divsf3); #endif Index: compiler-rt/trunk/lib/builtins/divsi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divsi3.c +++ compiler-rt/trunk/lib/builtins/divsi3.c @@ -15,22 +15,20 @@ /* Returns: a / b */ -COMPILER_RT_ABI si_int -__divsi3(si_int a, si_int b) -{ - const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1; - si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */ - si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */ - a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ - b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ - s_a ^= s_b; /* sign of quotient */ - /* - * On CPUs without unsigned hardware division support, - * this calls __udivsi3 (notice the cast to su_int). - * On CPUs with unsigned hardware division support, - * this uses the unsigned division instruction. 
- */ - return ((su_int)a/(su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */ +COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b) { + const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1; + si_int s_a = a >> bits_in_word_m1; /* s_a = a < 0 ? -1 : 0 */ + si_int s_b = b >> bits_in_word_m1; /* s_b = b < 0 ? -1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /* sign of quotient */ + /* + * On CPUs without unsigned hardware division support, + * this calls __udivsi3 (notice the cast to su_int). + * On CPUs with unsigned hardware division support, + * this uses the unsigned division instruction. + */ + return ((su_int)a / (su_int)b ^ s_a) - s_a; /* negate if s_a == -1 */ } #if defined(__ARM_EABI__) Index: compiler-rt/trunk/lib/builtins/divtc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divtc3.c +++ compiler-rt/trunk/lib/builtins/divtc3.c @@ -18,45 +18,38 @@ /* Returns: the quotient of (a + ib) / (c + id) */ -COMPILER_RT_ABI Lcomplex -__divtc3(long double __a, long double __b, long double __c, long double __d) -{ - int __ilogbw = 0; - long double __logbw = - __compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); - if (crt_isfinite(__logbw)) - { - __ilogbw = (int)__logbw; - __c = crt_scalbnl(__c, -__ilogbw); - __d = crt_scalbnl(__d, -__ilogbw); +COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b, + long double __c, long double __d) { + int __ilogbw = 0; + long double __logbw = + __compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + if (crt_isfinite(__logbw)) { + __ilogbw = (int)__logbw; + __c = crt_scalbnl(__c, -__ilogbw); + __d = crt_scalbnl(__d, -__ilogbw); + } + long double __denom = __c * __c + __d * __d; + Lcomplex z; + COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = + crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if 
(crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { + if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) { + COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; + } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) && + crt_isfinite(__d)) { + __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) && + crt_isfinite(__b)) { + __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d); + COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); } - long double __denom = __c * __c + __d * __d; - Lcomplex z; - COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); - COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) - { - COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; - COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; - } - else if ((crt_isinf(__a) || crt_isinf(__b)) && - crt_isfinite(__c) && crt_isfinite(__d)) - { - __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a); - __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b); - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); - } - else if (crt_isinf(__logbw) && __logbw > 0.0 && - crt_isfinite(__a) && crt_isfinite(__b)) - { - __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c); - __d = crt_copysignl(crt_isinf(__d) ? 
1.0 : 0.0, __d); - COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d); - } - } - return z; + } + return z; } Index: compiler-rt/trunk/lib/builtins/divtf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divtf3.c +++ compiler-rt/trunk/lib/builtins/divtf3.c @@ -21,194 +21,203 @@ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. - if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN / anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything / NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity / infinity = NaN - if (bAbs == infRep) return fromRep(qnanRep); - // infinity / anything else = +/- infinity - else return fromRep(aAbs | quotientSign); - } - - // anything else / infinity = +/- 0 - if (bAbs == infRep) return fromRep(quotientSign); - - if (!aAbs) { - // zero / zero = NaN - if (!bAbs) return fromRep(qnanRep); - // zero / anything else = +/- zero - else return fromRep(quotientSign); - } - // anything else / zero = +/- infinity - if (!bAbs) return fromRep(infRep | quotientSign); - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. 
- if (aAbs < implicitBit) scale += normalize(&aSignificand); - if (bAbs < implicitBit) scale -= normalize(&bSignificand); + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) + return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else + return fromRep(aAbs | quotientSign); } - // Or in the implicit significand bit. (If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) - aSignificand |= implicitBit; - bSignificand |= implicitBit; - int quotientExponent = aExponent - bExponent + scale; - - // Align the significand of b as a Q63 fixed-point number in the range - // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax - // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This - // is accurate to about 3.5 binary digits. - const uint64_t q63b = bSignificand >> 49; - uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b; - // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) - - // Now refine the reciprocal estimate using a Newton-Raphson iteration: - // - // x1 = x0 * (2 - x0 * b) - // - // This doubles the number of correct binary digits in the approximation - // with each iteration. 
- uint64_t correction64; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - correction64 = -((rep_t)recip64 * q63b >> 64); - recip64 = (rep_t)recip64 * correction64 >> 63; - - // recip64 might have overflowed to exactly zero in the preceeding - // computation if the high word of b is exactly 1.0. This would sabotage - // the full-width final stage of the computation that follows, so we adjust - // recip64 downward by one bit. - recip64--; - - // We need to perform one more iteration to get us to 112 binary digits; - // The last iteration needs to happen with extra precision. - const uint64_t q127blo = bSignificand << 15; - rep_t correction, reciprocal; - - // NOTE: This operation is equivalent to __multi3, which is not implemented - // in some architechure - rep_t r64q63, r64q127, r64cH, r64cL, dummy; - wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63); - wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127); - - correction = -(r64q63 + (r64q127 >> 64)); - - uint64_t cHi = correction >> 64; - uint64_t cLo = correction; - - wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH); - wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL); - - reciprocal = r64cH + (r64cL >> 64); - - // We already adjusted the 64-bit estimate, now we need to adjust the final - // 128-bit reciprocal estimate downward to ensure that it is strictly smaller - // than the infinitely precise exact reciprocal. Because the computation - // of the Newton-Raphson step is truncating at every step, this adjustment - // is small; most of the work is already done. 
- reciprocal -= 2; - - // The numerical reciprocal is accurate to within 2^-112, lies in the - // interval [0.5, 1.0), and is strictly smaller than the true reciprocal - // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b - // in Q127 with the following properties: - // - // 1. q < a/b - // 2. q is in the interval [0.5, 2.0) - // 3. the error in q is bounded away from 2^-113 (actually, we have a - // couple of bits to spare, but this is all we need). - - // We need a 128 x 128 multiply high to compute q, which isn't a basic - // operation in C, so we need to be a little bit fussy. - rep_t quotient, quotientLo; - wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); - - // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). - // In either case, we are going to compute a residual of the form - // - // r = a - q*b - // - // We know from the construction of q that r satisfies: - // - // 0 <= r < ulp(q)*b - // - // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we - // already have the correct result. The exact halfway case cannot occur. - // We also take this time to right shift quotient if it falls in the [1,2) - // range and adjust the exponent accordingly. - rep_t residual; - rep_t qb; - - if (quotient < (implicitBit << 1)) { - wideMultiply(quotient, bSignificand, &dummy, &qb); - residual = (aSignificand << 113) - qb; - quotientExponent--; - } else { - quotient >>= 1; - wideMultiply(quotient, bSignificand, &dummy, &qb); - residual = (aSignificand << 112) - qb; - } - - const int writtenExponent = quotientExponent + exponentBias; - - if (writtenExponent >= maxExponent) { - // If we have overflowed the exponent, return infinity. - return fromRep(infRep | quotientSign); - } - else if (writtenExponent < 1) { - if (writtenExponent == 0) { - // Check whether the rounded result is normal. - const bool round = (residual << 1) > bSignificand; - // Clear the implicit bit. 
- rep_t absResult = quotient & significandMask; - // Round. - absResult += round; - if (absResult & ~significandMask) { - // The rounded result is normal; return it. - return fromRep(absResult | quotientSign); - } - } - // Flush denormals to zero. In the future, it would be nice to add - // code to round them correctly. + // anything else / infinity = +/- 0 + if (bAbs == infRep) + return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) + return fromRep(qnanRep); + // zero / anything else = +/- zero + else return fromRep(quotientSign); } - else { - const bool round = (residual << 1) >= bSignificand; - // Clear the implicit bit - rep_t absResult = quotient & significandMask; - // Insert the exponent - absResult |= (rep_t)writtenExponent << significandBits; - // Round - absResult += round; - // Insert the sign and return - const long double result = fromRep(absResult | quotientSign); - return result; + // anything else / zero = +/- infinity + if (!bAbs) + return fromRep(infRep | quotientSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale -= normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + int quotientExponent = aExponent - bExponent + scale; + + // Align the significand of b as a Q63 fixed-point number in the range + // [1, 2.0) and get a Q64 approximate reciprocal using a small minimax + // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This + // is accurate to about 3.5 binary digits. 
+ const uint64_t q63b = bSignificand >> 49; + uint64_t recip64 = UINT64_C(0x7504f333F9DE6484) - q63b; + // 0x7504f333F9DE6484 / 2^64 + 1 = 3/4 + 1/sqrt(2) + + // Now refine the reciprocal estimate using a Newton-Raphson iteration: + // + // x1 = x0 * (2 - x0 * b) + // + // This doubles the number of correct binary digits in the approximation + // with each iteration. + uint64_t correction64; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + correction64 = -((rep_t)recip64 * q63b >> 64); + recip64 = (rep_t)recip64 * correction64 >> 63; + + // recip64 might have overflowed to exactly zero in the preceeding + // computation if the high word of b is exactly 1.0. This would sabotage + // the full-width final stage of the computation that follows, so we adjust + // recip64 downward by one bit. + recip64--; + + // We need to perform one more iteration to get us to 112 binary digits; + // The last iteration needs to happen with extra precision. 
+ const uint64_t q127blo = bSignificand << 15; + rep_t correction, reciprocal; + + // NOTE: This operation is equivalent to __multi3, which is not implemented + // in some architechure + rep_t r64q63, r64q127, r64cH, r64cL, dummy; + wideMultiply((rep_t)recip64, (rep_t)q63b, &dummy, &r64q63); + wideMultiply((rep_t)recip64, (rep_t)q127blo, &dummy, &r64q127); + + correction = -(r64q63 + (r64q127 >> 64)); + + uint64_t cHi = correction >> 64; + uint64_t cLo = correction; + + wideMultiply((rep_t)recip64, (rep_t)cHi, &dummy, &r64cH); + wideMultiply((rep_t)recip64, (rep_t)cLo, &dummy, &r64cL); + + reciprocal = r64cH + (r64cL >> 64); + + // We already adjusted the 64-bit estimate, now we need to adjust the final + // 128-bit reciprocal estimate downward to ensure that it is strictly smaller + // than the infinitely precise exact reciprocal. Because the computation + // of the Newton-Raphson step is truncating at every step, this adjustment + // is small; most of the work is already done. + reciprocal -= 2; + + // The numerical reciprocal is accurate to within 2^-112, lies in the + // interval [0.5, 1.0), and is strictly smaller than the true reciprocal + // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b + // in Q127 with the following properties: + // + // 1. q < a/b + // 2. q is in the interval [0.5, 2.0) + // 3. the error in q is bounded away from 2^-113 (actually, we have a + // couple of bits to spare, but this is all we need). + + // We need a 128 x 128 multiply high to compute q, which isn't a basic + // operation in C, so we need to be a little bit fussy. + rep_t quotient, quotientLo; + wideMultiply(aSignificand << 2, reciprocal, &quotient, &quotientLo); + + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). 
+ // In either case, we are going to compute a residual of the form + // + // r = a - q*b + // + // We know from the construction of q that r satisfies: + // + // 0 <= r < ulp(q)*b + // + // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we + // already have the correct result. The exact halfway case cannot occur. + // We also take this time to right shift quotient if it falls in the [1,2) + // range and adjust the exponent accordingly. + rep_t residual; + rep_t qb; + + if (quotient < (implicitBit << 1)) { + wideMultiply(quotient, bSignificand, &dummy, &qb); + residual = (aSignificand << 113) - qb; + quotientExponent--; + } else { + quotient >>= 1; + wideMultiply(quotient, bSignificand, &dummy, &qb); + residual = (aSignificand << 112) - qb; + } + + const int writtenExponent = quotientExponent + exponentBias; + + if (writtenExponent >= maxExponent) { + // If we have overflowed the exponent, return infinity. + return fromRep(infRep | quotientSign); + } else if (writtenExponent < 1) { + if (writtenExponent == 0) { + // Check whether the rounded result is normal. + const bool round = (residual << 1) > bSignificand; + // Clear the implicit bit. + rep_t absResult = quotient & significandMask; + // Round. + absResult += round; + if (absResult & ~significandMask) { + // The rounded result is normal; return it. + return fromRep(absResult | quotientSign); + } } + // Flush denormals to zero. In the future, it would be nice to add + // code to round them correctly. 
+ return fromRep(quotientSign); + } else { + const bool round = (residual << 1) >= bSignificand; + // Clear the implicit bit + rep_t absResult = quotient & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + // Round + absResult += round; + // Insert the sign and return + const long double result = fromRep(absResult | quotientSign); + return result; + } } #endif Index: compiler-rt/trunk/lib/builtins/divti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divti3.c +++ compiler-rt/trunk/lib/builtins/divti3.c @@ -17,16 +17,15 @@ /* Returns: a / b */ -COMPILER_RT_ABI ti_int -__divti3(ti_int a, ti_int b) -{ - const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; - ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */ - ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? -1 : 0 */ - a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ - b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ - s_a ^= s_b; /* sign of quotient */ - return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; /* negate if s_a == -1 */ +COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) { + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s_a = a >> bits_in_tword_m1; /* s_a = a < 0 ? -1 : 0 */ + ti_int s_b = b >> bits_in_tword_m1; /* s_b = b < 0 ? 
-1 : 0 */ + a = (a ^ s_a) - s_a; /* negate if s_a == -1 */ + b = (b ^ s_b) - s_b; /* negate if s_b == -1 */ + s_a ^= s_b; /* sign of quotient */ + return (__udivmodti4(a, b, (tu_int *)0) ^ s_a) - + s_a; /* negate if s_a == -1 */ } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/divxc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/divxc3.c +++ compiler-rt/trunk/lib/builtins/divxc3.c @@ -17,46 +17,39 @@ /* Returns: the quotient of (a + ib) / (c + id) */ -COMPILER_RT_ABI Lcomplex -__divxc3(long double __a, long double __b, long double __c, long double __d) -{ - int __ilogbw = 0; - long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); - if (crt_isfinite(__logbw)) - { - __ilogbw = (int)__logbw; - __c = crt_scalbnl(__c, -__ilogbw); - __d = crt_scalbnl(__d, -__ilogbw); +COMPILER_RT_ABI Lcomplex __divxc3(long double __a, long double __b, + long double __c, long double __d) { + int __ilogbw = 0; + long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + if (crt_isfinite(__logbw)) { + __ilogbw = (int)__logbw; + __c = crt_scalbnl(__c, -__ilogbw); + __d = crt_scalbnl(__d, -__ilogbw); + } + long double __denom = __c * __c + __d * __d; + Lcomplex z; + COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = + crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { + if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) { + COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; + } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) && + crt_isfinite(__d)) { + __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 
1 : 0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } else if (crt_isinf(__logbw) && __logbw > 0 && crt_isfinite(__a) && + crt_isfinite(__b)) { + __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d); + COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); } - long double __denom = __c * __c + __d * __d; - Lcomplex z; - COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); - COMPLEX_IMAGINARY(z) = crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) - { - COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; - COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; - } - else if ((crt_isinf(__a) || crt_isinf(__b)) && - crt_isfinite(__c) && crt_isfinite(__d)) - { - __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); - } - else if (crt_isinf(__logbw) && __logbw > 0 && - crt_isfinite(__a) && crt_isfinite(__b)) - { - __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysignl(crt_isinf(__d) ? 1 : 0, __d); - COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); - COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); - } - } - return z; + } + return z; } #endif Index: compiler-rt/trunk/lib/builtins/emutls.c =================================================================== --- compiler-rt/trunk/lib/builtins/emutls.c +++ compiler-rt/trunk/lib/builtins/emutls.c @@ -11,7 +11,6 @@ #include #include "int_lib.h" -#include "int_util.h" #ifdef __BIONIC__ /* There are 4 pthread key cleanup rounds on Bionic. 
Delay emutls deallocation @@ -28,9 +27,9 @@ #endif typedef struct emutls_address_array { - uintptr_t skip_destructor_rounds; - uintptr_t size; /* number of elements in the 'data' array */ - void* data[]; + uintptr_t skip_destructor_rounds; + uintptr_t size; /* number of elements in the 'data' array */ + void *data[]; } emutls_address_array; static void emutls_shutdown(emutls_address_array *array); @@ -54,178 +53,169 @@ #endif static __inline void *emutls_memalign_alloc(size_t align, size_t size) { - void *base; + void *base; #if EMUTLS_USE_POSIX_MEMALIGN - if (posix_memalign(&base, align, size) != 0) - abort(); + if (posix_memalign(&base, align, size) != 0) + abort(); #else - #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void*)) - char* object; - if ((object = (char*)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) - abort(); - base = (void*)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) - & ~(uintptr_t)(align - 1)); +#define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void *)) + char *object; + if ((object = (char *)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) + abort(); + base = (void *)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) & + ~(uintptr_t)(align - 1)); - ((void**)base)[-1] = object; + ((void **)base)[-1] = object; #endif - return base; + return base; } static __inline void emutls_memalign_free(void *base) { #if EMUTLS_USE_POSIX_MEMALIGN - free(base); + free(base); #else - /* The mallocated address is in ((void**)base)[-1] */ - free(((void**)base)[-1]); + /* The mallocated address is in ((void**)base)[-1] */ + free(((void **)base)[-1]); #endif } static __inline void emutls_setspecific(emutls_address_array *value) { - pthread_setspecific(emutls_pthread_key, (void*) value); + pthread_setspecific(emutls_pthread_key, (void *)value); } -static __inline emutls_address_array* emutls_getspecific() { - return (emutls_address_array*) pthread_getspecific(emutls_pthread_key); +static __inline emutls_address_array *emutls_getspecific() { + return 
(emutls_address_array *)pthread_getspecific(emutls_pthread_key); } -static void emutls_key_destructor(void* ptr) { - emutls_address_array *array = (emutls_address_array*)ptr; - if (array->skip_destructor_rounds > 0) { - /* emutls is deallocated using a pthread key destructor. These - * destructors are called in several rounds to accommodate destructor - * functions that (re)initialize key values with pthread_setspecific. - * Delay the emutls deallocation to accommodate other end-of-thread - * cleanup tasks like calling thread_local destructors (e.g. the - * __cxa_thread_atexit fallback in libc++abi). - */ - array->skip_destructor_rounds--; - emutls_setspecific(array); - } else { - emutls_shutdown(array); - free(ptr); - } +static void emutls_key_destructor(void *ptr) { + emutls_address_array *array = (emutls_address_array *)ptr; + if (array->skip_destructor_rounds > 0) { + /* emutls is deallocated using a pthread key destructor. These + * destructors are called in several rounds to accommodate destructor + * functions that (re)initialize key values with pthread_setspecific. + * Delay the emutls deallocation to accommodate other end-of-thread + * cleanup tasks like calling thread_local destructors (e.g. the + * __cxa_thread_atexit fallback in libc++abi). 
+ */ + array->skip_destructor_rounds--; + emutls_setspecific(array); + } else { + emutls_shutdown(array); + free(ptr); + } } static __inline void emutls_init(void) { - if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) - abort(); - emutls_key_created = true; + if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) + abort(); + emutls_key_created = true; } static __inline void emutls_init_once(void) { - static pthread_once_t once = PTHREAD_ONCE_INIT; - pthread_once(&once, emutls_init); + static pthread_once_t once = PTHREAD_ONCE_INIT; + pthread_once(&once, emutls_init); } -static __inline void emutls_lock() { - pthread_mutex_lock(&emutls_mutex); -} +static __inline void emutls_lock() { pthread_mutex_lock(&emutls_mutex); } -static __inline void emutls_unlock() { - pthread_mutex_unlock(&emutls_mutex); -} +static __inline void emutls_unlock() { pthread_mutex_unlock(&emutls_mutex); } #else /* _WIN32 */ -#include +#include #include #include -#include +#include static LPCRITICAL_SECTION emutls_mutex; static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES; typedef uintptr_t gcc_word; -typedef void * gcc_pointer; +typedef void *gcc_pointer; static void win_error(DWORD last_err, const char *hint) { - char *buffer = NULL; - if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_MAX_WIDTH_MASK, - NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) { - fprintf(stderr, "Windows error: %s\n", buffer); - } else { - fprintf(stderr, "Unkown Windows error: %s\n", hint); - } - LocalFree(buffer); + char *buffer = NULL; + if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) { + fprintf(stderr, "Windows error: %s\n", buffer); + } else { + fprintf(stderr, "Unkown Windows error: %s\n", hint); + } + LocalFree(buffer); } static __inline void win_abort(DWORD last_err, const char *hint) { - win_error(last_err, 
hint); - abort(); + win_error(last_err, hint); + abort(); } static __inline void *emutls_memalign_alloc(size_t align, size_t size) { - void *base = _aligned_malloc(size, align); - if (!base) - win_abort(GetLastError(), "_aligned_malloc"); - return base; + void *base = _aligned_malloc(size, align); + if (!base) + win_abort(GetLastError(), "_aligned_malloc"); + return base; } -static __inline void emutls_memalign_free(void *base) { - _aligned_free(base); -} +static __inline void emutls_memalign_free(void *base) { _aligned_free(base); } static void emutls_exit(void) { - if (emutls_mutex) { - DeleteCriticalSection(emutls_mutex); - _aligned_free(emutls_mutex); - emutls_mutex = NULL; - } - if (emutls_tls_index != TLS_OUT_OF_INDEXES) { - emutls_shutdown((emutls_address_array*)TlsGetValue(emutls_tls_index)); - TlsFree(emutls_tls_index); - emutls_tls_index = TLS_OUT_OF_INDEXES; - } + if (emutls_mutex) { + DeleteCriticalSection(emutls_mutex); + _aligned_free(emutls_mutex); + emutls_mutex = NULL; + } + if (emutls_tls_index != TLS_OUT_OF_INDEXES) { + emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index)); + TlsFree(emutls_tls_index); + emutls_tls_index = TLS_OUT_OF_INDEXES; + } } -#pragma warning (push) -#pragma warning (disable : 4100) +#pragma warning(push) +#pragma warning(disable : 4100) static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) { - emutls_mutex = (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16); - if (!emutls_mutex) { - win_error(GetLastError(), "_aligned_malloc"); - return FALSE; - } - InitializeCriticalSection(emutls_mutex); - - emutls_tls_index = TlsAlloc(); - if (emutls_tls_index == TLS_OUT_OF_INDEXES) { - emutls_exit(); - win_error(GetLastError(), "TlsAlloc"); - return FALSE; - } - atexit(&emutls_exit); - return TRUE; + emutls_mutex = + (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16); + if (!emutls_mutex) { + win_error(GetLastError(), "_aligned_malloc"); + return FALSE; + } + 
InitializeCriticalSection(emutls_mutex); + + emutls_tls_index = TlsAlloc(); + if (emutls_tls_index == TLS_OUT_OF_INDEXES) { + emutls_exit(); + win_error(GetLastError(), "TlsAlloc"); + return FALSE; + } + atexit(&emutls_exit); + return TRUE; } static __inline void emutls_init_once(void) { - static INIT_ONCE once; - InitOnceExecuteOnce(&once, emutls_init, NULL, NULL); + static INIT_ONCE once; + InitOnceExecuteOnce(&once, emutls_init, NULL, NULL); } -static __inline void emutls_lock() { - EnterCriticalSection(emutls_mutex); -} +static __inline void emutls_lock() { EnterCriticalSection(emutls_mutex); } -static __inline void emutls_unlock() { - LeaveCriticalSection(emutls_mutex); -} +static __inline void emutls_unlock() { LeaveCriticalSection(emutls_mutex); } static __inline void emutls_setspecific(emutls_address_array *value) { - if (TlsSetValue(emutls_tls_index, (LPVOID) value) == 0) - win_abort(GetLastError(), "TlsSetValue"); + if (TlsSetValue(emutls_tls_index, (LPVOID)value) == 0) + win_abort(GetLastError(), "TlsSetValue"); } -static __inline emutls_address_array* emutls_getspecific() { - LPVOID value = TlsGetValue(emutls_tls_index); - if (value == NULL) { - const DWORD err = GetLastError(); - if (err != ERROR_SUCCESS) - win_abort(err, "TlsGetValue"); - } - return (emutls_address_array*) value; +static __inline emutls_address_array *emutls_getspecific() { + LPVOID value = TlsGetValue(emutls_tls_index); + if (value == NULL) { + const DWORD err = GetLastError(); + if (err != ERROR_SUCCESS) + win_abort(err, "TlsGetValue"); + } + return (emutls_address_array *)value; } /* Provide atomic load/store functions for emutls_get_index if built with MSVC. @@ -236,39 +226,39 @@ enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 }; static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) { - assert(type == __ATOMIC_ACQUIRE); - // These return the previous value - but since we do an OR with 0, - // it's equivalent to a plain load. 
+ assert(type == __ATOMIC_ACQUIRE); + // These return the previous value - but since we do an OR with 0, + // it's equivalent to a plain load. #ifdef _WIN64 - return InterlockedOr64(ptr, 0); + return InterlockedOr64(ptr, 0); #else - return InterlockedOr(ptr, 0); + return InterlockedOr(ptr, 0); #endif } static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) { - assert(type == __ATOMIC_RELEASE); - InterlockedExchangePointer((void *volatile *)ptr, (void *)val); + assert(type == __ATOMIC_RELEASE); + InterlockedExchangePointer((void *volatile *)ptr, (void *)val); } #endif /* __ATOMIC_RELEASE */ -#pragma warning (pop) +#pragma warning(pop) #endif /* _WIN32 */ -static size_t emutls_num_object = 0; /* number of allocated TLS objects */ +static size_t emutls_num_object = 0; /* number of allocated TLS objects */ /* Free the allocated TLS data */ static void emutls_shutdown(emutls_address_array *array) { - if (array) { - uintptr_t i; - for (i = 0; i < array->size; ++i) { - if (array->data[i]) - emutls_memalign_free(array->data[i]); - } + if (array) { + uintptr_t i; + for (i = 0; i < array->size; ++i) { + if (array->data[i]) + emutls_memalign_free(array->data[i]); } + } } /* For every TLS variable xyz, @@ -277,85 +267,84 @@ * will point to __emutls_t.xyz, which has the initial value. */ typedef struct __emutls_control { - /* Must use gcc_word here, instead of size_t, to match GCC. When - gcc_word is larger than size_t, the upper extra bits are all - zeros. We can use variables of size_t to operate on size and - align. */ - gcc_word size; /* size of the object in bytes */ - gcc_word align; /* alignment of the object in bytes */ - union { - uintptr_t index; /* data[index-1] is the object address */ - void* address; /* object address, when in single thread env */ - } object; - void* value; /* null or non-zero initial value for the object */ + /* Must use gcc_word here, instead of size_t, to match GCC. 
When + gcc_word is larger than size_t, the upper extra bits are all + zeros. We can use variables of size_t to operate on size and + align. */ + gcc_word size; /* size of the object in bytes */ + gcc_word align; /* alignment of the object in bytes */ + union { + uintptr_t index; /* data[index-1] is the object address */ + void *address; /* object address, when in single thread env */ + } object; + void *value; /* null or non-zero initial value for the object */ } __emutls_control; /* Emulated TLS objects are always allocated at run-time. */ static __inline void *emutls_allocate_object(__emutls_control *control) { - /* Use standard C types, check with gcc's emutls.o. */ - COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); - COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void*)); - - size_t size = control->size; - size_t align = control->align; - void* base; - if (align < sizeof(void*)) - align = sizeof(void*); - /* Make sure that align is power of 2. */ - if ((align & (align - 1)) != 0) - abort(); - - base = emutls_memalign_alloc(align, size); - if (control->value) - memcpy(base, control->value, size); - else - memset(base, 0, size); - return base; -} + /* Use standard C types, check with gcc's emutls.o. */ + COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); + COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *)); + + size_t size = control->size; + size_t align = control->align; + void *base; + if (align < sizeof(void *)) + align = sizeof(void *); + /* Make sure that align is power of 2. */ + if ((align & (align - 1)) != 0) + abort(); + base = emutls_memalign_alloc(align, size); + if (control->value) + memcpy(base, control->value, size); + else + memset(base, 0, size); + return base; +} /* Returns control->object.index; set index if not allocated yet. 
*/ static __inline uintptr_t emutls_get_index(__emutls_control *control) { - uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); + uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); + if (!index) { + emutls_init_once(); + emutls_lock(); + index = control->object.index; if (!index) { - emutls_init_once(); - emutls_lock(); - index = control->object.index; - if (!index) { - index = ++emutls_num_object; - __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); - } - emutls_unlock(); + index = ++emutls_num_object; + __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); } - return index; + emutls_unlock(); + } + return index; } /* Updates newly allocated thread local emutls_address_array. */ static __inline void emutls_check_array_set_size(emutls_address_array *array, uintptr_t size) { - if (array == NULL) - abort(); - array->size = size; - emutls_setspecific(array); + if (array == NULL) + abort(); + array->size = size; + emutls_setspecific(array); } /* Returns the new 'data' array size, number of elements, * which must be no smaller than the given index. */ static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { - /* Need to allocate emutls_address_array with extra slots - * to store the header. - * Round up the emutls_address_array size to multiple of 16. - */ - uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *); - return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words; + /* Need to allocate emutls_address_array with extra slots + * to store the header. + * Round up the emutls_address_array size to multiple of 16. + */ + uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *); + return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words; } /* Returns the size in bytes required for an emutls_address_array with * N number of elements for data field. 
*/ static __inline uintptr_t emutls_asize(uintptr_t N) { - return N * sizeof(void *) + sizeof(emutls_address_array); + return N * sizeof(void *) + sizeof(emutls_address_array); } /* Returns the thread local emutls_address_array. @@ -363,42 +352,41 @@ */ static __inline emutls_address_array * emutls_get_address_array(uintptr_t index) { - emutls_address_array* array = emutls_getspecific(); - if (array == NULL) { - uintptr_t new_size = emutls_new_data_array_size(index); - array = (emutls_address_array*) malloc(emutls_asize(new_size)); - if (array) { - memset(array->data, 0, new_size * sizeof(void*)); - array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS; - } - emutls_check_array_set_size(array, new_size); - } else if (index > array->size) { - uintptr_t orig_size = array->size; - uintptr_t new_size = emutls_new_data_array_size(index); - array = (emutls_address_array*) realloc(array, emutls_asize(new_size)); - if (array) - memset(array->data + orig_size, 0, - (new_size - orig_size) * sizeof(void*)); - emutls_check_array_set_size(array, new_size); + emutls_address_array *array = emutls_getspecific(); + if (array == NULL) { + uintptr_t new_size = emutls_new_data_array_size(index); + array = (emutls_address_array *)malloc(emutls_asize(new_size)); + if (array) { + memset(array->data, 0, new_size * sizeof(void *)); + array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS; } - return array; -} - -void* __emutls_get_address(__emutls_control* control) { - uintptr_t index = emutls_get_index(control); - emutls_address_array* array = emutls_get_address_array(index--); - if (array->data[index] == NULL) - array->data[index] = emutls_allocate_object(control); - return array->data[index]; + emutls_check_array_set_size(array, new_size); + } else if (index > array->size) { + uintptr_t orig_size = array->size; + uintptr_t new_size = emutls_new_data_array_size(index); + array = (emutls_address_array *)realloc(array, emutls_asize(new_size)); + if (array) + 
memset(array->data + orig_size, 0, + (new_size - orig_size) * sizeof(void *)); + emutls_check_array_set_size(array, new_size); + } + return array; +} + +void *__emutls_get_address(__emutls_control *control) { + uintptr_t index = emutls_get_index(control); + emutls_address_array *array = emutls_get_address_array(index--); + if (array->data[index] == NULL) + array->data[index] = emutls_allocate_object(control); + return array->data[index]; } #ifdef __BIONIC__ /* Called by Bionic on dlclose to delete the emutls pthread key. */ -__attribute__((visibility("hidden"))) -void __emutls_unregister_key(void) { - if (emutls_key_created) { - pthread_key_delete(emutls_pthread_key); - emutls_key_created = false; - } +__attribute__((visibility("hidden"))) void __emutls_unregister_key(void) { + if (emutls_key_created) { + pthread_key_delete(emutls_pthread_key); + emutls_key_created = false; + } } #endif Index: compiler-rt/trunk/lib/builtins/enable_execute_stack.c =================================================================== --- compiler-rt/trunk/lib/builtins/enable_execute_stack.c +++ compiler-rt/trunk/lib/builtins/enable_execute_stack.c @@ -29,43 +29,44 @@ #endif /* _WIN32 */ #if __LP64__ - #define TRAMPOLINE_SIZE 48 +#define TRAMPOLINE_SIZE 48 #else - #define TRAMPOLINE_SIZE 40 +#define TRAMPOLINE_SIZE 40 #endif /* - * The compiler generates calls to __enable_execute_stack() when creating + * The compiler generates calls to __enable_execute_stack() when creating * trampoline functions on the stack for use with nested functions. - * It is expected to mark the page(s) containing the address + * It is expected to mark the page(s) containing the address * and the next 48 bytes as executable. Since the stack is normally rw- - * that means changing the protection on those page(s) to rwx. + * that means changing the protection on those page(s) to rwx. 
*/ -COMPILER_RT_ABI void -__enable_execute_stack(void* addr) -{ +COMPILER_RT_ABI void __enable_execute_stack(void *addr) { #if _WIN32 - MEMORY_BASIC_INFORMATION mbi; - if (!VirtualQuery (addr, &mbi, sizeof(mbi))) - return; /* We should probably assert here because there is no return value */ - VirtualProtect (mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, &mbi.Protect); + MEMORY_BASIC_INFORMATION mbi; + if (!VirtualQuery(addr, &mbi, sizeof(mbi))) + return; /* We should probably assert here because there is no return value + */ + VirtualProtect(mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, + &mbi.Protect); #else #if __APPLE__ - /* On Darwin, pagesize is always 4096 bytes */ - const uintptr_t pageSize = 4096; + /* On Darwin, pagesize is always 4096 bytes */ + const uintptr_t pageSize = 4096; #elif !defined(HAVE_SYSCONF) #error "HAVE_SYSCONF not defined! See enable_execute_stack.c" #else - const uintptr_t pageSize = sysconf(_SC_PAGESIZE); + const uintptr_t pageSize = sysconf(_SC_PAGESIZE); #endif /* __APPLE__ */ - const uintptr_t pageAlignMask = ~(pageSize-1); - uintptr_t p = (uintptr_t)addr; - unsigned char* startPage = (unsigned char*)(p & pageAlignMask); - unsigned char* endPage = (unsigned char*)((p+TRAMPOLINE_SIZE+pageSize) & pageAlignMask); - size_t length = endPage - startPage; - (void) mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC); + const uintptr_t pageAlignMask = ~(pageSize - 1); + uintptr_t p = (uintptr_t)addr; + unsigned char *startPage = (unsigned char *)(p & pageAlignMask); + unsigned char *endPage = + (unsigned char *)((p + TRAMPOLINE_SIZE + pageSize) & pageAlignMask); + size_t length = endPage - startPage; + (void)mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC); #endif } Index: compiler-rt/trunk/lib/builtins/eprintf.c =================================================================== --- compiler-rt/trunk/lib/builtins/eprintf.c +++ compiler-rt/trunk/lib/builtins/eprintf.c @@ -7,12 
+7,9 @@ * ===----------------------------------------------------------------------=== */ - - #include "int_lib.h" #include <stdio.h> - /* * __eprintf() was used in an old version of <assert.h>. * It can eventually go away, but it is needed when linking @@ -25,10 +22,9 @@ __attribute__((visibility("hidden"))) #endif COMPILER_RT_ABI void -__eprintf(const char* format, const char* assertion_expression, - const char* line, const char* file) -{ - fprintf(stderr, format, assertion_expression, line, file); - fflush(stderr); - compilerrt_abort(); +__eprintf(const char *format, const char *assertion_expression, + const char *line, const char *file) { + fprintf(stderr, format, assertion_expression, line, file); + fflush(stderr); + compilerrt_abort(); } Index: compiler-rt/trunk/lib/builtins/extenddftf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/extenddftf2.c +++ compiler-rt/trunk/lib/builtins/extenddftf2.c @@ -16,7 +16,7 @@ #include "fp_extend_impl.inc" COMPILER_RT_ABI long double __extenddftf2(double a) { - return __extendXfYf2__(a); + return __extendXfYf2__(a); } #endif Index: compiler-rt/trunk/lib/builtins/extendhfsf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/extendhfsf2.c +++ compiler-rt/trunk/lib/builtins/extendhfsf2.c @@ -14,18 +14,14 @@ // Use a forwarding definition and noinline to implement a poor man's alias, // as there isn't a good cross-platform way of defining one. 
COMPILER_RT_ABI NOINLINE float __extendhfsf2(uint16_t a) { - return __extendXfYf2__(a); + return __extendXfYf2__(a); } -COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { - return __extendhfsf2(a); -} +COMPILER_RT_ABI float __gnu_h2f_ieee(uint16_t a) { return __extendhfsf2(a); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI float __aeabi_h2f(uint16_t a) { - return __extendhfsf2(a); -} +AEABI_RTABI float __aeabi_h2f(uint16_t a) { return __extendhfsf2(a); } #else AEABI_RTABI float __aeabi_h2f(uint16_t a) COMPILER_RT_ALIAS(__extendhfsf2); #endif Index: compiler-rt/trunk/lib/builtins/extendsfdf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/extendsfdf2.c +++ compiler-rt/trunk/lib/builtins/extendsfdf2.c @@ -11,15 +11,11 @@ #define DST_DOUBLE #include "fp_extend_impl.inc" -COMPILER_RT_ABI double __extendsfdf2(float a) { - return __extendXfYf2__(a); -} +COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI double __aeabi_f2d(float a) { - return __extendsfdf2(a); -} +AEABI_RTABI double __aeabi_f2d(float a) { return __extendsfdf2(a); } #else AEABI_RTABI double __aeabi_f2d(float a) COMPILER_RT_ALIAS(__extendsfdf2); #endif Index: compiler-rt/trunk/lib/builtins/extendsftf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/extendsftf2.c +++ compiler-rt/trunk/lib/builtins/extendsftf2.c @@ -16,7 +16,7 @@ #include "fp_extend_impl.inc" COMPILER_RT_ABI long double __extendsftf2(float a) { - return __extendXfYf2__(a); + return __extendXfYf2__(a); } #endif Index: compiler-rt/trunk/lib/builtins/ffsdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/ffsdi2.c +++ compiler-rt/trunk/lib/builtins/ffsdi2.c @@ -17,16 +17,13 @@ * the value zero if a is zero. The least significant bit is index one. 
*/ -COMPILER_RT_ABI si_int -__ffsdi2(di_int a) -{ - dwords x; - x.all = a; - if (x.s.low == 0) - { - if (x.s.high == 0) - return 0; - return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); - } - return __builtin_ctz(x.s.low) + 1; +COMPILER_RT_ABI si_int __ffsdi2(di_int a) { + dwords x; + x.all = a; + if (x.s.low == 0) { + if (x.s.high == 0) + return 0; + return __builtin_ctz(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); + } + return __builtin_ctz(x.s.low) + 1; } Index: compiler-rt/trunk/lib/builtins/ffssi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/ffssi2.c +++ compiler-rt/trunk/lib/builtins/ffssi2.c @@ -17,12 +17,9 @@ * the value zero if a is zero. The least significant bit is index one. */ -COMPILER_RT_ABI si_int -__ffssi2(si_int a) -{ - if (a == 0) - { - return 0; - } - return __builtin_ctz(a) + 1; +COMPILER_RT_ABI si_int __ffssi2(si_int a) { + if (a == 0) { + return 0; + } + return __builtin_ctz(a) + 1; } Index: compiler-rt/trunk/lib/builtins/ffsti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/ffsti2.c +++ compiler-rt/trunk/lib/builtins/ffsti2.c @@ -19,18 +19,15 @@ * the value zero if a is zero. The least significant bit is index one. 
*/ -COMPILER_RT_ABI si_int -__ffsti2(ti_int a) -{ - twords x; - x.all = a; - if (x.s.low == 0) - { - if (x.s.high == 0) - return 0; - return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT); - } - return __builtin_ctzll(x.s.low) + 1; +COMPILER_RT_ABI si_int __ffsti2(ti_int a) { + twords x; + x.all = a; + if (x.s.low == 0) { + if (x.s.high == 0) + return 0; + return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT); + } + return __builtin_ctzll(x.s.low) + 1; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/fixdfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixdfdi.c +++ compiler-rt/trunk/lib/builtins/fixdfdi.c @@ -17,13 +17,11 @@ COMPILER_RT_ABI du_int __fixunsdfdi(double a); -COMPILER_RT_ABI di_int -__fixdfdi(double a) -{ - if (a < 0.0) { - return -__fixunsdfdi(-a); - } - return __fixunsdfdi(a); +COMPILER_RT_ABI di_int __fixdfdi(double a) { + if (a < 0.0) { + return -__fixunsdfdi(-a); + } + return __fixunsdfdi(a); } #else @@ -36,18 +34,13 @@ typedef du_int fixuint_t; #include "fp_fixint_impl.inc" -COMPILER_RT_ABI di_int -__fixdfdi(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI di_int __fixdfdi(fp_t a) { return __fixint(a); } #endif #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI di_int __aeabi_d2lz(fp_t a) { - return __fixdfdi(a); -} +AEABI_RTABI di_int __aeabi_d2lz(fp_t a) { return __fixdfdi(a); } #else AEABI_RTABI di_int __aeabi_d2lz(fp_t a) COMPILER_RT_ALIAS(__fixdfdi); #endif Index: compiler-rt/trunk/lib/builtins/fixdfsi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixdfsi.c +++ compiler-rt/trunk/lib/builtins/fixdfsi.c @@ -13,16 +13,11 @@ typedef su_int fixuint_t; #include "fp_fixint_impl.inc" -COMPILER_RT_ABI si_int -__fixdfsi(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); } #if defined(__ARM_EABI__) #if 
defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI si_int __aeabi_d2iz(fp_t a) { - return __fixdfsi(a); -} +AEABI_RTABI si_int __aeabi_d2iz(fp_t a) { return __fixdfsi(a); } #else AEABI_RTABI si_int __aeabi_d2iz(fp_t a) COMPILER_RT_ALIAS(__fixdfsi); #endif Index: compiler-rt/trunk/lib/builtins/fixdfti.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixdfti.c +++ compiler-rt/trunk/lib/builtins/fixdfti.c @@ -17,9 +17,6 @@ typedef tu_int fixuint_t; #include "fp_fixint_impl.inc" -COMPILER_RT_ABI ti_int -__fixdfti(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI ti_int __fixdfti(fp_t a) { return __fixint(a); } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/fixsfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixsfdi.c +++ compiler-rt/trunk/lib/builtins/fixsfdi.c @@ -17,13 +17,11 @@ COMPILER_RT_ABI du_int __fixunssfdi(float a); -COMPILER_RT_ABI di_int -__fixsfdi(float a) -{ - if (a < 0.0f) { - return -__fixunssfdi(-a); - } - return __fixunssfdi(a); +COMPILER_RT_ABI di_int __fixsfdi(float a) { + if (a < 0.0f) { + return -__fixunssfdi(-a); + } + return __fixunssfdi(a); } #else @@ -36,18 +34,13 @@ typedef du_int fixuint_t; #include "fp_fixint_impl.inc" -COMPILER_RT_ABI di_int -__fixsfdi(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI di_int __fixsfdi(fp_t a) { return __fixint(a); } #endif #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI di_int __aeabi_f2lz(fp_t a) { - return __fixsfdi(a); -} +AEABI_RTABI di_int __aeabi_f2lz(fp_t a) { return __fixsfdi(a); } #else AEABI_RTABI di_int __aeabi_f2lz(fp_t a) COMPILER_RT_ALIAS(__fixsfdi); #endif Index: compiler-rt/trunk/lib/builtins/fixsfsi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixsfsi.c +++ compiler-rt/trunk/lib/builtins/fixsfsi.c @@ -13,16 +13,11 @@ typedef su_int fixuint_t; #include 
"fp_fixint_impl.inc" -COMPILER_RT_ABI si_int -__fixsfsi(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI si_int __aeabi_f2iz(fp_t a) { - return __fixsfsi(a); -} +AEABI_RTABI si_int __aeabi_f2iz(fp_t a) { return __fixsfsi(a); } #else AEABI_RTABI si_int __aeabi_f2iz(fp_t a) COMPILER_RT_ALIAS(__fixsfsi); #endif Index: compiler-rt/trunk/lib/builtins/fixsfti.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixsfti.c +++ compiler-rt/trunk/lib/builtins/fixsfti.c @@ -17,9 +17,6 @@ typedef tu_int fixuint_t; #include "fp_fixint_impl.inc" -COMPILER_RT_ABI ti_int -__fixsfti(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI ti_int __fixsfti(fp_t a) { return __fixint(a); } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/fixtfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixtfdi.c +++ compiler-rt/trunk/lib/builtins/fixtfdi.c @@ -15,8 +15,5 @@ typedef du_int fixuint_t; #include "fp_fixint_impl.inc" -COMPILER_RT_ABI di_int -__fixtfdi(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); } #endif Index: compiler-rt/trunk/lib/builtins/fixtfsi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixtfsi.c +++ compiler-rt/trunk/lib/builtins/fixtfsi.c @@ -15,8 +15,5 @@ typedef su_int fixuint_t; #include "fp_fixint_impl.inc" -COMPILER_RT_ABI si_int -__fixtfsi(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI si_int __fixtfsi(fp_t a) { return __fixint(a); } #endif Index: compiler-rt/trunk/lib/builtins/fixtfti.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixtfti.c +++ compiler-rt/trunk/lib/builtins/fixtfti.c @@ -15,8 +15,5 @@ typedef tu_int fixuint_t; #include 
"fp_fixint_impl.inc" -COMPILER_RT_ABI ti_int -__fixtfti(fp_t a) { - return __fixint(a); -} +COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return __fixint(a); } #endif Index: compiler-rt/trunk/lib/builtins/fixunsdfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunsdfdi.c +++ compiler-rt/trunk/lib/builtins/fixunsdfdi.c @@ -15,13 +15,12 @@ * flag as a side-effect of computation. */ -COMPILER_RT_ABI du_int -__fixunsdfdi(double a) -{ - if (a <= 0.0) return 0; - su_int high = a / 4294967296.f; /* a / 0x1p32f; */ - su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */ - return ((du_int)high << 32) | low; +COMPILER_RT_ABI du_int __fixunsdfdi(double a) { + if (a <= 0.0) + return 0; + su_int high = a / 4294967296.f; /* a / 0x1p32f; */ + su_int low = a - (double)high * 4294967296.f; /* high * 0x1p32f; */ + return ((du_int)high << 32) | low; } #else @@ -33,18 +32,13 @@ typedef du_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI du_int -__fixunsdfdi(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI du_int __fixunsdfdi(fp_t a) { return __fixuint(a); } #endif #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) { - return __fixunsdfdi(a); -} +AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) { return __fixunsdfdi(a); } #else AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfdi); #endif Index: compiler-rt/trunk/lib/builtins/fixunsdfsi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunsdfsi.c +++ compiler-rt/trunk/lib/builtins/fixunsdfsi.c @@ -12,16 +12,11 @@ typedef su_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI su_int -__fixunsdfsi(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) { - return 
__fixunsdfsi(a); -} +AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) { return __fixunsdfsi(a); } #else AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunsdfsi); #endif Index: compiler-rt/trunk/lib/builtins/fixunsdfti.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunsdfti.c +++ compiler-rt/trunk/lib/builtins/fixunsdfti.c @@ -15,8 +15,5 @@ typedef tu_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI tu_int -__fixunsdfti(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI tu_int __fixunsdfti(fp_t a) { return __fixuint(a); } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/fixunssfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunssfdi.c +++ compiler-rt/trunk/lib/builtins/fixunssfdi.c @@ -15,14 +15,13 @@ * flag as a side-effect of computation. */ -COMPILER_RT_ABI du_int -__fixunssfdi(float a) -{ - if (a <= 0.0f) return 0; - double da = a; - su_int high = da / 4294967296.f; /* da / 0x1p32f; */ - su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */ - return ((du_int)high << 32) | low; +COMPILER_RT_ABI du_int __fixunssfdi(float a) { + if (a <= 0.0f) + return 0; + double da = a; + su_int high = da / 4294967296.f; /* da / 0x1p32f; */ + su_int low = da - (double)high * 4294967296.f; /* high * 0x1p32f; */ + return ((du_int)high << 32) | low; } #else @@ -34,18 +33,13 @@ typedef du_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI du_int -__fixunssfdi(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI du_int __fixunssfdi(fp_t a) { return __fixuint(a); } #endif #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) { - return __fixunssfdi(a); -} +AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) { return __fixunssfdi(a); } #else AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) COMPILER_RT_ALIAS(__fixunssfdi); #endif Index: 
compiler-rt/trunk/lib/builtins/fixunssfsi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunssfsi.c +++ compiler-rt/trunk/lib/builtins/fixunssfsi.c @@ -16,16 +16,11 @@ typedef su_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI su_int -__fixunssfsi(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) { - return __fixunssfsi(a); -} +AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) { return __fixunssfsi(a); } #else AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) COMPILER_RT_ALIAS(__fixunssfsi); #endif Index: compiler-rt/trunk/lib/builtins/fixunssfti.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunssfti.c +++ compiler-rt/trunk/lib/builtins/fixunssfti.c @@ -18,8 +18,5 @@ typedef tu_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI tu_int -__fixunssfti(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI tu_int __fixunssfti(fp_t a) { return __fixuint(a); } #endif Index: compiler-rt/trunk/lib/builtins/fixunstfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunstfdi.c +++ compiler-rt/trunk/lib/builtins/fixunstfdi.c @@ -14,8 +14,5 @@ typedef du_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI du_int -__fixunstfdi(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); } #endif Index: compiler-rt/trunk/lib/builtins/fixunstfsi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunstfsi.c +++ compiler-rt/trunk/lib/builtins/fixunstfsi.c @@ -14,8 +14,5 @@ typedef su_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI su_int -__fixunstfsi(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI su_int 
__fixunstfsi(fp_t a) { return __fixuint(a); } #endif Index: compiler-rt/trunk/lib/builtins/fixunstfti.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunstfti.c +++ compiler-rt/trunk/lib/builtins/fixunstfti.c @@ -14,8 +14,5 @@ typedef tu_int fixuint_t; #include "fp_fixuint_impl.inc" -COMPILER_RT_ABI tu_int -__fixunstfti(fp_t a) { - return __fixuint(a); -} +COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); } #endif Index: compiler-rt/trunk/lib/builtins/fixunsxfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunsxfdi.c +++ compiler-rt/trunk/lib/builtins/fixunsxfdi.c @@ -19,27 +19,25 @@ * Negative values all become zero. */ -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * du_int is a 64 bit integral type - * value in long double is representable in du_int or is negative - * (no range checking performed) +/* Assumption: long double is an intel 80 bit floating point type padded with 6 + * bytes du_int is a 64 bit integral type value in long double is representable + * in du_int or is negative (no range checking performed) */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI du_int -__fixunsxfdi(long double a) -{ - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0 || (fb.u.high.s.low & 0x00008000)) - return 0; - if ((unsigned)e > sizeof(du_int) * CHAR_BIT) - return ~(du_int)0; - return fb.u.low.all >> (63 - e); +COMPILER_RT_ABI du_int __fixunsxfdi(long double a) { + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 
0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(du_int) * CHAR_BIT) + return ~(du_int)0; + return fb.u.low.all >> (63 - e); } #endif Index: compiler-rt/trunk/lib/builtins/fixunsxfsi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixunsxfsi.c +++ compiler-rt/trunk/lib/builtins/fixunsxfsi.c @@ -19,26 +19,25 @@ * Negative values all become zero. */ -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * su_int is a 32 bit integral type - * value in long double is representable in su_int or is negative +/* Assumption: long double is an intel 80 bit floating point type padded with 6 + * bytes su_int is a 32 bit integral type value in long double is representable + * in su_int or is negative */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI su_int -__fixunsxfsi(long double a) -{ - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0 || (fb.u.high.s.low & 0x00008000)) - return 0; - if ((unsigned)e > sizeof(su_int) * CHAR_BIT) - return ~(su_int)0; - return fb.u.low.s.high >> (31 - e); +COMPILER_RT_ABI su_int __fixunsxfsi(long double a) { + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(su_int) * CHAR_BIT) + return ~(su_int)0; + return fb.u.low.s.high >> (31 - e); } #endif /* !_ARCH_PPC */ Index: compiler-rt/trunk/lib/builtins/fixunsxfti.c =================================================================== --- 
compiler-rt/trunk/lib/builtins/fixunsxfti.c +++ compiler-rt/trunk/lib/builtins/fixunsxfti.c @@ -19,31 +19,30 @@ * Negative values all become zero. */ -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * tu_int is a 128 bit integral type - * value in long double is representable in tu_int or is negative +/* Assumption: long double is an intel 80 bit floating point type padded with 6 + * bytes tu_int is a 128 bit integral type value in long double is representable + * in tu_int or is negative */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI tu_int -__fixunsxfti(long double a) -{ - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0 || (fb.u.high.s.low & 0x00008000)) - return 0; - if ((unsigned)e > sizeof(tu_int) * CHAR_BIT) - return ~(tu_int)0; - tu_int r = fb.u.low.all; - if (e > 63) - r <<= (e - 63); - else - r >>= (63 - e); - return r; +COMPILER_RT_ABI tu_int __fixunsxfti(long double a) { + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.s.low & 0x00008000)) + return 0; + if ((unsigned)e > sizeof(tu_int) * CHAR_BIT) + return ~(tu_int)0; + tu_int r = fb.u.low.all; + if (e > 63) + r <<= (e - 63); + else + r >>= (63 - e); + return r; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/fixxfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixxfdi.c +++ compiler-rt/trunk/lib/builtins/fixxfdi.c @@ -17,31 +17,30 @@ /* Returns: convert a to a signed long long, rounding toward zero. 
*/ -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * di_int is a 64 bit integral type - * value in long double is representable in di_int (no range checking performed) +/* Assumption: long double is an intel 80 bit floating point type padded with 6 + * bytes di_int is a 64 bit integral type value in long double is representable + * in di_int (no range checking performed) */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI di_int -__fixxfdi(long double a) -{ - const di_int di_max = (di_int)((~(du_int)0) / 2); - const di_int di_min = -di_max - 1; - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0) - return 0; - if ((unsigned)e >= sizeof(di_int) * CHAR_BIT) - return a > 0 ? di_max : di_min; - di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); - di_int r = fb.u.low.all; - r = (du_int)r >> (63 - e); - return (r ^ s) - s; +COMPILER_RT_ABI di_int __fixxfdi(long double a) { + const di_int di_max = (di_int)((~(du_int)0) / 2); + const di_int di_min = -di_max - 1; + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0) + return 0; + if ((unsigned)e >= sizeof(di_int) * CHAR_BIT) + return a > 0 ? 
di_max : di_min; + di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); + di_int r = fb.u.low.all; + r = (du_int)r >> (63 - e); + return (r ^ s) - s; } #endif /* !_ARCH_PPC */ Index: compiler-rt/trunk/lib/builtins/fixxfti.c =================================================================== --- compiler-rt/trunk/lib/builtins/fixxfti.c +++ compiler-rt/trunk/lib/builtins/fixxfti.c @@ -17,34 +17,33 @@ /* Returns: convert a to a signed long long, rounding toward zero. */ -/* Assumption: long double is an intel 80 bit floating point type padded with 6 bytes - * ti_int is a 128 bit integral type - * value in long double is representable in ti_int +/* Assumption: long double is an intel 80 bit floating point type padded with 6 + * bytes ti_int is a 128 bit integral type value in long double is representable + * in ti_int */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI ti_int -__fixxfti(long double a) -{ - const ti_int ti_max = (ti_int)((~(tu_int)0) / 2); - const ti_int ti_min = -ti_max - 1; - long_double_bits fb; - fb.f = a; - int e = (fb.u.high.s.low & 0x00007FFF) - 16383; - if (e < 0) - return 0; - ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); - ti_int r = fb.u.low.all; - if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT) - return a > 0 ? 
ti_max : ti_min; - if (e > 63) - r <<= (e - 63); - else - r >>= (63 - e); - return (r ^ s) - s; +COMPILER_RT_ABI ti_int __fixxfti(long double a) { + const ti_int ti_max = (ti_int)((~(tu_int)0) / 2); + const ti_int ti_min = -ti_max - 1; + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.s.low & 0x00007FFF) - 16383; + if (e < 0) + return 0; + ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); + ti_int r = fb.u.low.all; + if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT) + return a > 0 ? ti_max : ti_min; + if (e > 63) + r <<= (e - 63); + else + r >>= (63 - e); + return (r ^ s) - s; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/floatdidf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatdidf.c +++ compiler-rt/trunk/lib/builtins/floatdidf.c @@ -19,95 +19,89 @@ * di_int is a 64 bit integral type */ -/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm + * mmmm */ #ifndef __SOFT_FP__ -/* Support for systems that have hardware floating-point; we'll set the inexact flag - * as a side-effect of this computation. +/* Support for systems that have hardware floating-point; we'll set the inexact + * flag as a side-effect of this computation. 
*/ -COMPILER_RT_ABI double -__floatdidf(di_int a) -{ - static const double twop52 = 4503599627370496.0; // 0x1.0p52 - static const double twop32 = 4294967296.0; // 0x1.0p32 +COMPILER_RT_ABI double __floatdidf(di_int a) { + static const double twop52 = 4503599627370496.0; // 0x1.0p52 + static const double twop32 = 4294967296.0; // 0x1.0p32 + + union { + int64_t x; + double d; + } low = {.d = twop52}; - union { int64_t x; double d; } low = { .d = twop52 }; + const double high = (int32_t)(a >> 32) * twop32; + low.x |= a & INT64_C(0x00000000ffffffff); - const double high = (int32_t)(a >> 32) * twop32; - low.x |= a & INT64_C(0x00000000ffffffff); - - const double result = (high - twop52) + low.d; - return result; + const double result = (high - twop52) + low.d; + return result; } #else -/* Support for systems that don't have hardware floating-point; there are no flags to - * set, and we don't want to code-gen to an unknown soft-float implementation. +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. 
*/ -COMPILER_RT_ABI double -__floatdidf(di_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(di_int) * CHAR_BIT; - const di_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __builtin_clzll(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > DBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit DBL_MANT_DIG-1 bits to the right of 1 - * Q = bit DBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = ((du_int)a >> (sd - (DBL_MANT_DIG+2))) | - ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ - if (a & ((du_int)1 << DBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to DBL_MANT_DIG bits */ - } - else - { - a <<= (DBL_MANT_DIG - sd); - /* a is now rounded to DBL_MANT_DIG bits */ +COMPILER_RT_ABI double __floatdidf(di_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N - 1); + a = (a ^ s) - s; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case DBL_MANT_DIG + 1: + a <<= 1; + 
break; + case DBL_MANT_DIG + 2: + break; + default: + a = ((du_int)a >> (sd - (DBL_MANT_DIG + 2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((du_int)1 << DBL_MANT_DIG)) { + a >>= 1; + ++e; } - double_bits fb; - fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */ - ((e + 1023) << 20) | /* exponent */ - ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ - fb.u.s.low = (su_int)a; /* mantissa-low */ - return fb.f; + /* a is now rounded to DBL_MANT_DIG bits */ + } else { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */ + ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; } #endif #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI double __aeabi_l2d(di_int a) { - return __floatdidf(a); -} +AEABI_RTABI double __aeabi_l2d(di_int a) { return __floatdidf(a); } #else AEABI_RTABI double __aeabi_l2d(di_int a) COMPILER_RT_ALIAS(__floatdidf); #endif Index: compiler-rt/trunk/lib/builtins/floatdisf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatdisf.c +++ compiler-rt/trunk/lib/builtins/floatdisf.c @@ -13,74 +13,65 @@ /* Returns: convert a to a float, rounding toward even.*/ -/* Assumption: float is a IEEE 32 bit floating point type +/* Assumption: float is a IEEE 32 bit floating point type * di_int is a 64 bit integral type - */ + */ /* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ #include "int_lib.h" -COMPILER_RT_ABI float -__floatdisf(di_int a) -{ - if (a == 0) - return 0.0F; - const unsigned N = sizeof(di_int) * CHAR_BIT; - const di_int s = a >> 
(N-1); - a = (a ^ s) - s; - int sd = N - __builtin_clzll(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > FLT_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit FLT_MANT_DIG-1 bits to the right of 1 - * Q = bit FLT_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case FLT_MANT_DIG + 1: - a <<= 1; - break; - case FLT_MANT_DIG + 2: - break; - default: - a = ((du_int)a >> (sd - (FLT_MANT_DIG+2))) | - ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ - if (a & ((du_int)1 << FLT_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to FLT_MANT_DIG bits */ - } - else - { - a <<= (FLT_MANT_DIG - sd); - /* a is now rounded to FLT_MANT_DIG bits */ +COMPILER_RT_ABI float __floatdisf(di_int a) { + if (a == 0) + return 0.0F; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N - 1); + a = (a ^ s) - s; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = ((du_int)a >> (sd - (FLT_MANT_DIG + 2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG + 2) - 
sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((du_int)1 << FLT_MANT_DIG)) { + a >>= 1; + ++e; } - float_bits fb; - fb.u = ((su_int)s & 0x80000000) | /* sign */ - ((e + 127) << 23) | /* exponent */ - ((su_int)a & 0x007FFFFF); /* mantissa */ - return fb.f; + /* a is now rounded to FLT_MANT_DIG bits */ + } else { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((su_int)s & 0x80000000) | /* sign */ + ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI float __aeabi_l2f(di_int a) { - return __floatdisf(a); -} +AEABI_RTABI float __aeabi_l2f(di_int a) { return __floatdisf(a); } #else AEABI_RTABI float __aeabi_l2f(di_int a) COMPILER_RT_ALIAS(__floatdisf); #endif Index: compiler-rt/trunk/lib/builtins/floatditf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatditf.c +++ compiler-rt/trunk/lib/builtins/floatditf.c @@ -18,32 +18,32 @@ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) COMPILER_RT_ABI fp_t __floatditf(di_int a) { - const int aWidth = sizeof a * CHAR_BIT; + const int aWidth = sizeof a * CHAR_BIT; - // Handle zero as a special case to protect clz - if (a == 0) - return fromRep(0); - - // All other cases begin by extracting the sign and absolute value of a - rep_t sign = 0; - du_int aAbs = (du_int)a; - if (a < 0) { - sign = signBit; - aAbs = ~(du_int)a + 1U; - } - - // Exponent of (fp_t)a is the width of abs(a). 
- const int exponent = (aWidth - 1) - __builtin_clzll(aAbs); - rep_t result; - - // Shift a into the significand field, rounding if it is a right-shift - const int shift = significandBits - exponent; - result = (rep_t)aAbs << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - // Insert the sign bit and return - return fromRep(result | sign); + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + du_int aAbs = (du_int)a; + if (a < 0) { + sign = signBit; + aAbs = ~(du_int)a + 1U; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clzll(aAbs); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); } #endif Index: compiler-rt/trunk/lib/builtins/floatdixf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatdixf.c +++ compiler-rt/trunk/lib/builtins/floatdixf.c @@ -9,7 +9,7 @@ * This file implements __floatdixf for the compiler_rt library. * * ===----------------------------------------------------------------------=== - */ + */ #if !_ARCH_PPC @@ -17,29 +17,28 @@ /* Returns: convert a to a long double, rounding toward even. 
*/ -/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits - * di_int is a 64 bit integral type +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 + * bits di_int is a 64 bit integral type */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI long double -__floatdixf(di_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(di_int) * CHAR_BIT; - const di_int s = a >> (N-1); - a = (a ^ s) - s; - int clz = __builtin_clzll(a); - int e = (N - 1) - clz ; /* exponent */ - long_double_bits fb; - fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */ - (e + 16383); /* exponent */ - fb.u.low.all = a << clz; /* mantissa */ - return fb.f; +COMPILER_RT_ABI long double __floatdixf(di_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N - 1); + a = (a ^ s) - s; + int clz = __builtin_clzll(a); + int e = (N - 1) - clz; /* exponent */ + long_double_bits fb; + fb.u.high.s.low = ((su_int)s & 0x00008000) | /* sign */ + (e + 16383); /* exponent */ + fb.u.low.all = a << clz; /* mantissa */ + return fb.f; } #endif /* !_ARCH_PPC */ Index: compiler-rt/trunk/lib/builtins/floatsidf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatsidf.c +++ compiler-rt/trunk/lib/builtins/floatsidf.c @@ -17,43 +17,40 @@ #include "int_lib.h" -COMPILER_RT_ABI fp_t -__floatsidf(int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) - return fromRep(0); - - // All other cases begin by extracting the sign and absolute value of a - rep_t sign = 0; - if 
(a < 0) { - sign = signBit; - a = -a; - } - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. Extra - // cast to unsigned int is necessary to get the correct behavior for - // the input INT_MIN. - const int shift = significandBits - exponent; - result = (rep_t)(unsigned int)a << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - // Insert the sign bit and return - return fromRep(result | sign); +COMPILER_RT_ABI fp_t __floatsidf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + if (a < 0) { + sign = signBit; + a = -a; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. Extra + // cast to unsigned int is necessary to get the correct behavior for + // the input INT_MIN. 
+ const int shift = significandBits - exponent; + result = (rep_t)(unsigned int)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_i2d(int a) { - return __floatsidf(a); -} +AEABI_RTABI fp_t __aeabi_i2d(int a) { return __floatsidf(a); } #else AEABI_RTABI fp_t __aeabi_i2d(int a) COMPILER_RT_ALIAS(__floatsidf); #endif Index: compiler-rt/trunk/lib/builtins/floatsisf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatsisf.c +++ compiler-rt/trunk/lib/builtins/floatsisf.c @@ -17,49 +17,48 @@ #include "int_lib.h" -COMPILER_RT_ABI fp_t -__floatsisf(int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) - return fromRep(0); - - // All other cases begin by extracting the sign and absolute value of a - rep_t sign = 0; - if (a < 0) { - sign = signBit; - a = -a; - } - - // Exponent of (fp_t)a is the width of abs(a). 
- const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field, rounding if it is a right-shift - if (exponent <= significandBits) { - const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - } else { - const int shift = exponent - significandBits; - result = (rep_t)a >> shift ^ implicitBit; - rep_t round = (rep_t)a << (typeWidth - shift); - if (round > signBit) result++; - if (round == signBit) result += result & 1; - } - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - // Insert the sign bit and return - return fromRep(result | sign); +COMPILER_RT_ABI fp_t __floatsisf(int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + if (a < 0) { + sign = signBit; + a = -a; + } + + // Exponent of (fp_t)a is the width of abs(a). 
+ const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)a >> shift ^ implicitBit; + rep_t round = (rep_t)a << (typeWidth - shift); + if (round > signBit) + result++; + if (round == signBit) + result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_i2f(int a) { - return __floatsisf(a); -} +AEABI_RTABI fp_t __aeabi_i2f(int a) { return __floatsisf(a); } #else AEABI_RTABI fp_t __aeabi_i2f(int a) COMPILER_RT_ALIAS(__floatsisf); #endif Index: compiler-rt/trunk/lib/builtins/floatsitf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatsitf.c +++ compiler-rt/trunk/lib/builtins/floatsitf.c @@ -18,32 +18,32 @@ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) COMPILER_RT_ABI fp_t __floatsitf(int a) { - const int aWidth = sizeof a * CHAR_BIT; + const int aWidth = sizeof a * CHAR_BIT; - // Handle zero as a special case to protect clz - if (a == 0) - return fromRep(0); - - // All other cases begin by extracting the sign and absolute value of a - rep_t sign = 0; - unsigned aAbs = (unsigned)a; - if (a < 0) { - sign = signBit; - aAbs = ~(unsigned)a + 1U; - } - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(aAbs); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. 
- const int shift = significandBits - exponent; - result = (rep_t)aAbs << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - // Insert the sign bit and return - return fromRep(result | sign); + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // All other cases begin by extracting the sign and absolute value of a + rep_t sign = 0; + unsigned aAbs = (unsigned)a; + if (a < 0) { + sign = signBit; + aAbs = ~(unsigned)a + 1U; + } + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(aAbs); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)aAbs << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + // Insert the sign bit and return + return fromRep(result | sign); } #endif Index: compiler-rt/trunk/lib/builtins/floattidf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floattidf.c +++ compiler-rt/trunk/lib/builtins/floattidf.c @@ -21,62 +21,56 @@ * ti_int is a 128 bit integral type */ -/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm + * mmmm */ -COMPILER_RT_ABI double -__floattidf(ti_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(ti_int) * CHAR_BIT; - const ti_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > DBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit DBL_MANT_DIG-1 bits to the right of 1 
- * Q = bit DBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = ((tu_int)a >> (sd - (DBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << DBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to DBL_MANT_DIG bits */ +COMPILER_RT_ABI double __floattidf(ti_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N - 1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (DBL_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << DBL_MANT_DIG)) { + a >>= 1; + ++e; } - else - { - a <<= (DBL_MANT_DIG - sd); - /* a is now rounded to DBL_MANT_DIG bits */ - } - double_bits fb; - fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */ - ((e + 1023) << 20) | /* exponent */ + /* a is now rounded to 
DBL_MANT_DIG bits */ + } else { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((su_int)s & 0x80000000) | /* sign */ + ((e + 1023) << 20) | /* exponent */ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ - fb.u.s.low = (su_int)a; /* mantissa-low */ - return fb.f; + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/floattisf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floattisf.c +++ compiler-rt/trunk/lib/builtins/floattisf.c @@ -17,65 +17,58 @@ /* Returns: convert a to a float, rounding toward even. */ -/* Assumption: float is a IEEE 32 bit floating point type +/* Assumption: float is a IEEE 32 bit floating point type * ti_int is a 128 bit integral type */ /* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ -COMPILER_RT_ABI float -__floattisf(ti_int a) -{ - if (a == 0) - return 0.0F; - const unsigned N = sizeof(ti_int) * CHAR_BIT; - const ti_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > FLT_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit FLT_MANT_DIG-1 bits to the right of 1 - * Q = bit FLT_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case FLT_MANT_DIG + 1: - a <<= 1; - break; - case FLT_MANT_DIG + 2: - break; - default: - a = ((tu_int)a >> (sd - (FLT_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ - 
if (a & ((tu_int)1 << FLT_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to FLT_MANT_DIG bits */ +COMPILER_RT_ABI float __floattisf(ti_int a) { + if (a == 0) + return 0.0F; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N - 1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (FLT_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << FLT_MANT_DIG)) { + a >>= 1; + ++e; } - else - { - a <<= (FLT_MANT_DIG - sd); - /* a is now rounded to FLT_MANT_DIG bits */ - } - float_bits fb; - fb.u = ((su_int)s & 0x80000000) | /* sign */ - ((e + 127) << 23) | /* exponent */ - ((su_int)a & 0x007FFFFF); /* mantissa */ - return fb.f; + /* a is now rounded to FLT_MANT_DIG bits */ + } else { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((su_int)s & 0x80000000) | /* sign */ + ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/floattitf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floattitf.c +++ 
compiler-rt/trunk/lib/builtins/floattitf.c @@ -22,60 +22,60 @@ * ti_int is a 128 bit integral type */ -/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | - * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm + * mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t -__floattitf(ti_int a) { - if (a == 0) - return 0.0; - const unsigned N = sizeof(ti_int) * CHAR_BIT; - const ti_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > LDBL_MANT_DIG) { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit LDBL_MANT_DIG-1 bits to the right of 1 - * Q = bit LDBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) { - case LDBL_MANT_DIG + 1: - a <<= 1; - break; - case LDBL_MANT_DIG + 2: - break; - default: - a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << LDBL_MANT_DIG)) { - a >>= 1; - ++e; - } - /* a is now rounded to LDBL_MANT_DIG bits */ - } else { - a <<= (LDBL_MANT_DIG - sd); - /* a is now rounded to LDBL_MANT_DIG bits */ +COMPILER_RT_ABI fp_t __floattitf(ti_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N - 1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); /* 
number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (LDBL_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; } - - long_double_bits fb; - fb.u.high.all = (s & 0x8000000000000000LL) /* sign */ - | (du_int)(e + 16383) << 48 /* exponent */ - | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ - fb.u.low.all = (du_int)(a); - return fb.f; + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + + long_double_bits fb; + fb.u.high.all = (s & 0x8000000000000000LL) /* sign */ + | (du_int)(e + 16383) << 48 /* exponent */ + | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ + fb.u.low.all = (du_int)(a); + return fb.f; } #endif Index: compiler-rt/trunk/lib/builtins/floattixf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floattixf.c +++ compiler-rt/trunk/lib/builtins/floattixf.c @@ -17,67 +17,61 @@ /* Returns: convert a to a long double, rounding toward even. 
*/ -/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits - * ti_int is a 128 bit integral type +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 + * bits ti_int is a 128 bit integral type */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI long double -__floattixf(ti_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(ti_int) * CHAR_BIT; - const ti_int s = a >> (N-1); - a = (a ^ s) - s; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > LDBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit LDBL_MANT_DIG-1 bits to the right of 1 - * Q = bit LDBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case LDBL_MANT_DIG + 1: - a <<= 1; - break; - case LDBL_MANT_DIG + 2: - break; - default: - a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << LDBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to LDBL_MANT_DIG bits */ +COMPILER_RT_ABI long double __floattixf(ti_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N - 1); + a = (a ^ s) - s; + int sd 
= N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (LDBL_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; } - else - { - a <<= (LDBL_MANT_DIG - sd); - /* a is now rounded to LDBL_MANT_DIG bits */ - } - long_double_bits fb; - fb.u.high.s.low = ((su_int)s & 0x8000) | /* sign */ - (e + 16383); /* exponent */ - fb.u.low.all = (du_int)a; /* mantissa */ - return fb.f; + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + long_double_bits fb; + fb.u.high.s.low = ((su_int)s & 0x8000) | /* sign */ + (e + 16383); /* exponent */ + fb.u.low.all = (du_int)a; /* mantissa */ + return fb.f; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/floatundidf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatundidf.c +++ compiler-rt/trunk/lib/builtins/floatundidf.c @@ -17,96 +17,94 @@ * du_int is a 64 bit integral type */ -/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm 
mmmm + * mmmm */ #include "int_lib.h" #ifndef __SOFT_FP__ -/* Support for systems that have hardware floating-point; we'll set the inexact flag - * as a side-effect of this computation. +/* Support for systems that have hardware floating-point; we'll set the inexact + * flag as a side-effect of this computation. */ -COMPILER_RT_ABI double -__floatundidf(du_int a) -{ - static const double twop52 = 4503599627370496.0; // 0x1.0p52 - static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84 - static const double twop84_plus_twop52 = 19342813118337666422669312.0; // 0x1.00000001p84 +COMPILER_RT_ABI double __floatundidf(du_int a) { + static const double twop52 = 4503599627370496.0; // 0x1.0p52 + static const double twop84 = 19342813113834066795298816.0; // 0x1.0p84 + static const double twop84_plus_twop52 = + 19342813118337666422669312.0; // 0x1.00000001p84 + + union { + uint64_t x; + double d; + } high = {.d = twop84}; + union { + uint64_t x; + double d; + } low = {.d = twop52}; - union { uint64_t x; double d; } high = { .d = twop84 }; - union { uint64_t x; double d; } low = { .d = twop52 }; + high.x |= a >> 32; + low.x |= a & UINT64_C(0x00000000ffffffff); - high.x |= a >> 32; - low.x |= a & UINT64_C(0x00000000ffffffff); - - const double result = (high.d - twop84_plus_twop52) + low.d; - return result; + const double result = (high.d - twop84_plus_twop52) + low.d; + return result; } #else -/* Support for systems that don't have hardware floating-point; there are no flags to - * set, and we don't want to code-gen to an unknown soft-float implementation. +/* Support for systems that don't have hardware floating-point; there are no + * flags to set, and we don't want to code-gen to an unknown soft-float + * implementation. 
*/ -COMPILER_RT_ABI double -__floatundidf(du_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(du_int) * CHAR_BIT; - int sd = N - __builtin_clzll(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > DBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit DBL_MANT_DIG-1 bits to the right of 1 - * Q = bit DBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (DBL_MANT_DIG+2))) | - ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ - if (a & ((du_int)1 << DBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to DBL_MANT_DIG bits */ - } - else - { - a <<= (DBL_MANT_DIG - sd); - /* a is now rounded to DBL_MANT_DIG bits */ +COMPILER_RT_ABI double __floatundidf(du_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG + 2))) | + ((a & 
((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((du_int)1 << DBL_MANT_DIG)) { + a >>= 1; + ++e; } - double_bits fb; - fb.u.s.high = ((e + 1023) << 20) | /* exponent */ - ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ - fb.u.s.low = (su_int)a; /* mantissa-low */ - return fb.f; + /* a is now rounded to DBL_MANT_DIG bits */ + } else { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((e + 1023) << 20) | /* exponent */ + ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; } #endif #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI double __aeabi_ul2d(du_int a) { - return __floatundidf(a); -} +AEABI_RTABI double __aeabi_ul2d(du_int a) { return __floatundidf(a); } #else AEABI_RTABI double __aeabi_ul2d(du_int a) COMPILER_RT_ALIAS(__floatundidf); #endif Index: compiler-rt/trunk/lib/builtins/floatundisf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatundisf.c +++ compiler-rt/trunk/lib/builtins/floatundisf.c @@ -13,7 +13,7 @@ /* Returns: convert a to a float, rounding toward even. 
*/ -/* Assumption: float is a IEEE 32 bit floating point type +/* Assumption: float is a IEEE 32 bit floating point type * du_int is a 64 bit integral type */ @@ -21,63 +21,54 @@ #include "int_lib.h" -COMPILER_RT_ABI float -__floatundisf(du_int a) -{ - if (a == 0) - return 0.0F; - const unsigned N = sizeof(du_int) * CHAR_BIT; - int sd = N - __builtin_clzll(a); /* number of significant digits */ - int e = sd - 1; /* 8 exponent */ - if (sd > FLT_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit FLT_MANT_DIG-1 bits to the right of 1 - * Q = bit FLT_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case FLT_MANT_DIG + 1: - a <<= 1; - break; - case FLT_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (FLT_MANT_DIG+2))) | - ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ - if (a & ((du_int)1 << FLT_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to FLT_MANT_DIG bits */ +COMPILER_RT_ABI float __floatundisf(du_int a) { + if (a == 0) + return 0.0F; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); /* number of significant digits */ + int e = sd - 1; /* 8 exponent */ + if (sd > FLT_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case FLT_MANT_DIG + 1: + a <<= 
1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG + 2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((du_int)1 << FLT_MANT_DIG)) { + a >>= 1; + ++e; } - else - { - a <<= (FLT_MANT_DIG - sd); - /* a is now rounded to FLT_MANT_DIG bits */ - } - float_bits fb; - fb.u = ((e + 127) << 23) | /* exponent */ - ((su_int)a & 0x007FFFFF); /* mantissa */ - return fb.f; + /* a is now rounded to FLT_MANT_DIG bits */ + } else { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI float __aeabi_ul2f(du_int a) { - return __floatundisf(a); -} +AEABI_RTABI float __aeabi_ul2f(du_int a) { return __floatundisf(a); } #else AEABI_RTABI float __aeabi_ul2f(du_int a) COMPILER_RT_ALIAS(__floatundisf); #endif Index: compiler-rt/trunk/lib/builtins/floatunditf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatunditf.c +++ compiler-rt/trunk/lib/builtins/floatunditf.c @@ -18,22 +18,23 @@ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) COMPILER_RT_ABI fp_t __floatunditf(du_int a) { - const int aWidth = sizeof a * CHAR_BIT; + const int aWidth = sizeof a * CHAR_BIT; - // Handle zero as a special case to protect clz - if (a == 0) return fromRep(0); - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clzll(a); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. 
- const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - return fromRep(result); + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clzll(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); } #endif Index: compiler-rt/trunk/lib/builtins/floatundixf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatundixf.c +++ compiler-rt/trunk/lib/builtins/floatundixf.c @@ -17,25 +17,24 @@ /* Returns: convert a to a long double, rounding toward even. 
*/ -/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits - * du_int is a 64 bit integral type +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 + * bits du_int is a 64 bit integral type */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI long double -__floatundixf(du_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(du_int) * CHAR_BIT; - int clz = __builtin_clzll(a); - int e = (N - 1) - clz ; /* exponent */ - long_double_bits fb; - fb.u.high.s.low = (e + 16383); /* exponent */ - fb.u.low.all = a << clz; /* mantissa */ - return fb.f; +COMPILER_RT_ABI long double __floatundixf(du_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int clz = __builtin_clzll(a); + int e = (N - 1) - clz; /* exponent */ + long_double_bits fb; + fb.u.high.s.low = (e + 16383); /* exponent */ + fb.u.low.all = a << clz; /* mantissa */ + return fb.f; } #endif /* _ARCH_PPC */ Index: compiler-rt/trunk/lib/builtins/floatunsidf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatunsidf.c +++ compiler-rt/trunk/lib/builtins/floatunsidf.c @@ -17,32 +17,30 @@ #include "int_lib.h" -COMPILER_RT_ABI fp_t -__floatunsidf(unsigned int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) return fromRep(0); - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. 
- const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - return fromRep(result); +COMPILER_RT_ABI fp_t __floatunsidf(unsigned int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) { - return __floatunsidf(a); -} +AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) { return __floatunsidf(a); } #else AEABI_RTABI fp_t __aeabi_ui2d(unsigned int a) COMPILER_RT_ALIAS(__floatunsidf); #endif Index: compiler-rt/trunk/lib/builtins/floatunsisf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatunsisf.c +++ compiler-rt/trunk/lib/builtins/floatunsisf.c @@ -17,40 +17,40 @@ #include "int_lib.h" -COMPILER_RT_ABI fp_t -__floatunsisf(unsigned int a) { - - const int aWidth = sizeof a * CHAR_BIT; - - // Handle zero as a special case to protect clz - if (a == 0) return fromRep(0); - - // Exponent of (fp_t)a is the width of abs(a). 
- const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field, rounding if it is a right-shift - if (exponent <= significandBits) { - const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - } else { - const int shift = exponent - significandBits; - result = (rep_t)a >> shift ^ implicitBit; - rep_t round = (rep_t)a << (typeWidth - shift); - if (round > signBit) result++; - if (round == signBit) result += result & 1; - } - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - return fromRep(result); +COMPILER_RT_ABI fp_t __floatunsisf(unsigned int a) { + + const int aWidth = sizeof a * CHAR_BIT; + + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field, rounding if it is a right-shift + if (exponent <= significandBits) { + const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + } else { + const int shift = exponent - significandBits; + result = (rep_t)a >> shift ^ implicitBit; + rep_t round = (rep_t)a << (typeWidth - shift); + if (round > signBit) + result++; + if (round == signBit) + result += result & 1; + } + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) { - return __floatunsisf(a); -} +AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) { return __floatunsisf(a); } #else AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) COMPILER_RT_ALIAS(__floatunsisf); #endif Index: compiler-rt/trunk/lib/builtins/floatunsitf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatunsitf.c +++ 
compiler-rt/trunk/lib/builtins/floatunsitf.c @@ -18,22 +18,23 @@ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) COMPILER_RT_ABI fp_t __floatunsitf(unsigned int a) { - const int aWidth = sizeof a * CHAR_BIT; + const int aWidth = sizeof a * CHAR_BIT; - // Handle zero as a special case to protect clz - if (a == 0) return fromRep(0); - - // Exponent of (fp_t)a is the width of abs(a). - const int exponent = (aWidth - 1) - __builtin_clz(a); - rep_t result; - - // Shift a into the significand field and clear the implicit bit. - const int shift = significandBits - exponent; - result = (rep_t)a << shift ^ implicitBit; - - // Insert the exponent - result += (rep_t)(exponent + exponentBias) << significandBits; - return fromRep(result); + // Handle zero as a special case to protect clz + if (a == 0) + return fromRep(0); + + // Exponent of (fp_t)a is the width of abs(a). + const int exponent = (aWidth - 1) - __builtin_clz(a); + rep_t result; + + // Shift a into the significand field and clear the implicit bit. 
+ const int shift = significandBits - exponent; + result = (rep_t)a << shift ^ implicitBit; + + // Insert the exponent + result += (rep_t)(exponent + exponentBias) << significandBits; + return fromRep(result); } #endif Index: compiler-rt/trunk/lib/builtins/floatuntidf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatuntidf.c +++ compiler-rt/trunk/lib/builtins/floatuntidf.c @@ -21,59 +21,53 @@ * tu_int is a 128 bit integral type */ -/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm */ +/* seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm + * mmmm */ -COMPILER_RT_ABI double -__floatuntidf(tu_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(tu_int) * CHAR_BIT; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > DBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit DBL_MANT_DIG-1 bits to the right of 1 - * Q = bit DBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case DBL_MANT_DIG + 1: - a <<= 1; - break; - case DBL_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (DBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << DBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to DBL_MANT_DIG bits */ +COMPILER_RT_ABI double __floatuntidf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ 
+ int e = sd - 1; /* exponent */ + if (sd > DBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit DBL_MANT_DIG-1 bits to the right of 1 + * Q = bit DBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << DBL_MANT_DIG)) { + a >>= 1; + ++e; } - else - { - a <<= (DBL_MANT_DIG - sd); - /* a is now rounded to DBL_MANT_DIG bits */ - } - double_bits fb; - fb.u.s.high = ((e + 1023) << 20) | /* exponent */ + /* a is now rounded to DBL_MANT_DIG bits */ + } else { + a <<= (DBL_MANT_DIG - sd); + /* a is now rounded to DBL_MANT_DIG bits */ + } + double_bits fb; + fb.u.s.high = ((e + 1023) << 20) | /* exponent */ ((su_int)(a >> 32) & 0x000FFFFF); /* mantissa-high */ - fb.u.s.low = (su_int)a; /* mantissa-low */ - return fb.f; + fb.u.s.low = (su_int)a; /* mantissa-low */ + return fb.f; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/floatuntisf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatuntisf.c +++ compiler-rt/trunk/lib/builtins/floatuntisf.c @@ -17,62 +17,55 @@ /* Returns: convert a to a float, rounding toward even. 
*/ -/* Assumption: float is a IEEE 32 bit floating point type +/* Assumption: float is a IEEE 32 bit floating point type * tu_int is a 128 bit integral type */ /* seee eeee emmm mmmm mmmm mmmm mmmm mmmm */ -COMPILER_RT_ABI float -__floatuntisf(tu_int a) -{ - if (a == 0) - return 0.0F; - const unsigned N = sizeof(tu_int) * CHAR_BIT; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > FLT_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit FLT_MANT_DIG-1 bits to the right of 1 - * Q = bit FLT_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) - { - case FLT_MANT_DIG + 1: - a <<= 1; - break; - case FLT_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (FLT_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << FLT_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to FLT_MANT_DIG bits */ +COMPILER_RT_ABI float __floatuntisf(tu_int a) { + if (a == 0) + return 0.0F; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > FLT_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit FLT_MANT_DIG-1 bits to the right of 1 + * Q = bit FLT_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + 
case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << FLT_MANT_DIG)) { + a >>= 1; + ++e; } - else - { - a <<= (FLT_MANT_DIG - sd); - /* a is now rounded to FLT_MANT_DIG bits */ - } - float_bits fb; - fb.u = ((e + 127) << 23) | /* exponent */ - ((su_int)a & 0x007FFFFF); /* mantissa */ - return fb.f; + /* a is now rounded to FLT_MANT_DIG bits */ + } else { + a <<= (FLT_MANT_DIG - sd); + /* a is now rounded to FLT_MANT_DIG bits */ + } + float_bits fb; + fb.u = ((e + 127) << 23) | /* exponent */ + ((su_int)a & 0x007FFFFF); /* mantissa */ + return fb.f; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/floatuntitf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatuntitf.c +++ compiler-rt/trunk/lib/builtins/floatuntitf.c @@ -22,57 +22,57 @@ * tu_int is a 128 bit integral type */ -/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | - * mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm + * mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) -COMPILER_RT_ABI fp_t -__floatuntitf(tu_int a) { - if (a == 0) - return 0.0; - const unsigned N = sizeof(tu_int) * CHAR_BIT; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > LDBL_MANT_DIG) { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 
000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit LDBL_MANT_DIG-1 bits to the right of 1 - * Q = bit LDBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the right of Q - */ - switch (sd) { - case LDBL_MANT_DIG + 1: - a <<= 1; - break; - case LDBL_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (LDBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << LDBL_MANT_DIG)) { - a >>= 1; - ++e; - } - /* a is now rounded to LDBL_MANT_DIG bits */ - } else { - a <<= (LDBL_MANT_DIG - sd); - /* a is now rounded to LDBL_MANT_DIG bits */ +COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (LDBL_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; } - - long_double_bits fb; 
- fb.u.high.all = (du_int)(e + 16383) << 48 /* exponent */ - | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ - fb.u.low.all = (du_int)(a); - return fb.f; + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG bits */ + } + + long_double_bits fb; + fb.u.high.all = (du_int)(e + 16383) << 48 /* exponent */ + | ((a >> 64) & 0x0000ffffffffffffLL); /* significand */ + fb.u.low.all = (du_int)(a); + return fb.f; } #endif Index: compiler-rt/trunk/lib/builtins/floatuntixf.c =================================================================== --- compiler-rt/trunk/lib/builtins/floatuntixf.c +++ compiler-rt/trunk/lib/builtins/floatuntixf.c @@ -17,64 +17,58 @@ /* Returns: convert a to a long double, rounding toward even. */ -/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits - * tu_int is a 128 bit integral type +/* Assumption: long double is a IEEE 80 bit floating point type padded to 128 + * bits tu_int is a 128 bit integral type */ -/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | - * 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm +/* gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee + * eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm + * mmmm mmmm mmmm */ -COMPILER_RT_ABI long double -__floatuntixf(tu_int a) -{ - if (a == 0) - return 0.0; - const unsigned N = sizeof(tu_int) * CHAR_BIT; - int sd = N - __clzti2(a); /* number of significant digits */ - int e = sd - 1; /* exponent */ - if (sd > LDBL_MANT_DIG) - { - /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx - * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR - * 12345678901234567890123456 - * 1 = msb 1 bit - * P = bit LDBL_MANT_DIG-1 bits to the right of 1 - * Q = bit LDBL_MANT_DIG bits to the right of 1 - * R = "or" of all bits to the 
right of Q - */ - switch (sd) - { - case LDBL_MANT_DIG + 1: - a <<= 1; - break; - case LDBL_MANT_DIG + 2: - break; - default: - a = (a >> (sd - (LDBL_MANT_DIG+2))) | - ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); - }; - /* finish: */ - a |= (a & 4) != 0; /* Or P into R */ - ++a; /* round - this step may add a significant bit */ - a >>= 2; /* dump Q and R */ - /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ - if (a & ((tu_int)1 << LDBL_MANT_DIG)) - { - a >>= 1; - ++e; - } - /* a is now rounded to LDBL_MANT_DIG bits */ +COMPILER_RT_ABI long double __floatuntixf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); /* number of significant digits */ + int e = sd - 1; /* exponent */ + if (sd > LDBL_MANT_DIG) { + /* start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + * finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + * 12345678901234567890123456 + * 1 = msb 1 bit + * P = bit LDBL_MANT_DIG-1 bits to the right of 1 + * Q = bit LDBL_MANT_DIG bits to the right of 1 + * R = "or" of all bits to the right of Q + */ + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (LDBL_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG + 2) - sd))) != 0); + }; + /* finish: */ + a |= (a & 4) != 0; /* Or P into R */ + ++a; /* round - this step may add a significant bit */ + a >>= 2; /* dump Q and R */ + /* a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits */ + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; } - else - { - a <<= (LDBL_MANT_DIG - sd); - /* a is now rounded to LDBL_MANT_DIG bits */ - } - long_double_bits fb; - fb.u.high.s.low = (e + 16383); /* exponent */ - fb.u.low.all = (du_int)a; /* mantissa */ - return fb.f; + /* a is now rounded to LDBL_MANT_DIG bits */ + } else { + a <<= (LDBL_MANT_DIG - sd); + /* a is now rounded to LDBL_MANT_DIG 
bits */ + } + long_double_bits fb; + fb.u.high.s.low = (e + 16383); /* exponent */ + fb.u.low.all = (du_int)a; /* mantissa */ + return fb.f; } #endif Index: compiler-rt/trunk/lib/builtins/fp_add_impl.inc =================================================================== --- compiler-rt/trunk/lib/builtins/fp_add_impl.inc +++ compiler-rt/trunk/lib/builtins/fp_add_impl.inc @@ -14,130 +14,143 @@ #include "fp_lib.h" static __inline fp_t __addXf3__(fp_t a, fp_t b) { - rep_t aRep = toRep(a); - rep_t bRep = toRep(b); - const rep_t aAbs = aRep & absMask; - const rep_t bAbs = bRep & absMask; - - // Detect if a or b is zero, infinity, or NaN. - if (aAbs - REP_C(1) >= infRep - REP_C(1) || - bAbs - REP_C(1) >= infRep - REP_C(1)) { - // NaN + anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything + NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // +/-infinity + -/+infinity = qNaN - if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep); - // +/-infinity + anything remaining = +/- infinity - else return a; - } - - // anything remaining + +/-infinity = +/-infinity - if (bAbs == infRep) return b; - - // zero + anything = anything - if (!aAbs) { - // but we need to get the sign right for zero + zero - if (!bAbs) return fromRep(toRep(a) & toRep(b)); - else return b; - } - - // anything + zero = anything - if (!bAbs) return a; - } - - // Swap a and b if necessary so that a has the larger absolute value. - if (bAbs > aAbs) { - const rep_t temp = aRep; - aRep = bRep; - bRep = temp; - } - - // Extract the exponent and significand from the (possibly swapped) a and b. - int aExponent = aRep >> significandBits & maxExponent; - int bExponent = bRep >> significandBits & maxExponent; - rep_t aSignificand = aRep & significandMask; - rep_t bSignificand = bRep & significandMask; - - // Normalize any denormals, and adjust the exponent accordingly. 
- if (aExponent == 0) aExponent = normalize(&aSignificand); - if (bExponent == 0) bExponent = normalize(&bSignificand); - - // The sign of the result is the sign of the larger operand, a. If they - // have opposite signs, we are performing a subtraction; otherwise addition. - const rep_t resultSign = aRep & signBit; - const bool subtraction = (aRep ^ bRep) & signBit; - - // Shift the significands to give us round, guard and sticky, and or in the - // implicit significand bit. (If we fell through from the denormal path it - // was already set by normalize( ), but setting it twice won't hurt - // anything.) - aSignificand = (aSignificand | implicitBit) << 3; - bSignificand = (bSignificand | implicitBit) << 3; - - // Shift the significand of b by the difference in exponents, with a sticky - // bottom bit to get rounding correct. - const unsigned int align = aExponent - bExponent; - if (align) { - if (align < typeWidth) { - const bool sticky = bSignificand << (typeWidth - align); - bSignificand = bSignificand >> align | sticky; - } else { - bSignificand = 1; // sticky; b is known to be non-zero. - } - } - if (subtraction) { - aSignificand -= bSignificand; - // If a == -b, return +zero. 
- if (aSignificand == 0) return fromRep(0); - - // If partial cancellation occured, we need to left-shift the result - // and adjust the exponent: - if (aSignificand < implicitBit << 3) { - const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); - aSignificand <<= shift; - aExponent -= shift; - } - } - else /* addition */ { - aSignificand += bSignificand; - - // If the addition carried up, we need to right-shift the result and - // adjust the exponent: - if (aSignificand & implicitBit << 4) { - const bool sticky = aSignificand & 1; - aSignificand = aSignificand >> 1 | sticky; - aExponent += 1; - } - } - - // If we have overflowed the type, return +/- infinity: - if (aExponent >= maxExponent) return fromRep(infRep | resultSign); - - if (aExponent <= 0) { - // Result is denormal before rounding; the exponent is zero and we - // need to shift the significand. - const int shift = 1 - aExponent; - const bool sticky = aSignificand << (typeWidth - shift); - aSignificand = aSignificand >> shift | sticky; - aExponent = 0; - } - - // Low three bits are round, guard, and sticky. - const int roundGuardSticky = aSignificand & 0x7; - - // Shift the significand into place, and mask off the implicit bit. - rep_t result = aSignificand >> 3 & significandMask; - - // Insert the exponent and sign. - result |= (rep_t)aExponent << significandBits; - result |= resultSign; - - // Final rounding. The result may overflow to infinity, but that is the - // correct result in that case. - if (roundGuardSticky > 0x4) result++; - if (roundGuardSticky == 0x4) result += result & 1; - return fromRep(result); + rep_t aRep = toRep(a); + rep_t bRep = toRep(b); + const rep_t aAbs = aRep & absMask; + const rep_t bAbs = bRep & absMask; + + // Detect if a or b is zero, infinity, or NaN. 
+ if (aAbs - REP_C(1) >= infRep - REP_C(1) || + bAbs - REP_C(1) >= infRep - REP_C(1)) { + // NaN + anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything + NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // +/-infinity + -/+infinity = qNaN + if ((toRep(a) ^ toRep(b)) == signBit) + return fromRep(qnanRep); + // +/-infinity + anything remaining = +/- infinity + else + return a; + } + + // anything remaining + +/-infinity = +/-infinity + if (bAbs == infRep) + return b; + + // zero + anything = anything + if (!aAbs) { + // but we need to get the sign right for zero + zero + if (!bAbs) + return fromRep(toRep(a) & toRep(b)); + else + return b; + } + + // anything + zero = anything + if (!bAbs) + return a; + } + + // Swap a and b if necessary so that a has the larger absolute value. + if (bAbs > aAbs) { + const rep_t temp = aRep; + aRep = bRep; + bRep = temp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. + int aExponent = aRep >> significandBits & maxExponent; + int bExponent = bRep >> significandBits & maxExponent; + rep_t aSignificand = aRep & significandMask; + rep_t bSignificand = bRep & significandMask; + + // Normalize any denormals, and adjust the exponent accordingly. + if (aExponent == 0) + aExponent = normalize(&aSignificand); + if (bExponent == 0) + bExponent = normalize(&bSignificand); + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction; otherwise addition. + const rep_t resultSign = aRep & signBit; + const bool subtraction = (aRep ^ bRep) & signBit; + + // Shift the significands to give us round, guard and sticky, and or in the + // implicit significand bit. (If we fell through from the denormal path it + // was already set by normalize( ), but setting it twice won't hurt + // anything.) 
+ aSignificand = (aSignificand | implicitBit) << 3; + bSignificand = (bSignificand | implicitBit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + const unsigned int align = aExponent - bExponent; + if (align) { + if (align < typeWidth) { + const bool sticky = bSignificand << (typeWidth - align); + bSignificand = bSignificand >> align | sticky; + } else { + bSignificand = 1; // sticky; b is known to be non-zero. + } + } + if (subtraction) { + aSignificand -= bSignificand; + // If a == -b, return +zero. + if (aSignificand == 0) + return fromRep(0); + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + if (aSignificand < implicitBit << 3) { + const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + aSignificand <<= shift; + aExponent -= shift; + } + } else /* addition */ { + aSignificand += bSignificand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent: + if (aSignificand & implicitBit << 4) { + const bool sticky = aSignificand & 1; + aSignificand = aSignificand >> 1 | sticky; + aExponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity: + if (aExponent >= maxExponent) + return fromRep(infRep | resultSign); + + if (aExponent <= 0) { + // Result is denormal before rounding; the exponent is zero and we + // need to shift the significand. + const int shift = 1 - aExponent; + const bool sticky = aSignificand << (typeWidth - shift); + aSignificand = aSignificand >> shift | sticky; + aExponent = 0; + } + + // Low three bits are round, guard, and sticky. + const int roundGuardSticky = aSignificand & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + rep_t result = aSignificand >> 3 & significandMask; + + // Insert the exponent and sign. + result |= (rep_t)aExponent << significandBits; + result |= resultSign; + + // Final rounding. 
The result may overflow to infinity, but that is the + // correct result in that case. + if (roundGuardSticky > 0x4) + result++; + if (roundGuardSticky == 0x4) + result += result & 1; + return fromRep(result); } Index: compiler-rt/trunk/lib/builtins/fp_extend.h =================================================================== --- compiler-rt/trunk/lib/builtins/fp_extend.h +++ compiler-rt/trunk/lib/builtins/fp_extend.h @@ -1,4 +1,5 @@ -//===-lib/fp_extend.h - low precision -> high precision conversion -*- C -*-===// +//===-lib/fp_extend.h - low precision -> high precision conversion -*- C +//-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -29,12 +30,12 @@ static const int srcSigBits = 52; static __inline int src_rep_t_clz(src_rep_t a) { #if defined __LP64__ - return __builtin_clzl(a); + return __builtin_clzl(a); #else - if (a & REP_C(0xffffffff00000000)) - return __builtin_clz(a >> 32); - else - return 32 + __builtin_clz(a & REP_C(0xffffffff)); + if (a & REP_C(0xffffffff00000000)) + return __builtin_clz(a >> 32); + else + return 32 + __builtin_clz(a & REP_C(0xffffffff)); #endif } @@ -47,7 +48,7 @@ #else #error Source should be half, single, or double precision! -#endif //end source precision +#endif // end source precision #if defined DST_SINGLE typedef float dst_t; @@ -69,20 +70,26 @@ #else #error Destination should be single, double, or quad precision! -#endif //end destination precision +#endif // end destination precision // End of specialization parameters. Two helper routines for conversion to and // from the representation of floating-point data as integer values follow. 
static __inline src_rep_t srcToRep(src_t x) { - const union { src_t f; src_rep_t i; } rep = {.f = x}; - return rep.i; + const union { + src_t f; + src_rep_t i; + } rep = {.f = x}; + return rep.i; } static __inline dst_t dstFromRep(dst_rep_t x) { - const union { dst_t f; dst_rep_t i; } rep = {.i = x}; - return rep.f; + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; } // End helper routines. Conversion implementation follows. -#endif //FP_EXTEND_HEADER +#endif // FP_EXTEND_HEADER Index: compiler-rt/trunk/lib/builtins/fp_extend_impl.inc =================================================================== --- compiler-rt/trunk/lib/builtins/fp_extend_impl.inc +++ compiler-rt/trunk/lib/builtins/fp_extend_impl.inc @@ -38,70 +38,70 @@ #include "fp_extend.h" static __inline dst_t __extendXfYf2__(src_t a) { - // Various constants whose values follow from the type parameters. - // Any reasonable optimizer will fold and propagate all of these. - const int srcBits = sizeof(src_t)*CHAR_BIT; - const int srcExpBits = srcBits - srcSigBits - 1; - const int srcInfExp = (1 << srcExpBits) - 1; - const int srcExpBias = srcInfExp >> 1; - - const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; - const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; - const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); - const src_rep_t srcAbsMask = srcSignMask - 1; - const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); - const src_rep_t srcNaNCode = srcQNaN - 1; - - const int dstBits = sizeof(dst_t)*CHAR_BIT; - const int dstExpBits = dstBits - dstSigBits - 1; - const int dstInfExp = (1 << dstExpBits) - 1; - const int dstExpBias = dstInfExp >> 1; - - const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; - - // Break a into a sign and representation of the absolute value - const src_rep_t aRep = srcToRep(a); - const src_rep_t aAbs = aRep & srcAbsMask; - const src_rep_t sign = aRep & srcSignMask; - dst_rep_t absResult; - - // If 
sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted - // to (signed) int. To avoid that, explicitly cast to src_rep_t. - if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { - // a is a normal number. - // Extend to the destination type by shifting the significand and - // exponent into the proper position and rebiasing the exponent. - absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); - absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; - } - - else if (aAbs >= srcInfinity) { - // a is NaN or infinity. - // Conjure the result by beginning with infinity, then setting the qNaN - // bit (if needed) and right-aligning the rest of the trailing NaN - // payload field. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); - absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); - } - - else if (aAbs) { - // a is denormal. - // renormalize the significand and clear the leading bit, then insert - // the correct adjusted exponent in the destination type. - const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); - absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); - absResult ^= dstMinNormal; - const int resultExponent = dstExpBias - srcExpBias - scale + 1; - absResult |= (dst_rep_t)resultExponent << dstSigBits; - } - - else { - // a is zero. - absResult = 0; - } - - // Apply the signbit to (dst_t)abs(a). - const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); - return dstFromRep(result); + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. 
+ const int srcBits = sizeof(src_t) * CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t) * CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; + + // Break a into a sign and representation of the absolute value + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted + // to (signed) int. To avoid that, explicitly cast to src_rep_t. + if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); + absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; + } + + else if (aAbs >= srcInfinity) { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); + absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); + } + + else if (aAbs) { + // a is denormal. 
+ // renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); + absResult ^= dstMinNormal; + const int resultExponent = dstExpBias - srcExpBias - scale + 1; + absResult |= (dst_rep_t)resultExponent << dstSigBits; + } + + else { + // a is zero. + absResult = 0; + } + + // Apply the signbit to (dst_t)abs(a). + const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); + return dstFromRep(result); } Index: compiler-rt/trunk/lib/builtins/fp_fixint_impl.inc =================================================================== --- compiler-rt/trunk/lib/builtins/fp_fixint_impl.inc +++ compiler-rt/trunk/lib/builtins/fp_fixint_impl.inc @@ -14,27 +14,27 @@ #include "fp_lib.h" static __inline fixint_t __fixint(fp_t a) { - const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); - const fixint_t fixint_min = -fixint_max - 1; - // Break a into sign, exponent, significand - const rep_t aRep = toRep(a); - const rep_t aAbs = aRep & absMask; - const fixint_t sign = aRep & signBit ? -1 : 1; - const int exponent = (aAbs >> significandBits) - exponentBias; - const rep_t significand = (aAbs & significandMask) | implicitBit; + const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); + const fixint_t fixint_min = -fixint_max - 1; + // Break a into sign, exponent, significand + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const fixint_t sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; - // If exponent is negative, the result is zero. - if (exponent < 0) - return 0; + // If exponent is negative, the result is zero. + if (exponent < 0) + return 0; - // If the value is too large for the integer type, saturate. 
- if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT) - return sign == 1 ? fixint_max : fixint_min; + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT) + return sign == 1 ? fixint_max : fixint_min; - // If 0 <= exponent < significandBits, right shift to get the result. - // Otherwise, shift left. - if (exponent < significandBits) - return sign * (significand >> (significandBits - exponent)); - else - return sign * ((fixint_t)significand << (exponent - significandBits)); + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. + if (exponent < significandBits) + return sign * (significand >> (significandBits - exponent)); + else + return sign * ((fixint_t)significand << (exponent - significandBits)); } Index: compiler-rt/trunk/lib/builtins/fp_fixuint_impl.inc =================================================================== --- compiler-rt/trunk/lib/builtins/fp_fixuint_impl.inc +++ compiler-rt/trunk/lib/builtins/fp_fixuint_impl.inc @@ -14,25 +14,25 @@ #include "fp_lib.h" static __inline fixuint_t __fixuint(fp_t a) { - // Break a into sign, exponent, significand - const rep_t aRep = toRep(a); - const rep_t aAbs = aRep & absMask; - const int sign = aRep & signBit ? -1 : 1; - const int exponent = (aAbs >> significandBits) - exponentBias; - const rep_t significand = (aAbs & significandMask) | implicitBit; + // Break a into sign, exponent, significand + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const int sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; - // If either the value or the exponent is negative, the result is zero. - if (sign == -1 || exponent < 0) - return 0; + // If either the value or the exponent is negative, the result is zero. 
+ if (sign == -1 || exponent < 0) + return 0; - // If the value is too large for the integer type, saturate. - if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT) - return ~(fixuint_t)0; + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT) + return ~(fixuint_t)0; - // If 0 <= exponent < significandBits, right shift to get the result. - // Otherwise, shift left. - if (exponent < significandBits) - return significand >> (significandBits - exponent); - else - return (fixuint_t)significand << (exponent - significandBits); + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. + if (exponent < significandBits) + return significand >> (significandBits - exponent); + else + return (fixuint_t)significand << (exponent - significandBits); } Index: compiler-rt/trunk/lib/builtins/fp_lib.h =================================================================== --- compiler-rt/trunk/lib/builtins/fp_lib.h +++ compiler-rt/trunk/lib/builtins/fp_lib.h @@ -20,22 +20,22 @@ #ifndef FP_LIB_HEADER #define FP_LIB_HEADER -#include -#include -#include #include "int_lib.h" #include "int_math.h" +#include +#include +#include // x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in // 32-bit mode. 
#if defined(__FreeBSD__) && defined(__i386__) -# include -# if __FreeBSD_version < 903000 // v9.3 -# define uint64_t unsigned long long -# define int64_t long long -# undef UINT64_C -# define UINT64_C(c) (c ## ULL) -# endif +#include +#if __FreeBSD_version < 903000 // v9.3 +#define uint64_t unsigned long long +#define int64_t long long +#undef UINT64_C +#define UINT64_C(c) (c##ULL) +#endif #endif #if defined SINGLE_PRECISION @@ -46,15 +46,13 @@ #define REP_C UINT32_C #define significandBits 23 -static __inline int rep_clz(rep_t a) { - return __builtin_clz(a); -} +static __inline int rep_clz(rep_t a) { return __builtin_clz(a); } // 32x32 --> 64 bit multiply static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { - const uint64_t product = (uint64_t)a*b; - *hi = product >> 32; - *lo = product; + const uint64_t product = (uint64_t)a * b; + *hi = product >> 32; + *lo = product; } COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); @@ -68,12 +66,12 @@ static __inline int rep_clz(rep_t a) { #if defined __LP64__ - return __builtin_clzl(a); + return __builtin_clzl(a); #else - if (a & REP_C(0xffffffff00000000)) - return __builtin_clz(a >> 32); - else - return 32 + __builtin_clz(a & REP_C(0xffffffff)); + if (a & REP_C(0xffffffff00000000)) + return __builtin_clz(a >> 32); + else + return 32 + __builtin_clz(a & REP_C(0xffffffff)); #endif } @@ -84,17 +82,17 @@ // many 64-bit platforms have this operation, but they tend to have hardware // floating-point, so we don't bother with a special case for them here. 
static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { - // Each of the component 32x32 -> 64 products - const uint64_t plolo = loWord(a) * loWord(b); - const uint64_t plohi = loWord(a) * hiWord(b); - const uint64_t philo = hiWord(a) * loWord(b); - const uint64_t phihi = hiWord(a) * hiWord(b); - // Sum terms that contribute to lo in a way that allows us to get the carry - const uint64_t r0 = loWord(plolo); - const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); - *lo = r0 + (r1 << 32); - // Sum terms contributing to hi with the carry from lo - *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; + // Each of the component 32x32 -> 64 products + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + // Sum terms that contribute to lo in a way that allows us to get the carry + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + // Sum terms contributing to hi with the carry from lo + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; } #undef loWord #undef hiWord @@ -113,32 +111,34 @@ #define significandBits 112 static __inline int rep_clz(rep_t a) { - const union - { - __uint128_t ll; + const union { + __uint128_t ll; #if _YUGA_BIG_ENDIAN - struct { uint64_t high, low; } s; + struct { + uint64_t high, low; + } s; #else - struct { uint64_t low, high; } s; + struct { + uint64_t low, high; + } s; #endif - } uu = { .ll = a }; + } uu = {.ll = a}; - uint64_t word; - uint64_t add; + uint64_t word; + uint64_t add; - if (uu.s.high){ - word = uu.s.high; - add = 0; - } - else{ - word = uu.s.low; - add = 64; - } - return __builtin_clzll(word) + add; + if (uu.s.high) { + word = uu.s.high; + add = 0; + } else { + word = uu.s.low; + add = 64; + } + return __builtin_clzll(word) + add; } -#define Word_LoMask 
UINT64_C(0x00000000ffffffff) -#define Word_HiMask UINT64_C(0xffffffff00000000) +#define Word_LoMask UINT64_C(0x00000000ffffffff) +#define Word_HiMask UINT64_C(0xffffffff00000000) #define Word_FullMask UINT64_C(0xffffffffffffffff) #define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask) #define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask) @@ -150,55 +150,41 @@ // floating-point, so we don't bother with a special case for them here. static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { - const uint64_t product11 = Word_1(a) * Word_1(b); - const uint64_t product12 = Word_1(a) * Word_2(b); - const uint64_t product13 = Word_1(a) * Word_3(b); - const uint64_t product14 = Word_1(a) * Word_4(b); - const uint64_t product21 = Word_2(a) * Word_1(b); - const uint64_t product22 = Word_2(a) * Word_2(b); - const uint64_t product23 = Word_2(a) * Word_3(b); - const uint64_t product24 = Word_2(a) * Word_4(b); - const uint64_t product31 = Word_3(a) * Word_1(b); - const uint64_t product32 = Word_3(a) * Word_2(b); - const uint64_t product33 = Word_3(a) * Word_3(b); - const uint64_t product34 = Word_3(a) * Word_4(b); - const uint64_t product41 = Word_4(a) * Word_1(b); - const uint64_t product42 = Word_4(a) * Word_2(b); - const uint64_t product43 = Word_4(a) * Word_3(b); - const uint64_t product44 = Word_4(a) * Word_4(b); - - const __uint128_t sum0 = (__uint128_t)product44; - const __uint128_t sum1 = (__uint128_t)product34 + - (__uint128_t)product43; - const __uint128_t sum2 = (__uint128_t)product24 + - (__uint128_t)product33 + - (__uint128_t)product42; - const __uint128_t sum3 = (__uint128_t)product14 + - (__uint128_t)product23 + - (__uint128_t)product32 + - (__uint128_t)product41; - const __uint128_t sum4 = (__uint128_t)product13 + - (__uint128_t)product22 + - (__uint128_t)product31; - const __uint128_t sum5 = (__uint128_t)product12 + - (__uint128_t)product21; - const __uint128_t sum6 = (__uint128_t)product11; - - const __uint128_t r0 = (sum0 & Word_FullMask) + - 
((sum1 & Word_LoMask) << 32); - const __uint128_t r1 = (sum0 >> 64) + - ((sum1 >> 32) & Word_FullMask) + - (sum2 & Word_FullMask) + - ((sum3 << 32) & Word_HiMask); - - *lo = r0 + (r1 << 64); - *hi = (r1 >> 64) + - (sum1 >> 96) + - (sum2 >> 64) + - (sum3 >> 32) + - sum4 + - (sum5 << 32) + - (sum6 << 64); + const uint64_t product11 = Word_1(a) * Word_1(b); + const uint64_t product12 = Word_1(a) * Word_2(b); + const uint64_t product13 = Word_1(a) * Word_3(b); + const uint64_t product14 = Word_1(a) * Word_4(b); + const uint64_t product21 = Word_2(a) * Word_1(b); + const uint64_t product22 = Word_2(a) * Word_2(b); + const uint64_t product23 = Word_2(a) * Word_3(b); + const uint64_t product24 = Word_2(a) * Word_4(b); + const uint64_t product31 = Word_3(a) * Word_1(b); + const uint64_t product32 = Word_3(a) * Word_2(b); + const uint64_t product33 = Word_3(a) * Word_3(b); + const uint64_t product34 = Word_3(a) * Word_4(b); + const uint64_t product41 = Word_4(a) * Word_1(b); + const uint64_t product42 = Word_4(a) * Word_2(b); + const uint64_t product43 = Word_4(a) * Word_3(b); + const uint64_t product44 = Word_4(a) * Word_4(b); + + const __uint128_t sum0 = (__uint128_t)product44; + const __uint128_t sum1 = (__uint128_t)product34 + (__uint128_t)product43; + const __uint128_t sum2 = + (__uint128_t)product24 + (__uint128_t)product33 + (__uint128_t)product42; + const __uint128_t sum3 = (__uint128_t)product14 + (__uint128_t)product23 + + (__uint128_t)product32 + (__uint128_t)product41; + const __uint128_t sum4 = + (__uint128_t)product13 + (__uint128_t)product22 + (__uint128_t)product31; + const __uint128_t sum5 = (__uint128_t)product12 + (__uint128_t)product21; + const __uint128_t sum6 = (__uint128_t)product11; + + const __uint128_t r0 = (sum0 & Word_FullMask) + ((sum1 & Word_LoMask) << 32); + const __uint128_t r1 = (sum0 >> 64) + ((sum1 >> 32) & Word_FullMask) + + (sum2 & Word_FullMask) + ((sum3 << 32) & Word_HiMask); + + *lo = r0 + (r1 << 64); + *hi = (r1 >> 64) + (sum1 >> 96) 
+ (sum2 >> 64) + (sum3 >> 32) + sum4 + + (sum5 << 32) + (sum6 << 64); } #undef Word_1 #undef Word_2 @@ -212,58 +198,65 @@ #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. #endif -#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || defined(CRT_LDBL_128BIT) -#define typeWidth (sizeof(rep_t)*CHAR_BIT) -#define exponentBits (typeWidth - significandBits - 1) -#define maxExponent ((1 << exponentBits) - 1) -#define exponentBias (maxExponent >> 1) +#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \ + defined(CRT_LDBL_128BIT) +#define typeWidth (sizeof(rep_t) * CHAR_BIT) +#define exponentBits (typeWidth - significandBits - 1) +#define maxExponent ((1 << exponentBits) - 1) +#define exponentBias (maxExponent >> 1) -#define implicitBit (REP_C(1) << significandBits) +#define implicitBit (REP_C(1) << significandBits) #define significandMask (implicitBit - 1U) -#define signBit (REP_C(1) << (significandBits + exponentBits)) -#define absMask (signBit - 1U) -#define exponentMask (absMask ^ significandMask) -#define oneRep ((rep_t)exponentBias << significandBits) -#define infRep exponentMask -#define quietBit (implicitBit >> 1) -#define qnanRep (exponentMask | quietBit) +#define signBit (REP_C(1) << (significandBits + exponentBits)) +#define absMask (signBit - 1U) +#define exponentMask (absMask ^ significandMask) +#define oneRep ((rep_t)exponentBias << significandBits) +#define infRep exponentMask +#define quietBit (implicitBit >> 1) +#define qnanRep (exponentMask | quietBit) static __inline rep_t toRep(fp_t x) { - const union { fp_t f; rep_t i; } rep = {.f = x}; - return rep.i; + const union { + fp_t f; + rep_t i; + } rep = {.f = x}; + return rep.i; } static __inline fp_t fromRep(rep_t x) { - const union { fp_t f; rep_t i; } rep = {.i = x}; - return rep.f; + const union { + fp_t f; + rep_t i; + } rep = {.i = x}; + return rep.f; } static __inline int normalize(rep_t *significand) { - const int shift = rep_clz(*significand) - 
rep_clz(implicitBit); - *significand <<= shift; - return 1 - shift; + const int shift = rep_clz(*significand) - rep_clz(implicitBit); + *significand <<= shift; + return 1 - shift; } static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) { - *hi = *hi << count | *lo >> (typeWidth - count); - *lo = *lo << count; + *hi = *hi << count | *lo >> (typeWidth - count); + *lo = *lo << count; } -static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int count) { - if (count < typeWidth) { - const bool sticky = *lo << (typeWidth - count); - *lo = *hi << (typeWidth - count) | *lo >> count | sticky; - *hi = *hi >> count; - } - else if (count < 2*typeWidth) { - const bool sticky = *hi << (2*typeWidth - count) | *lo; - *lo = *hi >> (count - typeWidth) | sticky; - *hi = 0; - } else { - const bool sticky = *hi | *lo; - *lo = sticky; - *hi = 0; - } +static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, + unsigned int count) { + if (count < typeWidth) { + const bool sticky = *lo << (typeWidth - count); + *lo = *hi << (typeWidth - count) | *lo >> count | sticky; + *hi = *hi >> count; + } else if (count < 2 * typeWidth) { + const bool sticky = *hi << (2 * typeWidth - count) | *lo; + *lo = *hi >> (count - typeWidth) | sticky; + *hi = 0; + } else { + const bool sticky = *hi | *lo; + *lo = sticky; + *hi = 0; + } } // Implements logb methods (logb, logbf, logbl) for IEEE-754. 
This avoids @@ -279,9 +272,9 @@ // 2) 0.0 returns -inf if (exp == maxExponent) { if (((rep & signBit) == 0) || (x != x)) { - return x; // NaN or +inf: return x + return x; // NaN or +inf: return x } else { - return -x; // -inf: return -x + return -x; // -inf: return -x } } else if (x == 0.0) { // 0.0: return -inf @@ -290,13 +283,13 @@ if (exp != 0) { // Normal number - return exp - exponentBias; // Unbias exponent + return exp - exponentBias; // Unbias exponent } else { // Subnormal number; normalize and repeat rep &= absMask; const int shift = 1 - normalize(&rep); exp = (rep & exponentMask) >> significandBits; - return exp - exponentBias - shift; // Unbias exponent + return exp - exponentBias - shift; // Unbias exponent } } #endif @@ -310,17 +303,17 @@ return __compiler_rt_logbX(x); } #elif defined(QUAD_PRECISION) - #if defined(CRT_LDBL_128BIT) +#if defined(CRT_LDBL_128BIT) static __inline fp_t __compiler_rt_logbl(fp_t x) { return __compiler_rt_logbX(x); } - #else +#else // The generic implementation only works for ieee754 floating point. For other // floating point types, continue to rely on the libm implementation for now. static __inline long double __compiler_rt_logbl(long double x) { return crt_logbl(x); } - #endif +#endif #endif #endif // FP_LIB_HEADER Index: compiler-rt/trunk/lib/builtins/fp_mul_impl.inc =================================================================== --- compiler-rt/trunk/lib/builtins/fp_mul_impl.inc +++ compiler-rt/trunk/lib/builtins/fp_mul_impl.inc @@ -14,102 +14,118 @@ #include "fp_lib.h" static __inline fp_t __mulXf3__(fp_t a, fp_t b) { - const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; - const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; - const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; - - rep_t aSignificand = toRep(a) & significandMask; - rep_t bSignificand = toRep(b) & significandMask; - int scale = 0; - - // Detect if a or b is zero, denormal, infinity, or NaN. 
- if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - - const rep_t aAbs = toRep(a) & absMask; - const rep_t bAbs = toRep(b) & absMask; - - // NaN * anything = qNaN - if (aAbs > infRep) return fromRep(toRep(a) | quietBit); - // anything * NaN = qNaN - if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - - if (aAbs == infRep) { - // infinity * non-zero = +/- infinity - if (bAbs) return fromRep(aAbs | productSign); - // infinity * zero = NaN - else return fromRep(qnanRep); - } - - if (bAbs == infRep) { - //? non-zero * infinity = +/- infinity - if (aAbs) return fromRep(bAbs | productSign); - // zero * infinity = NaN - else return fromRep(qnanRep); - } - - // zero * anything = +/- zero - if (!aAbs) return fromRep(productSign); - // anything * zero = +/- zero - if (!bAbs) return fromRep(productSign); - - // one or both of a or b is denormal, the other (if applicable) is a - // normal number. Renormalize one or both of a and b, and set scale to - // include the necessary exponent adjustment. - if (aAbs < implicitBit) scale += normalize(&aSignificand); - if (bAbs < implicitBit) scale += normalize(&bSignificand); + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN * anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything * NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity * non-zero = +/- infinity + if (bAbs) + return fromRep(aAbs | productSign); + // infinity * zero = NaN + else + return fromRep(qnanRep); } - // Or in the implicit significand bit. (If we fell through from the - // denormal path it was already set by normalize( ), but setting it twice - // won't hurt anything.) - aSignificand |= implicitBit; - bSignificand |= implicitBit; - - // Get the significand of a*b. Before multiplying the significands, shift - // one of them left to left-align it in the field. Thus, the product will - // have (exponentBits + 2) integral digits, all but two of which must be - // zero. Normalizing this result is just a conditional left-shift by one - // and bumping the exponent accordingly. - rep_t productHi, productLo; - wideMultiply(aSignificand, bSignificand << exponentBits, - &productHi, &productLo); - - int productExponent = aExponent + bExponent - exponentBias + scale; - - // Normalize the significand, adjust exponent if needed. - if (productHi & implicitBit) productExponent++; - else wideLeftShift(&productHi, &productLo, 1); - - // If we have overflowed the type, return +/- infinity. - if (productExponent >= maxExponent) return fromRep(infRep | productSign); - - if (productExponent <= 0) { - // Result is denormal before rounding - // - // If the result is so small that it just underflows to zero, return - // a zero of the appropriate sign. Mathematically there is no need to - // handle this case separately, but we make it a special case to - // simplify the shift logic. 
- const unsigned int shift = REP_C(1) - (unsigned int)productExponent; - if (shift >= typeWidth) return fromRep(productSign); - - // Otherwise, shift the significand of the result so that the round - // bit is the high bit of productLo. - wideRightShiftWithSticky(&productHi, &productLo, shift); + if (bAbs == infRep) { + //? non-zero * infinity = +/- infinity + if (aAbs) + return fromRep(bAbs | productSign); + // zero * infinity = NaN + else + return fromRep(qnanRep); } - else { - // Result is normal before rounding; insert the exponent. - productHi &= significandMask; - productHi |= (rep_t)productExponent << significandBits; - } - - // Insert the sign of the result: - productHi |= productSign; - // Final rounding. The final result may overflow to infinity, or underflow - // to zero, but those are the correct results in those cases. We use the - // default IEEE-754 round-to-nearest, ties-to-even rounding mode. - if (productLo > signBit) productHi++; - if (productLo == signBit) productHi += productHi & 1; - return fromRep(productHi); + // zero * anything = +/- zero + if (!aAbs) + return fromRep(productSign); + // anything * zero = +/- zero + if (!bAbs) + return fromRep(productSign); + + // one or both of a or b is denormal, the other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale += normalize(&bSignificand); + } + + // Or in the implicit significand bit. (If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything.) + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + // Get the significand of a*b. Before multiplying the significands, shift + // one of them left to left-align it in the field. 
Thus, the product will + // have (exponentBits + 2) integral digits, all but two of which must be + // zero. Normalizing this result is just a conditional left-shift by one + // and bumping the exponent accordingly. + rep_t productHi, productLo; + wideMultiply(aSignificand, bSignificand << exponentBits, &productHi, + &productLo); + + int productExponent = aExponent + bExponent - exponentBias + scale; + + // Normalize the significand, adjust exponent if needed. + if (productHi & implicitBit) + productExponent++; + else + wideLeftShift(&productHi, &productLo, 1); + + // If we have overflowed the type, return +/- infinity. + if (productExponent >= maxExponent) + return fromRep(infRep | productSign); + + if (productExponent <= 0) { + // Result is denormal before rounding + // + // If the result is so small that it just underflows to zero, return + // a zero of the appropriate sign. Mathematically there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + const unsigned int shift = REP_C(1) - (unsigned int)productExponent; + if (shift >= typeWidth) + return fromRep(productSign); + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of productLo. + wideRightShiftWithSticky(&productHi, &productLo, shift); + } else { + // Result is normal before rounding; insert the exponent. + productHi &= significandMask; + productHi |= (rep_t)productExponent << significandBits; + } + + // Insert the sign of the result: + productHi |= productSign; + + // Final rounding. The final result may overflow to infinity, or underflow + // to zero, but those are the correct results in those cases. We use the + // default IEEE-754 round-to-nearest, ties-to-even rounding mode. 
+ if (productLo > signBit) + productHi++; + if (productLo == signBit) + productHi += productHi & 1; + return fromRep(productHi); } Index: compiler-rt/trunk/lib/builtins/fp_trunc.h =================================================================== --- compiler-rt/trunk/lib/builtins/fp_trunc.h +++ compiler-rt/trunk/lib/builtins/fp_trunc.h @@ -35,7 +35,7 @@ #else #error Source should be double precision or quad precision! -#endif //end source precision +#endif // end source precision #if defined DST_DOUBLE typedef double dst_t; @@ -57,19 +57,25 @@ #else #error Destination should be single precision or double precision! -#endif //end destination precision +#endif // end destination precision // End of specialization parameters. Two helper routines for conversion to and // from the representation of floating-point data as integer values follow. static __inline src_rep_t srcToRep(src_t x) { - const union { src_t f; src_rep_t i; } rep = {.f = x}; - return rep.i; + const union { + src_t f; + src_rep_t i; + } rep = {.f = x}; + return rep.i; } static __inline dst_t dstFromRep(dst_rep_t x) { - const union { dst_t f; dst_rep_t i; } rep = {.i = x}; - return rep.f; + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; } #endif // FP_TRUNC_HEADER Index: compiler-rt/trunk/lib/builtins/fp_trunc_impl.inc =================================================================== --- compiler-rt/trunk/lib/builtins/fp_trunc_impl.inc +++ compiler-rt/trunk/lib/builtins/fp_trunc_impl.inc @@ -39,96 +39,94 @@ #include "fp_trunc.h" static __inline dst_t __truncXfYf2__(src_t a) { - // Various constants whose values follow from the type parameters. - // Any reasonable optimizer will fold and propagate all of these. 
- const int srcBits = sizeof(src_t)*CHAR_BIT; - const int srcExpBits = srcBits - srcSigBits - 1; - const int srcInfExp = (1 << srcExpBits) - 1; - const int srcExpBias = srcInfExp >> 1; - - const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; - const src_rep_t srcSignificandMask = srcMinNormal - 1; - const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; - const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); - const src_rep_t srcAbsMask = srcSignMask - 1; - const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; - const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); - const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); - const src_rep_t srcNaNCode = srcQNaN - 1; - - const int dstBits = sizeof(dst_t)*CHAR_BIT; - const int dstExpBits = dstBits - dstSigBits - 1; - const int dstInfExp = (1 << dstExpBits) - 1; - const int dstExpBias = dstInfExp >> 1; - - const int underflowExponent = srcExpBias + 1 - dstExpBias; - const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; - const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; - const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; - - const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); - const dst_rep_t dstNaNCode = dstQNaN - 1; - - // Break a into a sign and representation of the absolute value - const src_rep_t aRep = srcToRep(a); - const src_rep_t aAbs = aRep & srcAbsMask; - const src_rep_t sign = aRep & srcSignMask; - dst_rep_t absResult; - - if (aAbs - underflow < aAbs - overflow) { - // The exponent of a is within the range of normal numbers in the - // destination format. We can convert by simply right-shifting with - // rounding and adjusting the exponent. 
- absResult = aAbs >> (srcSigBits - dstSigBits); - absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; - - const src_rep_t roundBits = aAbs & roundMask; - // Round to nearest - if (roundBits > halfway) - absResult++; - // Ties to even - else if (roundBits == halfway) - absResult += absResult & 1; - } - else if (aAbs > srcInfinity) { - // a is NaN. - // Conjure the result by beginning with infinity, setting the qNaN - // bit and inserting the (truncated) trailing NaN field. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - absResult |= dstQNaN; - absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; - } - else if (aAbs >= overflow) { - // a overflows to infinity. - absResult = (dst_rep_t)dstInfExp << dstSigBits; - } - else { - // a underflows on conversion to the destination type or is an exact - // zero. The result may be a denormal or zero. Extract the exponent - // to get the shift amount for the denormalization. - const int aExp = aAbs >> srcSigBits; - const int shift = srcExpBias - dstExpBias - aExp + 1; - - const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; - - // Right shift by the denormalization amount with sticky. - if (shift > srcSigBits) { - absResult = 0; - } else { - const bool sticky = significand << (srcBits - shift); - src_rep_t denormalizedSignificand = significand >> shift | sticky; - absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); - const src_rep_t roundBits = denormalizedSignificand & roundMask; - // Round to nearest - if (roundBits > halfway) - absResult++; - // Ties to even - else if (roundBits == halfway) - absResult += absResult & 1; - } + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. 
+ const int srcBits = sizeof(src_t) * CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcSignificandMask = srcMinNormal - 1; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; + const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t) * CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const int underflowExponent = srcExpBias + 1 - dstExpBias; + const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; + const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; + const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; + + const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); + const dst_rep_t dstNaNCode = dstQNaN - 1; + + // Break a into a sign and representation of the absolute value + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + if (aAbs - underflow < aAbs - overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. 
+ absResult = aAbs >> (srcSigBits - dstSigBits); + absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; + + const src_rep_t roundBits = aAbs & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; + } else if (aAbs > srcInfinity) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= dstQNaN; + absResult |= + ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; + } else if (aAbs >= overflow) { + // a overflows to infinity. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + const int aExp = aAbs >> srcSigBits; + const int shift = srcExpBias - dstExpBias - aExp + 1; + + const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; + + // Right shift by the denormalization amount with sticky. + if (shift > srcSigBits) { + absResult = 0; + } else { + const bool sticky = significand << (srcBits - shift); + src_rep_t denormalizedSignificand = significand >> shift | sticky; + absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); + const src_rep_t roundBits = denormalizedSignificand & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; } + } - // Apply the signbit to (dst_t)abs(a). - const dst_rep_t result = absResult | sign >> (srcBits - dstBits); - return dstFromRep(result); + // Apply the signbit to (dst_t)abs(a). 
+ const dst_rep_t result = absResult | sign >> (srcBits - dstBits); + return dstFromRep(result); } Index: compiler-rt/trunk/lib/builtins/gcc_personality_v0.c =================================================================== --- compiler-rt/trunk/lib/builtins/gcc_personality_v0.c +++ compiler-rt/trunk/lib/builtins/gcc_personality_v0.c @@ -11,7 +11,8 @@ #include "int_lib.h" #include -#if defined(__arm__) && !defined(__ARM_DWARF_EH__) && !defined(__USING_SJLJ_EXCEPTIONS__) +#if defined(__arm__) && !defined(__ARM_DWARF_EH__) && \ + !defined(__USING_SJLJ_EXCEPTIONS__) /* * When building with older compilers (e.g. clang <3.9), it is possible that we * have a version of unwind.h which does not provide the EHABI declarations @@ -28,117 +29,113 @@ * http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html */ -#define DW_EH_PE_omit 0xff /* no data follows */ - -#define DW_EH_PE_absptr 0x00 -#define DW_EH_PE_uleb128 0x01 -#define DW_EH_PE_udata2 0x02 -#define DW_EH_PE_udata4 0x03 -#define DW_EH_PE_udata8 0x04 -#define DW_EH_PE_sleb128 0x09 -#define DW_EH_PE_sdata2 0x0A -#define DW_EH_PE_sdata4 0x0B -#define DW_EH_PE_sdata8 0x0C - -#define DW_EH_PE_pcrel 0x10 -#define DW_EH_PE_textrel 0x20 -#define DW_EH_PE_datarel 0x30 -#define DW_EH_PE_funcrel 0x40 -#define DW_EH_PE_aligned 0x50 -#define DW_EH_PE_indirect 0x80 /* gcc extension */ - +#define DW_EH_PE_omit 0xff /* no data follows */ +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0A +#define DW_EH_PE_sdata4 0x0B +#define DW_EH_PE_sdata8 0x0C + +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 /* gcc extension */ /* read a uleb128 encoded value and advance pointer */ -static uintptr_t readULEB128(const uint8_t** data) -{ - 
uintptr_t result = 0; - uintptr_t shift = 0; - unsigned char byte; - const uint8_t* p = *data; - do { - byte = *p++; - result |= (byte & 0x7f) << shift; - shift += 7; - } while (byte & 0x80); - *data = p; - return result; +static uintptr_t readULEB128(const uint8_t **data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t *p = *data; + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + *data = p; + return result; } /* read a pointer encoded value and advance pointer */ -static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding) -{ - const uint8_t* p = *data; - uintptr_t result = 0; - - if ( encoding == DW_EH_PE_omit ) - return 0; - - /* first get value */ - switch (encoding & 0x0F) { - case DW_EH_PE_absptr: - result = *((const uintptr_t*)p); - p += sizeof(uintptr_t); - break; - case DW_EH_PE_uleb128: - result = readULEB128(&p); - break; - case DW_EH_PE_udata2: - result = *((const uint16_t*)p); - p += sizeof(uint16_t); - break; - case DW_EH_PE_udata4: - result = *((const uint32_t*)p); - p += sizeof(uint32_t); - break; - case DW_EH_PE_udata8: - result = *((const uint64_t*)p); - p += sizeof(uint64_t); - break; - case DW_EH_PE_sdata2: - result = *((const int16_t*)p); - p += sizeof(int16_t); - break; - case DW_EH_PE_sdata4: - result = *((const int32_t*)p); - p += sizeof(int32_t); - break; - case DW_EH_PE_sdata8: - result = *((const int64_t*)p); - p += sizeof(int64_t); - break; - case DW_EH_PE_sleb128: - default: - /* not supported */ - compilerrt_abort(); - break; - } - - /* then add relative offset */ - switch ( encoding & 0x70 ) { - case DW_EH_PE_absptr: - /* do nothing */ - break; - case DW_EH_PE_pcrel: - result += (uintptr_t)(*data); - break; - case DW_EH_PE_textrel: - case DW_EH_PE_datarel: - case DW_EH_PE_funcrel: - case DW_EH_PE_aligned: - default: - /* not supported */ - compilerrt_abort(); - break; - } +static uintptr_t readEncodedPointer(const uint8_t **data, 
uint8_t encoding) { + const uint8_t *p = *data; + uintptr_t result = 0; + + if (encoding == DW_EH_PE_omit) + return 0; + + /* first get value */ + switch (encoding & 0x0F) { + case DW_EH_PE_absptr: + result = *((const uintptr_t *)p); + p += sizeof(uintptr_t); + break; + case DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + case DW_EH_PE_udata2: + result = *((const uint16_t *)p); + p += sizeof(uint16_t); + break; + case DW_EH_PE_udata4: + result = *((const uint32_t *)p); + p += sizeof(uint32_t); + break; + case DW_EH_PE_udata8: + result = *((const uint64_t *)p); + p += sizeof(uint64_t); + break; + case DW_EH_PE_sdata2: + result = *((const int16_t *)p); + p += sizeof(int16_t); + break; + case DW_EH_PE_sdata4: + result = *((const int32_t *)p); + p += sizeof(int32_t); + break; + case DW_EH_PE_sdata8: + result = *((const int64_t *)p); + p += sizeof(int64_t); + break; + case DW_EH_PE_sleb128: + default: + /* not supported */ + compilerrt_abort(); + break; + } + + /* then add relative offset */ + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + /* do nothing */ + break; + case DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case DW_EH_PE_textrel: + case DW_EH_PE_datarel: + case DW_EH_PE_funcrel: + case DW_EH_PE_aligned: + default: + /* not supported */ + compilerrt_abort(); + break; + } + + /* then apply indirection */ + if (encoding & DW_EH_PE_indirect) { + result = *((const uintptr_t *)result); + } - /* then apply indirection */ - if (encoding & DW_EH_PE_indirect) { - result = *((const uintptr_t*)result); - } - - *data = p; - return result; + *data = p; + return result; } #if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ @@ -152,14 +149,14 @@ continueUnwind(struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context) { #if USING_ARM_EHABI - /* - * On ARM EHABI the personality routine is responsible for actually - * unwinding a single stack frame before returning (ARM EHABI Sec. 6.1). 
- */ - if (__gnu_unwind_frame(exceptionObject, context) != _URC_OK) - return _URC_FAILURE; + /* + * On ARM EHABI the personality routine is responsible for actually + * unwinding a single stack frame before returning (ARM EHABI Sec. 6.1). + */ + if (__gnu_unwind_frame(exceptionObject, context) != _URC_OK) + return _URC_FAILURE; #endif - return _URC_CONTINUE_UNWIND; + return _URC_CONTINUE_UNWIND; } /* @@ -173,78 +170,76 @@ #if __USING_SJLJ_EXCEPTIONS__ /* the setjump-longjump based exceptions personality routine has a * different name */ -COMPILER_RT_ABI _Unwind_Reason_Code -__gcc_personality_sj0(int version, _Unwind_Action actions, - uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, - struct _Unwind_Context *context) +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_sj0( + int version, _Unwind_Action actions, uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context) #elif USING_ARM_EHABI /* The ARM EHABI personality routine has a different signature. */ COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( - _Unwind_State state, struct _Unwind_Exception *exceptionObject, - struct _Unwind_Context *context) + _Unwind_State state, struct _Unwind_Exception *exceptionObject, + struct _Unwind_Context *context) #else -COMPILER_RT_ABI _Unwind_Reason_Code -__gcc_personality_v0(int version, _Unwind_Action actions, - uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, - struct _Unwind_Context *context) +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( + int version, _Unwind_Action actions, uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context) #endif { - /* Since C does not have catch clauses, there is nothing to do during */ - /* phase 1 (the search phase). */ + /* Since C does not have catch clauses, there is nothing to do during */ + /* phase 1 (the search phase). 
*/ #if USING_ARM_EHABI - /* After resuming from a cleanup we should also continue on to the next - * frame straight away. */ - if ((state & _US_ACTION_MASK) != _US_UNWIND_FRAME_STARTING) + /* After resuming from a cleanup we should also continue on to the next + * frame straight away. */ + if ((state & _US_ACTION_MASK) != _US_UNWIND_FRAME_STARTING) #else - if ( actions & _UA_SEARCH_PHASE ) + if (actions & _UA_SEARCH_PHASE) #endif - return continueUnwind(exceptionObject, context); + return continueUnwind(exceptionObject, context); - /* There is nothing to do if there is no LSDA for this frame. */ - const uint8_t* lsda = (uint8_t*)_Unwind_GetLanguageSpecificData(context); - if ( lsda == (uint8_t*) 0 ) - return continueUnwind(exceptionObject, context); - - uintptr_t pc = (uintptr_t)_Unwind_GetIP(context)-1; - uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context); - uintptr_t pcOffset = pc - funcStart; - - /* Parse LSDA header. */ - uint8_t lpStartEncoding = *lsda++; - if (lpStartEncoding != DW_EH_PE_omit) { - readEncodedPointer(&lsda, lpStartEncoding); - } - uint8_t ttypeEncoding = *lsda++; - if (ttypeEncoding != DW_EH_PE_omit) { - readULEB128(&lsda); - } - /* Walk call-site table looking for range that includes current PC. */ - uint8_t callSiteEncoding = *lsda++; - uint32_t callSiteTableLength = readULEB128(&lsda); - const uint8_t* callSiteTableStart = lsda; - const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength; - const uint8_t* p=callSiteTableStart; - while (p < callSiteTableEnd) { - uintptr_t start = readEncodedPointer(&p, callSiteEncoding); - uintptr_t length = readEncodedPointer(&p, callSiteEncoding); - uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding); - readULEB128(&p); /* action value not used for C code */ - if ( landingPad == 0 ) - continue; /* no landing pad for this entry */ - if ( (start <= pcOffset) && (pcOffset < (start+length)) ) { - /* Found landing pad for the PC. 
- * Set Instruction Pointer to so we re-enter function - * at landing pad. The landing pad is created by the compiler - * to take two parameters in registers. - */ - _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), - (uintptr_t)exceptionObject); - _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); - _Unwind_SetIP(context, (funcStart + landingPad)); - return _URC_INSTALL_CONTEXT; - } + /* There is nothing to do if there is no LSDA for this frame. */ + const uint8_t *lsda = (uint8_t *)_Unwind_GetLanguageSpecificData(context); + if (lsda == (uint8_t *)0) + return continueUnwind(exceptionObject, context); + + uintptr_t pc = (uintptr_t)_Unwind_GetIP(context) - 1; + uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + + /* Parse LSDA header. */ + uint8_t lpStartEncoding = *lsda++; + if (lpStartEncoding != DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + uint8_t ttypeEncoding = *lsda++; + if (ttypeEncoding != DW_EH_PE_omit) { + readULEB128(&lsda); + } + /* Walk call-site table looking for range that includes current PC. */ + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t *callSiteTableStart = lsda; + const uint8_t *callSiteTableEnd = callSiteTableStart + callSiteTableLength; + const uint8_t *p = callSiteTableStart; + while (p < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&p, callSiteEncoding); + uintptr_t length = readEncodedPointer(&p, callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding); + readULEB128(&p); /* action value not used for C code */ + if (landingPad == 0) + continue; /* no landing pad for this entry */ + if ((start <= pcOffset) && (pcOffset < (start + length))) { + /* Found landing pad for the PC. + * Set Instruction Pointer to so we re-enter function + * at landing pad. The landing pad is created by the compiler + * to take two parameters in registers. 
+ */ + _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); + _Unwind_SetIP(context, (funcStart + landingPad)); + return _URC_INSTALL_CONTEXT; } + } - /* No landing pad found, continue unwinding. */ - return continueUnwind(exceptionObject, context); + /* No landing pad found, continue unwinding. */ + return continueUnwind(exceptionObject, context); } Index: compiler-rt/trunk/lib/builtins/int_endianness.h =================================================================== --- compiler-rt/trunk/lib/builtins/int_endianness.h +++ compiler-rt/trunk/lib/builtins/int_endianness.h @@ -15,16 +15,16 @@ #ifndef INT_ENDIANNESS_H #define INT_ENDIANNESS_H -#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ defined(__ORDER_LITTLE_ENDIAN__) /* Clang and GCC provide built-in endianness definitions. */ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 1 #elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 0 #endif /* __BYTE_ORDER__ */ #else /* Compilers other than Clang or GCC. 
*/ @@ -34,10 +34,10 @@ #if defined(_BIG_ENDIAN) #define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 1 #elif defined(_LITTLE_ENDIAN) #define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 0 #else /* !_LITTLE_ENDIAN */ #error "unknown endianness" #endif /* !_LITTLE_ENDIAN */ @@ -52,10 +52,10 @@ #if _BYTE_ORDER == _BIG_ENDIAN #define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 1 #elif _BYTE_ORDER == _LITTLE_ENDIAN #define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 0 #endif /* _BYTE_ORDER */ #endif /* *BSD */ @@ -65,10 +65,10 @@ #if _BYTE_ORDER == _BIG_ENDIAN #define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 1 #elif _BYTE_ORDER == _LITTLE_ENDIAN #define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 0 #endif /* _BYTE_ORDER */ #endif /* OpenBSD */ @@ -77,19 +77,19 @@ /* Mac OSX has __BIG_ENDIAN__ or __LITTLE_ENDIAN__ automatically set by the * compiler (at least with GCC) */ -#if defined(__APPLE__) || defined(__ellcc__ ) +#if defined(__APPLE__) || defined(__ellcc__) #ifdef __BIG_ENDIAN__ #if __BIG_ENDIAN__ #define _YUGA_LITTLE_ENDIAN 0 -#define _YUGA_BIG_ENDIAN 1 +#define _YUGA_BIG_ENDIAN 1 #endif #endif /* __BIG_ENDIAN__ */ #ifdef __LITTLE_ENDIAN__ #if __LITTLE_ENDIAN__ #define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 0 #endif #endif /* __LITTLE_ENDIAN__ */ @@ -100,7 +100,7 @@ #if defined(_WIN32) #define _YUGA_LITTLE_ENDIAN 1 -#define _YUGA_BIG_ENDIAN 0 +#define _YUGA_BIG_ENDIAN 0 #endif /* Windows */ Index: compiler-rt/trunk/lib/builtins/int_lib.h =================================================================== --- compiler-rt/trunk/lib/builtins/int_lib.h +++ compiler-rt/trunk/lib/builtins/int_lib.h @@ -20,24 +20,26 @@ /* Assumption: Endianness is little or big (not mixed). 
*/ #if defined(__ELF__) -#define FNALIAS(alias_name, original_name) \ +#define FNALIAS(alias_name, original_name) \ void alias_name() __attribute__((__alias__(#original_name))) #define COMPILER_RT_ALIAS(aliasee) __attribute__((__alias__(#aliasee))) #else -#define FNALIAS(alias, name) _Pragma("GCC error(\"alias unsupported on this file format\")") -#define COMPILER_RT_ALIAS(aliasee) _Pragma("GCC error(\"alias unsupported on this file format\")") +#define FNALIAS(alias, name) \ + _Pragma("GCC error(\"alias unsupported on this file format\")") +#define COMPILER_RT_ALIAS(aliasee) \ + _Pragma("GCC error(\"alias unsupported on this file format\")") #endif /* ABI macro definitions */ #if __ARM_EABI__ -# ifdef COMPILER_RT_ARMHF_TARGET -# define COMPILER_RT_ABI -# else -# define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) -# endif +#ifdef COMPILER_RT_ARMHF_TARGET +#define COMPILER_RT_ABI #else -# define COMPILER_RT_ABI +#define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) +#endif +#else +#define COMPILER_RT_ABI #endif #define AEABI_RTABI __attribute__((__pcs__("aapcs"))) @@ -59,15 +61,15 @@ * Kernel and boot environment can't use normal headers, * so use the equivalent system headers. */ -# include -# include -# include +#include +#include +#include #else /* Include the standard compiler builtin headers we use functionality from. */ -# include -# include -# include -# include +#include +#include +#include +#include #endif /* Include the commonly used internal type definitions. 
*/ @@ -83,11 +85,11 @@ COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b); COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d); -COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int* rem); -COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int* rem); +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem); +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem); #ifdef CRT_HAS_128BIT COMPILER_RT_ABI si_int __clzti2(ti_int a); -COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem); #endif /* Definitions for builtins unavailable on MSVC */ Index: compiler-rt/trunk/lib/builtins/int_math.h =================================================================== --- compiler-rt/trunk/lib/builtins/int_math.h +++ compiler-rt/trunk/lib/builtins/int_math.h @@ -21,7 +21,7 @@ #define INT_MATH_H #ifndef __has_builtin -# define __has_builtin(x) 0 +#define __has_builtin(x) 0 #endif #if defined(_MSC_VER) && !defined(__clang__) @@ -45,15 +45,15 @@ * versions of GCC which didn't have __builtin_isfinite. 
*/ #if __has_builtin(__builtin_isfinite) -# define crt_isfinite(x) __builtin_isfinite((x)) +#define crt_isfinite(x) __builtin_isfinite((x)) #elif defined(__GNUC__) -# define crt_isfinite(x) \ - __extension__(({ \ - __typeof((x)) x_ = (x); \ - !crt_isinf(x_) && !crt_isnan(x_); \ - })) +#define crt_isfinite(x) \ + __extension__(({ \ + __typeof((x)) x_ = (x); \ + !crt_isinf(x_) && !crt_isnan(x_); \ + })) #else -# error "Do not know how to check for infinity" +#error "Do not know how to check for infinity" #endif /* __has_builtin(__builtin_isfinite) */ #define crt_isinf(x) __builtin_isinf((x)) #define crt_isnan(x) __builtin_isnan((x)) Index: compiler-rt/trunk/lib/builtins/int_types.h =================================================================== --- compiler-rt/trunk/lib/builtins/int_types.h +++ compiler-rt/trunk/lib/builtins/int_types.h @@ -23,40 +23,36 @@ #ifdef si_int #undef si_int #endif -typedef int si_int; +typedef int si_int; typedef unsigned su_int; -typedef long long di_int; +typedef long long di_int; typedef unsigned long long du_int; -typedef union -{ - di_int all; - struct - { +typedef union { + di_int all; + struct { #if _YUGA_LITTLE_ENDIAN - su_int low; - si_int high; + su_int low; + si_int high; #else - si_int high; - su_int low; + si_int high; + su_int low; #endif /* _YUGA_LITTLE_ENDIAN */ - }s; + } s; } dwords; -typedef union -{ - du_int all; - struct - { +typedef union { + du_int all; + struct { #if _YUGA_LITTLE_ENDIAN - su_int low; - su_int high; + su_int low; + su_int high; #else - su_int high; - su_int low; + su_int high; + su_int low; #endif /* _YUGA_LITTLE_ENDIAN */ - }s; + } s; } udwords; #if defined(__LP64__) || defined(__wasm__) || defined(__mips64) || \ @@ -73,75 +69,68 @@ #endif #ifdef CRT_HAS_128BIT -typedef int ti_int __attribute__ ((mode (TI))); -typedef unsigned tu_int __attribute__ ((mode (TI))); +typedef int ti_int __attribute__((mode(TI))); +typedef unsigned tu_int __attribute__((mode(TI))); -typedef union -{ - ti_int all; - 
struct - { +typedef union { + ti_int all; + struct { #if _YUGA_LITTLE_ENDIAN - du_int low; - di_int high; + du_int low; + di_int high; #else - di_int high; - du_int low; + di_int high; + du_int low; #endif /* _YUGA_LITTLE_ENDIAN */ - }s; + } s; } twords; -typedef union -{ - tu_int all; - struct - { +typedef union { + tu_int all; + struct { #if _YUGA_LITTLE_ENDIAN - du_int low; - du_int high; + du_int low; + du_int high; #else - du_int high; - du_int low; + du_int high; + du_int low; #endif /* _YUGA_LITTLE_ENDIAN */ - }s; + } s; } utwords; static __inline ti_int make_ti(di_int h, di_int l) { - twords r; - r.s.high = h; - r.s.low = l; - return r.all; + twords r; + r.s.high = h; + r.s.low = l; + return r.all; } static __inline tu_int make_tu(du_int h, du_int l) { - utwords r; - r.s.high = h; - r.s.low = l; - return r.all; + utwords r; + r.s.high = h; + r.s.low = l; + return r.all; } #endif /* CRT_HAS_128BIT */ -typedef union -{ - su_int u; - float f; +typedef union { + su_int u; + float f; } float_bits; -typedef union -{ - udwords u; - double f; +typedef union { + udwords u; + double f; } double_bits; -typedef struct -{ +typedef struct { #if _YUGA_LITTLE_ENDIAN - udwords low; - udwords high; + udwords low; + udwords high; #else - udwords high; - udwords low; + udwords high; + udwords low; #endif /* _YUGA_LITTLE_ENDIAN */ } uqwords; @@ -150,17 +139,16 @@ * still makes it 80 bits. Clang will match whatever compiler it is trying to * be compatible with. 
*/ -#if ((defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)) || \ +#if ((defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)) || \ defined(__m68k__) || defined(__ia64__) #define HAS_80_BIT_LONG_DOUBLE 1 #else #define HAS_80_BIT_LONG_DOUBLE 0 #endif -typedef union -{ - uqwords u; - long double f; +typedef union { + uqwords u; + long double f; } long_double_bits; #if __STDC_VERSION__ >= 199901L @@ -171,14 +159,19 @@ #define COMPLEX_REAL(x) __real__(x) #define COMPLEX_IMAGINARY(x) __imag__(x) #else -typedef struct { float real, imaginary; } Fcomplex; - -typedef struct { double real, imaginary; } Dcomplex; - -typedef struct { long double real, imaginary; } Lcomplex; +typedef struct { + float real, imaginary; +} Fcomplex; + +typedef struct { + double real, imaginary; +} Dcomplex; + +typedef struct { + long double real, imaginary; +} Lcomplex; #define COMPLEX_REAL(x) (x).real #define COMPLEX_IMAGINARY(x) (x).imaginary #endif #endif /* INT_TYPES_H */ - Index: compiler-rt/trunk/lib/builtins/int_util.c =================================================================== --- compiler-rt/trunk/lib/builtins/int_util.c +++ compiler-rt/trunk/lib/builtins/int_util.c @@ -8,7 +8,6 @@ */ #include "int_lib.h" -#include "int_util.h" /* NOTE: The definitions in this file are declared weak because we clients to be * able to arbitrarily package individual functions into separate .a files. 
If Index: compiler-rt/trunk/lib/builtins/lshrdi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/lshrdi3.c +++ compiler-rt/trunk/lib/builtins/lshrdi3.c @@ -17,28 +17,26 @@ /* Precondition: 0 <= b < bits_in_dword */ -COMPILER_RT_ABI di_int -__lshrdi3(di_int a, si_int b) -{ - const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); - udwords input; - udwords result; - input.all = a; - if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ - { - result.s.high = 0; - result.s.low = input.s.high >> (b - bits_in_word); - } - else /* 0 <= b < bits_in_word */ - { - if (b == 0) - return a; - result.s.high = input.s.high >> b; - result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); - } - return result.all; +COMPILER_RT_ABI di_int __lshrdi3(di_int a, si_int b) { + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + udwords input; + udwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ + { + result.s.high = 0; + result.s.low = input.s.high >> (b - bits_in_word); + } else /* 0 <= b < bits_in_word */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; } #if defined(__ARM_EABI__) -AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) COMPILER_RT_ALIAS(__lshrdi3); +AEABI_RTABI di_int __aeabi_llsr(di_int a, si_int b) + COMPILER_RT_ALIAS(__lshrdi3); #endif Index: compiler-rt/trunk/lib/builtins/lshrti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/lshrti3.c +++ compiler-rt/trunk/lib/builtins/lshrti3.c @@ -19,26 +19,23 @@ /* Precondition: 0 <= b < bits_in_tword */ -COMPILER_RT_ABI ti_int -__lshrti3(ti_int a, si_int b) -{ - const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); - utwords input; - utwords result; - input.all = a; - if (b & bits_in_dword) /* bits_in_dword <= b 
< bits_in_tword */ - { - result.s.high = 0; - result.s.low = input.s.high >> (b - bits_in_dword); - } - else /* 0 <= b < bits_in_dword */ - { - if (b == 0) - return a; - result.s.high = input.s.high >> b; - result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); - } - return result.all; +COMPILER_RT_ABI ti_int __lshrti3(ti_int a, si_int b) { + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + utwords input; + utwords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ + { + result.s.high = 0; + result.s.low = input.s.high >> (b - bits_in_dword); + } else /* 0 <= b < bits_in_dword */ + { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/moddi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/moddi3.c +++ compiler-rt/trunk/lib/builtins/moddi3.c @@ -15,15 +15,13 @@ /* Returns: a % b */ -COMPILER_RT_ABI di_int -__moddi3(di_int a, di_int b) -{ - const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; - di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */ - b = (b ^ s) - s; /* negate if s == -1 */ - s = a >> bits_in_dword_m1; /* s = a < 0 ? -1 : 0 */ - a = (a ^ s) - s; /* negate if s == -1 */ - du_int r; - __udivmoddi4(a, b, &r); - return ((di_int)r ^ s) - s; /* negate if s == -1 */ +COMPILER_RT_ABI di_int __moddi3(di_int a, di_int b) { + const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1; + di_int s = b >> bits_in_dword_m1; /* s = b < 0 ? -1 : 0 */ + b = (b ^ s) - s; /* negate if s == -1 */ + s = a >> bits_in_dword_m1; /* s = a < 0 ? 
-1 : 0 */ + a = (a ^ s) - s; /* negate if s == -1 */ + du_int r; + __udivmoddi4(a, b, &r); + return ((di_int)r ^ s) - s; /* negate if s == -1 */ } Index: compiler-rt/trunk/lib/builtins/modsi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/modsi3.c +++ compiler-rt/trunk/lib/builtins/modsi3.c @@ -15,8 +15,6 @@ /* Returns: a % b */ -COMPILER_RT_ABI si_int -__modsi3(si_int a, si_int b) -{ - return a - __divsi3(a, b) * b; +COMPILER_RT_ABI si_int __modsi3(si_int a, si_int b) { + return a - __divsi3(a, b) * b; } Index: compiler-rt/trunk/lib/builtins/modti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/modti3.c +++ compiler-rt/trunk/lib/builtins/modti3.c @@ -17,17 +17,15 @@ /*Returns: a % b */ -COMPILER_RT_ABI ti_int -__modti3(ti_int a, ti_int b) -{ - const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; - ti_int s = b >> bits_in_tword_m1; /* s = b < 0 ? -1 : 0 */ - b = (b ^ s) - s; /* negate if s == -1 */ - s = a >> bits_in_tword_m1; /* s = a < 0 ? -1 : 0 */ - a = (a ^ s) - s; /* negate if s == -1 */ - tu_int r; - __udivmodti4(a, b, &r); - return ((ti_int)r ^ s) - s; /* negate if s == -1 */ +COMPILER_RT_ABI ti_int __modti3(ti_int a, ti_int b) { + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s = b >> bits_in_tword_m1; /* s = b < 0 ? -1 : 0 */ + b = (b ^ s) - s; /* negate if s == -1 */ + s = a >> bits_in_tword_m1; /* s = a < 0 ? 
-1 : 0 */ + a = (a ^ s) - s; /* negate if s == -1 */ + tu_int r; + __udivmodti4(a, b, &r); + return ((ti_int)r ^ s) - s; /* negate if s == -1 */ } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/muldc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/muldc3.c +++ compiler-rt/trunk/lib/builtins/muldc3.c @@ -16,57 +16,51 @@ /* Returns: the product of a + ib and c + id */ -COMPILER_RT_ABI Dcomplex -__muldc3(double __a, double __b, double __c, double __d) -{ - double __ac = __a * __c; - double __bd = __b * __d; - double __ad = __a * __d; - double __bc = __b * __c; - Dcomplex z; - COMPLEX_REAL(z) = __ac - __bd; - COMPLEX_IMAGINARY(z) = __ad + __bc; - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - int __recalc = 0; - if (crt_isinf(__a) || crt_isinf(__b)) - { - __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b); - if (crt_isnan(__c)) - __c = crt_copysign(0, __c); - if (crt_isnan(__d)) - __d = crt_copysign(0, __d); - __recalc = 1; - } - if (crt_isinf(__c) || crt_isinf(__d)) - { - __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysign(crt_isinf(__d) ? 
1 : 0, __d); - if (crt_isnan(__a)) - __a = crt_copysign(0, __a); - if (crt_isnan(__b)) - __b = crt_copysign(0, __b); - __recalc = 1; - } - if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || - crt_isinf(__ad) || crt_isinf(__bc))) - { - if (crt_isnan(__a)) - __a = crt_copysign(0, __a); - if (crt_isnan(__b)) - __b = crt_copysign(0, __b); - if (crt_isnan(__c)) - __c = crt_copysign(0, __c); - if (crt_isnan(__d)) - __d = crt_copysign(0, __d); - __recalc = 1; - } - if (__recalc) - { - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); - } +COMPILER_RT_ABI Dcomplex __muldc3(double __a, double __b, double __c, + double __d) { + double __ac = __a * __c; + double __bd = __b * __d; + double __ad = __a * __d; + double __bc = __b * __c; + Dcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) { + __a = crt_copysign(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysign(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysign(0, __c); + if (crt_isnan(__d)) + __d = crt_copysign(0, __d); + __recalc = 1; } - return z; + if (crt_isinf(__c) || crt_isinf(__d)) { + __c = crt_copysign(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysign(crt_isinf(__d) ? 
1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysign(0, __a); + if (crt_isnan(__b)) + __b = crt_copysign(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || crt_isinf(__ad) || + crt_isinf(__bc))) { + if (crt_isnan(__a)) + __a = crt_copysign(0, __a); + if (crt_isnan(__b)) + __b = crt_copysign(0, __b); + if (crt_isnan(__c)) + __c = crt_copysign(0, __c); + if (crt_isnan(__d)) + __d = crt_copysign(0, __d); + __recalc = 1; + } + if (__recalc) { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; } Index: compiler-rt/trunk/lib/builtins/muldf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/muldf3.c +++ compiler-rt/trunk/lib/builtins/muldf3.c @@ -14,15 +14,11 @@ #define DOUBLE_PRECISION #include "fp_mul_impl.inc" -COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { - return __mulXf3__(a, b); -} +COMPILER_RT_ABI fp_t __muldf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) { - return __muldf3(a, b); -} +AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) { return __muldf3(a, b); } #else AEABI_RTABI fp_t __aeabi_dmul(fp_t a, fp_t b) COMPILER_RT_ALIAS(__muldf3); #endif Index: compiler-rt/trunk/lib/builtins/muldi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/muldi3.c +++ compiler-rt/trunk/lib/builtins/muldi3.c @@ -15,41 +15,36 @@ /* Returns: a * b */ -static -di_int -__muldsi3(su_int a, su_int b) -{ - dwords r; - const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2; - const su_int lower_mask = (su_int)~0 >> bits_in_word_2; - r.s.low = (a & lower_mask) * (b & lower_mask); - su_int t = r.s.low >> bits_in_word_2; - r.s.low &= lower_mask; - t += (a >> bits_in_word_2) * (b & lower_mask); - r.s.low += (t & lower_mask) << 
bits_in_word_2; - r.s.high = t >> bits_in_word_2; - t = r.s.low >> bits_in_word_2; - r.s.low &= lower_mask; - t += (b >> bits_in_word_2) * (a & lower_mask); - r.s.low += (t & lower_mask) << bits_in_word_2; - r.s.high += t >> bits_in_word_2; - r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2); - return r.all; +static di_int __muldsi3(su_int a, su_int b) { + dwords r; + const int bits_in_word_2 = (int)(sizeof(si_int) * CHAR_BIT) / 2; + const su_int lower_mask = (su_int)~0 >> bits_in_word_2; + r.s.low = (a & lower_mask) * (b & lower_mask); + su_int t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (a >> bits_in_word_2) * (b & lower_mask); + r.s.low += (t & lower_mask) << bits_in_word_2; + r.s.high = t >> bits_in_word_2; + t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (b >> bits_in_word_2) * (a & lower_mask); + r.s.low += (t & lower_mask) << bits_in_word_2; + r.s.high += t >> bits_in_word_2; + r.s.high += (a >> bits_in_word_2) * (b >> bits_in_word_2); + return r.all; } /* Returns: a * b */ -COMPILER_RT_ABI di_int -__muldi3(di_int a, di_int b) -{ - dwords x; - x.all = a; - dwords y; - y.all = b; - dwords r; - r.all = __muldsi3(x.s.low, y.s.low); - r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; - return r.all; +COMPILER_RT_ABI di_int __muldi3(di_int a, di_int b) { + dwords x; + x.all = a; + dwords y; + y.all = b; + dwords r; + r.all = __muldsi3(x.s.low, y.s.low); + r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; + return r.all; } #if defined(__ARM_EABI__) Index: compiler-rt/trunk/lib/builtins/mulodi4.c =================================================================== --- compiler-rt/trunk/lib/builtins/mulodi4.c +++ compiler-rt/trunk/lib/builtins/mulodi4.c @@ -17,41 +17,34 @@ /* Effects: sets *overflow to 1 if a * b overflows */ -COMPILER_RT_ABI di_int -__mulodi4(di_int a, di_int b, int* overflow) -{ - const int N = (int)(sizeof(di_int) * CHAR_BIT); - const di_int MIN = (di_int)1 << (N-1); - const di_int MAX = ~MIN; - 
*overflow = 0; - di_int result = a * b; - if (a == MIN) - { - if (b != 0 && b != 1) - *overflow = 1; - return result; - } - if (b == MIN) - { - if (a != 0 && a != 1) - *overflow = 1; - return result; - } - di_int sa = a >> (N - 1); - di_int abs_a = (a ^ sa) - sa; - di_int sb = b >> (N - 1); - di_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return result; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - *overflow = 1; - } - else - { - if (abs_a > MIN / -abs_b) - *overflow = 1; - } +COMPILER_RT_ABI di_int __mulodi4(di_int a, di_int b, int *overflow) { + const int N = (int)(sizeof(di_int) * CHAR_BIT); + const di_int MIN = (di_int)1 << (N - 1); + const di_int MAX = ~MIN; + *overflow = 0; + di_int result = a * b; + if (a == MIN) { + if (b != 0 && b != 1) + *overflow = 1; return result; + } + if (b == MIN) { + if (a != 0 && a != 1) + *overflow = 1; + return result; + } + di_int sa = a >> (N - 1); + di_int abs_a = (a ^ sa) - sa; + di_int sb = b >> (N - 1); + di_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) { + if (abs_a > MAX / abs_b) + *overflow = 1; + } else { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; } Index: compiler-rt/trunk/lib/builtins/mulosi4.c =================================================================== --- compiler-rt/trunk/lib/builtins/mulosi4.c +++ compiler-rt/trunk/lib/builtins/mulosi4.c @@ -17,41 +17,34 @@ /* Effects: sets *overflow to 1 if a * b overflows */ -COMPILER_RT_ABI si_int -__mulosi4(si_int a, si_int b, int* overflow) -{ - const int N = (int)(sizeof(si_int) * CHAR_BIT); - const si_int MIN = (si_int)1 << (N-1); - const si_int MAX = ~MIN; - *overflow = 0; - si_int result = a * b; - if (a == MIN) - { - if (b != 0 && b != 1) - *overflow = 1; - return result; - } - if (b == MIN) - { - if (a != 0 && a != 1) - *overflow = 1; - return result; - } - si_int sa = a >> (N - 1); - si_int abs_a = (a ^ sa) - sa; - si_int sb = b >> (N - 1); - si_int abs_b = (b ^ sb) - sb; - if 
(abs_a < 2 || abs_b < 2) - return result; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - *overflow = 1; - } - else - { - if (abs_a > MIN / -abs_b) - *overflow = 1; - } +COMPILER_RT_ABI si_int __mulosi4(si_int a, si_int b, int *overflow) { + const int N = (int)(sizeof(si_int) * CHAR_BIT); + const si_int MIN = (si_int)1 << (N - 1); + const si_int MAX = ~MIN; + *overflow = 0; + si_int result = a * b; + if (a == MIN) { + if (b != 0 && b != 1) + *overflow = 1; return result; + } + if (b == MIN) { + if (a != 0 && a != 1) + *overflow = 1; + return result; + } + si_int sa = a >> (N - 1); + si_int abs_a = (a ^ sa) - sa; + si_int sb = b >> (N - 1); + si_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) { + if (abs_a > MAX / abs_b) + *overflow = 1; + } else { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; } Index: compiler-rt/trunk/lib/builtins/muloti4.c =================================================================== --- compiler-rt/trunk/lib/builtins/muloti4.c +++ compiler-rt/trunk/lib/builtins/muloti4.c @@ -19,43 +19,36 @@ /* Effects: sets *overflow to 1 if a * b overflows */ -COMPILER_RT_ABI ti_int -__muloti4(ti_int a, ti_int b, int* overflow) -{ - const int N = (int)(sizeof(ti_int) * CHAR_BIT); - const ti_int MIN = (ti_int)1 << (N-1); - const ti_int MAX = ~MIN; - *overflow = 0; - ti_int result = a * b; - if (a == MIN) - { - if (b != 0 && b != 1) - *overflow = 1; - return result; - } - if (b == MIN) - { - if (a != 0 && a != 1) - *overflow = 1; - return result; - } - ti_int sa = a >> (N - 1); - ti_int abs_a = (a ^ sa) - sa; - ti_int sb = b >> (N - 1); - ti_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return result; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - *overflow = 1; - } - else - { - if (abs_a > MIN / -abs_b) - *overflow = 1; - } +COMPILER_RT_ABI ti_int __muloti4(ti_int a, ti_int b, int *overflow) { + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + const ti_int MIN = (ti_int)1 << (N 
- 1); + const ti_int MAX = ~MIN; + *overflow = 0; + ti_int result = a * b; + if (a == MIN) { + if (b != 0 && b != 1) + *overflow = 1; return result; + } + if (b == MIN) { + if (a != 0 && a != 1) + *overflow = 1; + return result; + } + ti_int sa = a >> (N - 1); + ti_int abs_a = (a ^ sa) - sa; + ti_int sb = b >> (N - 1); + ti_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) + return result; + if (sa == sb) { + if (abs_a > MAX / abs_b) + *overflow = 1; + } else { + if (abs_a > MIN / -abs_b) + *overflow = 1; + } + return result; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/mulsc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/mulsc3.c +++ compiler-rt/trunk/lib/builtins/mulsc3.c @@ -16,57 +16,50 @@ /* Returns: the product of a + ib and c + id */ -COMPILER_RT_ABI Fcomplex -__mulsc3(float __a, float __b, float __c, float __d) -{ - float __ac = __a * __c; - float __bd = __b * __d; - float __ad = __a * __d; - float __bc = __b * __c; - Fcomplex z; - COMPLEX_REAL(z) = __ac - __bd; - COMPLEX_IMAGINARY(z) = __ad + __bc; - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - int __recalc = 0; - if (crt_isinf(__a) || crt_isinf(__b)) - { - __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); - if (crt_isnan(__c)) - __c = crt_copysignf(0, __c); - if (crt_isnan(__d)) - __d = crt_copysignf(0, __d); - __recalc = 1; - } - if (crt_isinf(__c) || crt_isinf(__d)) - { - __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysignf(crt_isinf(__d) ? 
1 : 0, __d); - if (crt_isnan(__a)) - __a = crt_copysignf(0, __a); - if (crt_isnan(__b)) - __b = crt_copysignf(0, __b); - __recalc = 1; - } - if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || - crt_isinf(__ad) || crt_isinf(__bc))) - { - if (crt_isnan(__a)) - __a = crt_copysignf(0, __a); - if (crt_isnan(__b)) - __b = crt_copysignf(0, __b); - if (crt_isnan(__c)) - __c = crt_copysignf(0, __c); - if (crt_isnan(__d)) - __d = crt_copysignf(0, __d); - __recalc = 1; - } - if (__recalc) - { - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); - } +COMPILER_RT_ABI Fcomplex __mulsc3(float __a, float __b, float __c, float __d) { + float __ac = __a * __c; + float __bd = __b * __d; + float __ad = __a * __d; + float __bc = __b * __c; + Fcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) { + __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysignf(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignf(0, __d); + __recalc = 1; } - return z; + if (crt_isinf(__c) || crt_isinf(__d)) { + __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignf(crt_isinf(__d) ? 
1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysignf(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignf(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || crt_isinf(__ad) || + crt_isinf(__bc))) { + if (crt_isnan(__a)) + __a = crt_copysignf(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignf(0, __b); + if (crt_isnan(__c)) + __c = crt_copysignf(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignf(0, __d); + __recalc = 1; + } + if (__recalc) { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; } Index: compiler-rt/trunk/lib/builtins/mulsf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/mulsf3.c +++ compiler-rt/trunk/lib/builtins/mulsf3.c @@ -14,15 +14,11 @@ #define SINGLE_PRECISION #include "fp_mul_impl.inc" -COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { - return __mulXf3__(a, b); -} +COMPILER_RT_ABI fp_t __mulsf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) { - return __mulsf3(a, b); -} +AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) { return __mulsf3(a, b); } #else AEABI_RTABI fp_t __aeabi_fmul(fp_t a, fp_t b) COMPILER_RT_ALIAS(__mulsf3); #endif Index: compiler-rt/trunk/lib/builtins/multc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/multc3.c +++ compiler-rt/trunk/lib/builtins/multc3.c @@ -16,52 +16,51 @@ /* Returns: the product of a + ib and c + id */ -COMPILER_RT_ABI long double _Complex -__multc3(long double a, long double b, long double c, long double d) -{ - long double ac = a * c; - long double bd = b * d; - long double ad = a * d; - long double bc = b * c; - long double _Complex z; - __real__ z = ac - bd; - __imag__ z = ad + bc; - if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) { - int recalc = 
0; - if (crt_isinf(a) || crt_isinf(b)) { - a = crt_copysignl(crt_isinf(a) ? 1 : 0, a); - b = crt_copysignl(crt_isinf(b) ? 1 : 0, b); - if (crt_isnan(c)) - c = crt_copysignl(0, c); - if (crt_isnan(d)) - d = crt_copysignl(0, d); - recalc = 1; - } - if (crt_isinf(c) || crt_isinf(d)) { - c = crt_copysignl(crt_isinf(c) ? 1 : 0, c); - d = crt_copysignl(crt_isinf(d) ? 1 : 0, d); - if (crt_isnan(a)) - a = crt_copysignl(0, a); - if (crt_isnan(b)) - b = crt_copysignl(0, b); - recalc = 1; - } - if (!recalc && (crt_isinf(ac) || crt_isinf(bd) || - crt_isinf(ad) || crt_isinf(bc))) { - if (crt_isnan(a)) - a = crt_copysignl(0, a); - if (crt_isnan(b)) - b = crt_copysignl(0, b); - if (crt_isnan(c)) - c = crt_copysignl(0, c); - if (crt_isnan(d)) - d = crt_copysignl(0, d); - recalc = 1; - } - if (recalc) { - __real__ z = CRT_INFINITY * (a * c - b * d); - __imag__ z = CRT_INFINITY * (a * d + b * c); - } +COMPILER_RT_ABI long double _Complex __multc3(long double a, long double b, + long double c, long double d) { + long double ac = a * c; + long double bd = b * d; + long double ad = a * d; + long double bc = b * c; + long double _Complex z; + __real__ z = ac - bd; + __imag__ z = ad + bc; + if (crt_isnan(__real__ z) && crt_isnan(__imag__ z)) { + int recalc = 0; + if (crt_isinf(a) || crt_isinf(b)) { + a = crt_copysignl(crt_isinf(a) ? 1 : 0, a); + b = crt_copysignl(crt_isinf(b) ? 1 : 0, b); + if (crt_isnan(c)) + c = crt_copysignl(0, c); + if (crt_isnan(d)) + d = crt_copysignl(0, d); + recalc = 1; } - return z; + if (crt_isinf(c) || crt_isinf(d)) { + c = crt_copysignl(crt_isinf(c) ? 1 : 0, c); + d = crt_copysignl(crt_isinf(d) ? 
1 : 0, d); + if (crt_isnan(a)) + a = crt_copysignl(0, a); + if (crt_isnan(b)) + b = crt_copysignl(0, b); + recalc = 1; + } + if (!recalc && + (crt_isinf(ac) || crt_isinf(bd) || crt_isinf(ad) || crt_isinf(bc))) { + if (crt_isnan(a)) + a = crt_copysignl(0, a); + if (crt_isnan(b)) + b = crt_copysignl(0, b); + if (crt_isnan(c)) + c = crt_copysignl(0, c); + if (crt_isnan(d)) + d = crt_copysignl(0, d); + recalc = 1; + } + if (recalc) { + __real__ z = CRT_INFINITY * (a * c - b * d); + __imag__ z = CRT_INFINITY * (a * d + b * c); + } + } + return z; } Index: compiler-rt/trunk/lib/builtins/multf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/multf3.c +++ compiler-rt/trunk/lib/builtins/multf3.c @@ -17,8 +17,6 @@ #if defined(CRT_HAS_128BIT) && defined(CRT_LDBL_128BIT) #include "fp_mul_impl.inc" -COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { - return __mulXf3__(a, b); -} +COMPILER_RT_ABI fp_t __multf3(fp_t a, fp_t b) { return __mulXf3__(a, b); } #endif Index: compiler-rt/trunk/lib/builtins/multi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/multi3.c +++ compiler-rt/trunk/lib/builtins/multi3.c @@ -17,41 +17,36 @@ /* Returns: a * b */ -static -ti_int -__mulddi3(du_int a, du_int b) -{ - twords r; - const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2; - const du_int lower_mask = (du_int)~0 >> bits_in_dword_2; - r.s.low = (a & lower_mask) * (b & lower_mask); - du_int t = r.s.low >> bits_in_dword_2; - r.s.low &= lower_mask; - t += (a >> bits_in_dword_2) * (b & lower_mask); - r.s.low += (t & lower_mask) << bits_in_dword_2; - r.s.high = t >> bits_in_dword_2; - t = r.s.low >> bits_in_dword_2; - r.s.low &= lower_mask; - t += (b >> bits_in_dword_2) * (a & lower_mask); - r.s.low += (t & lower_mask) << bits_in_dword_2; - r.s.high += t >> bits_in_dword_2; - r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2); - return r.all; +static ti_int 
__mulddi3(du_int a, du_int b) { + twords r; + const int bits_in_dword_2 = (int)(sizeof(di_int) * CHAR_BIT) / 2; + const du_int lower_mask = (du_int)~0 >> bits_in_dword_2; + r.s.low = (a & lower_mask) * (b & lower_mask); + du_int t = r.s.low >> bits_in_dword_2; + r.s.low &= lower_mask; + t += (a >> bits_in_dword_2) * (b & lower_mask); + r.s.low += (t & lower_mask) << bits_in_dword_2; + r.s.high = t >> bits_in_dword_2; + t = r.s.low >> bits_in_dword_2; + r.s.low &= lower_mask; + t += (b >> bits_in_dword_2) * (a & lower_mask); + r.s.low += (t & lower_mask) << bits_in_dword_2; + r.s.high += t >> bits_in_dword_2; + r.s.high += (a >> bits_in_dword_2) * (b >> bits_in_dword_2); + return r.all; } /* Returns: a * b */ -COMPILER_RT_ABI ti_int -__multi3(ti_int a, ti_int b) -{ - twords x; - x.all = a; - twords y; - y.all = b; - twords r; - r.all = __mulddi3(x.s.low, y.s.low); - r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; - return r.all; +COMPILER_RT_ABI ti_int __multi3(ti_int a, ti_int b) { + twords x; + x.all = a; + twords y; + y.all = b; + twords r; + r.all = __mulddi3(x.s.low, y.s.low); + r.s.high += x.s.high * y.s.low + x.s.low * y.s.high; + return r.all; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/mulvdi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/mulvdi3.c +++ compiler-rt/trunk/lib/builtins/mulvdi3.c @@ -17,39 +17,32 @@ /* Effects: aborts if a * b overflows */ -COMPILER_RT_ABI di_int -__mulvdi3(di_int a, di_int b) -{ - const int N = (int)(sizeof(di_int) * CHAR_BIT); - const di_int MIN = (di_int)1 << (N-1); - const di_int MAX = ~MIN; - if (a == MIN) - { - if (b == 0 || b == 1) - return a * b; - compilerrt_abort(); - } - if (b == MIN) - { - if (a == 0 || a == 1) - return a * b; - compilerrt_abort(); - } - di_int sa = a >> (N - 1); - di_int abs_a = (a ^ sa) - sa; - di_int sb = b >> (N - 1); - di_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return a * b; - if (sa == 
sb) - { - if (abs_a > MAX / abs_b) - compilerrt_abort(); - } - else - { - if (abs_a > MIN / -abs_b) - compilerrt_abort(); - } +COMPILER_RT_ABI di_int __mulvdi3(di_int a, di_int b) { + const int N = (int)(sizeof(di_int) * CHAR_BIT); + const di_int MIN = (di_int)1 << (N - 1); + const di_int MAX = ~MIN; + if (a == MIN) { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) { + if (a == 0 || a == 1) + return a * b; + compilerrt_abort(); + } + di_int sa = a >> (N - 1); + di_int abs_a = (a ^ sa) - sa; + di_int sb = b >> (N - 1); + di_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) return a * b; + if (sa == sb) { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } else { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; } Index: compiler-rt/trunk/lib/builtins/mulvsi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/mulvsi3.c +++ compiler-rt/trunk/lib/builtins/mulvsi3.c @@ -17,39 +17,32 @@ /* Effects: aborts if a * b overflows */ -COMPILER_RT_ABI si_int -__mulvsi3(si_int a, si_int b) -{ - const int N = (int)(sizeof(si_int) * CHAR_BIT); - const si_int MIN = (si_int)1 << (N-1); - const si_int MAX = ~MIN; - if (a == MIN) - { - if (b == 0 || b == 1) - return a * b; - compilerrt_abort(); - } - if (b == MIN) - { - if (a == 0 || a == 1) - return a * b; - compilerrt_abort(); - } - si_int sa = a >> (N - 1); - si_int abs_a = (a ^ sa) - sa; - si_int sb = b >> (N - 1); - si_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return a * b; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - compilerrt_abort(); - } - else - { - if (abs_a > MIN / -abs_b) - compilerrt_abort(); - } +COMPILER_RT_ABI si_int __mulvsi3(si_int a, si_int b) { + const int N = (int)(sizeof(si_int) * CHAR_BIT); + const si_int MIN = (si_int)1 << (N - 1); + const si_int MAX = ~MIN; + if (a == MIN) { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) { + if (a == 0 || a 
== 1) + return a * b; + compilerrt_abort(); + } + si_int sa = a >> (N - 1); + si_int abs_a = (a ^ sa) - sa; + si_int sb = b >> (N - 1); + si_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) return a * b; + if (sa == sb) { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } else { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; } Index: compiler-rt/trunk/lib/builtins/mulvti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/mulvti3.c +++ compiler-rt/trunk/lib/builtins/mulvti3.c @@ -19,41 +19,34 @@ /* Effects: aborts if a * b overflows */ -COMPILER_RT_ABI ti_int -__mulvti3(ti_int a, ti_int b) -{ - const int N = (int)(sizeof(ti_int) * CHAR_BIT); - const ti_int MIN = (ti_int)1 << (N-1); - const ti_int MAX = ~MIN; - if (a == MIN) - { - if (b == 0 || b == 1) - return a * b; - compilerrt_abort(); - } - if (b == MIN) - { - if (a == 0 || a == 1) - return a * b; - compilerrt_abort(); - } - ti_int sa = a >> (N - 1); - ti_int abs_a = (a ^ sa) - sa; - ti_int sb = b >> (N - 1); - ti_int abs_b = (b ^ sb) - sb; - if (abs_a < 2 || abs_b < 2) - return a * b; - if (sa == sb) - { - if (abs_a > MAX / abs_b) - compilerrt_abort(); - } - else - { - if (abs_a > MIN / -abs_b) - compilerrt_abort(); - } +COMPILER_RT_ABI ti_int __mulvti3(ti_int a, ti_int b) { + const int N = (int)(sizeof(ti_int) * CHAR_BIT); + const ti_int MIN = (ti_int)1 << (N - 1); + const ti_int MAX = ~MIN; + if (a == MIN) { + if (b == 0 || b == 1) + return a * b; + compilerrt_abort(); + } + if (b == MIN) { + if (a == 0 || a == 1) + return a * b; + compilerrt_abort(); + } + ti_int sa = a >> (N - 1); + ti_int abs_a = (a ^ sa) - sa; + ti_int sb = b >> (N - 1); + ti_int abs_b = (b ^ sb) - sb; + if (abs_a < 2 || abs_b < 2) return a * b; + if (sa == sb) { + if (abs_a > MAX / abs_b) + compilerrt_abort(); + } else { + if (abs_a > MIN / -abs_b) + compilerrt_abort(); + } + return a * b; } #endif /* CRT_HAS_128BIT */ Index: 
compiler-rt/trunk/lib/builtins/mulxc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/mulxc3.c +++ compiler-rt/trunk/lib/builtins/mulxc3.c @@ -18,59 +18,53 @@ /* Returns: the product of a + ib and c + id */ -COMPILER_RT_ABI Lcomplex -__mulxc3(long double __a, long double __b, long double __c, long double __d) -{ - long double __ac = __a * __c; - long double __bd = __b * __d; - long double __ad = __a * __d; - long double __bc = __b * __c; - Lcomplex z; - COMPLEX_REAL(z) = __ac - __bd; - COMPLEX_IMAGINARY(z) = __ad + __bc; - if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) - { - int __recalc = 0; - if (crt_isinf(__a) || crt_isinf(__b)) - { - __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); - __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); - if (crt_isnan(__c)) - __c = crt_copysignl(0, __c); - if (crt_isnan(__d)) - __d = crt_copysignl(0, __d); - __recalc = 1; - } - if (crt_isinf(__c) || crt_isinf(__d)) - { - __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); - __d = crt_copysignl(crt_isinf(__d) ? 
1 : 0, __d); - if (crt_isnan(__a)) - __a = crt_copysignl(0, __a); - if (crt_isnan(__b)) - __b = crt_copysignl(0, __b); - __recalc = 1; - } - if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || - crt_isinf(__ad) || crt_isinf(__bc))) - { - if (crt_isnan(__a)) - __a = crt_copysignl(0, __a); - if (crt_isnan(__b)) - __b = crt_copysignl(0, __b); - if (crt_isnan(__c)) - __c = crt_copysignl(0, __c); - if (crt_isnan(__d)) - __d = crt_copysignl(0, __d); - __recalc = 1; - } - if (__recalc) - { - COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); - COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); - } +COMPILER_RT_ABI Lcomplex __mulxc3(long double __a, long double __b, + long double __c, long double __d) { + long double __ac = __a * __c; + long double __bd = __b * __d; + long double __ad = __a * __d; + long double __bc = __b * __c; + Lcomplex z; + COMPLEX_REAL(z) = __ac - __bd; + COMPLEX_IMAGINARY(z) = __ad + __bc; + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { + int __recalc = 0; + if (crt_isinf(__a) || crt_isinf(__b)) { + __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); + if (crt_isnan(__c)) + __c = crt_copysignl(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignl(0, __d); + __recalc = 1; } - return z; + if (crt_isinf(__c) || crt_isinf(__d)) { + __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 
1 : 0, __d); + if (crt_isnan(__a)) + __a = crt_copysignl(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignl(0, __b); + __recalc = 1; + } + if (!__recalc && (crt_isinf(__ac) || crt_isinf(__bd) || crt_isinf(__ad) || + crt_isinf(__bc))) { + if (crt_isnan(__a)) + __a = crt_copysignl(0, __a); + if (crt_isnan(__b)) + __b = crt_copysignl(0, __b); + if (crt_isnan(__c)) + __c = crt_copysignl(0, __c); + if (crt_isnan(__d)) + __d = crt_copysignl(0, __d); + __recalc = 1; + } + if (__recalc) { + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c - __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__a * __d + __b * __c); + } + } + return z; } #endif Index: compiler-rt/trunk/lib/builtins/negdf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/negdf2.c +++ compiler-rt/trunk/lib/builtins/negdf2.c @@ -13,16 +13,11 @@ #define DOUBLE_PRECISION #include "fp_lib.h" -COMPILER_RT_ABI fp_t -__negdf2(fp_t a) { - return fromRep(toRep(a) ^ signBit); -} +COMPILER_RT_ABI fp_t __negdf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_dneg(fp_t a) { - return __negdf2(a); -} +AEABI_RTABI fp_t __aeabi_dneg(fp_t a) { return __negdf2(a); } #else AEABI_RTABI fp_t __aeabi_dneg(fp_t a) COMPILER_RT_ALIAS(__negdf2); #endif Index: compiler-rt/trunk/lib/builtins/negdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/negdi2.c +++ compiler-rt/trunk/lib/builtins/negdi2.c @@ -15,11 +15,9 @@ /* Returns: -a */ -COMPILER_RT_ABI di_int -__negdi2(di_int a) -{ - /* Note: this routine is here for API compatibility; any sane compiler - * should expand it inline. - */ - return -a; +COMPILER_RT_ABI di_int __negdi2(di_int a) { + /* Note: this routine is here for API compatibility; any sane compiler + * should expand it inline. 
+ */ + return -a; } Index: compiler-rt/trunk/lib/builtins/negsf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/negsf2.c +++ compiler-rt/trunk/lib/builtins/negsf2.c @@ -13,16 +13,11 @@ #define SINGLE_PRECISION #include "fp_lib.h" -COMPILER_RT_ABI fp_t -__negsf2(fp_t a) { - return fromRep(toRep(a) ^ signBit); -} +COMPILER_RT_ABI fp_t __negsf2(fp_t a) { return fromRep(toRep(a) ^ signBit); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_fneg(fp_t a) { - return __negsf2(a); -} +AEABI_RTABI fp_t __aeabi_fneg(fp_t a) { return __negsf2(a); } #else AEABI_RTABI fp_t __aeabi_fneg(fp_t a) COMPILER_RT_ALIAS(__negsf2); #endif Index: compiler-rt/trunk/lib/builtins/negti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/negti2.c +++ compiler-rt/trunk/lib/builtins/negti2.c @@ -17,13 +17,11 @@ /* Returns: -a */ -COMPILER_RT_ABI ti_int -__negti2(ti_int a) -{ - /* Note: this routine is here for API compatibility; any sane compiler - * should expand it inline. - */ - return -a; +COMPILER_RT_ABI ti_int __negti2(ti_int a) { + /* Note: this routine is here for API compatibility; any sane compiler + * should expand it inline. 
+ */ + return -a; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/negvdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/negvdi2.c +++ compiler-rt/trunk/lib/builtins/negvdi2.c @@ -17,11 +17,9 @@ /* Effects: aborts if -a overflows */ -COMPILER_RT_ABI di_int -__negvdi2(di_int a) -{ - const di_int MIN = (di_int)1 << ((int)(sizeof(di_int) * CHAR_BIT)-1); - if (a == MIN) - compilerrt_abort(); - return -a; +COMPILER_RT_ABI di_int __negvdi2(di_int a) { + const di_int MIN = (di_int)1 << ((int)(sizeof(di_int) * CHAR_BIT) - 1); + if (a == MIN) + compilerrt_abort(); + return -a; } Index: compiler-rt/trunk/lib/builtins/negvsi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/negvsi2.c +++ compiler-rt/trunk/lib/builtins/negvsi2.c @@ -17,11 +17,9 @@ /* Effects: aborts if -a overflows */ -COMPILER_RT_ABI si_int -__negvsi2(si_int a) -{ - const si_int MIN = (si_int)1 << ((int)(sizeof(si_int) * CHAR_BIT)-1); - if (a == MIN) - compilerrt_abort(); - return -a; +COMPILER_RT_ABI si_int __negvsi2(si_int a) { + const si_int MIN = (si_int)1 << ((int)(sizeof(si_int) * CHAR_BIT) - 1); + if (a == MIN) + compilerrt_abort(); + return -a; } Index: compiler-rt/trunk/lib/builtins/negvti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/negvti2.c +++ compiler-rt/trunk/lib/builtins/negvti2.c @@ -19,13 +19,11 @@ /* Effects: aborts if -a overflows */ -COMPILER_RT_ABI ti_int -__negvti2(ti_int a) -{ - const ti_int MIN = (ti_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT)-1); - if (a == MIN) - compilerrt_abort(); - return -a; +COMPILER_RT_ABI ti_int __negvti2(ti_int a) { + const ti_int MIN = (ti_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT) - 1); + if (a == MIN) + compilerrt_abort(); + return -a; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/os_version_check.c 
=================================================================== --- compiler-rt/trunk/lib/builtins/os_version_check.c +++ compiler-rt/trunk/lib/builtins/os_version_check.c @@ -30,7 +30,7 @@ * just forward declare everything that we need from it. */ typedef const void *CFDataRef, *CFAllocatorRef, *CFPropertyListRef, - *CFStringRef, *CFDictionaryRef, *CFTypeRef, *CFErrorRef; + *CFStringRef, *CFDictionaryRef, *CFTypeRef, *CFErrorRef; #if __LLP64__ typedef unsigned long long CFTypeID; @@ -87,8 +87,8 @@ if (!CFDataCreateWithBytesNoCopyFunc) return; CFPropertyListCreateWithDataFuncTy CFPropertyListCreateWithDataFunc = - (CFPropertyListCreateWithDataFuncTy)dlsym( - RTLD_DEFAULT, "CFPropertyListCreateWithData"); + (CFPropertyListCreateWithDataFuncTy)dlsym(RTLD_DEFAULT, + "CFPropertyListCreateWithData"); /* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it * will be NULL on earlier OS versions. */ #pragma clang diagnostic push Index: compiler-rt/trunk/lib/builtins/paritydi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/paritydi2.c +++ compiler-rt/trunk/lib/builtins/paritydi2.c @@ -15,10 +15,8 @@ /* Returns: 1 if number of bits is odd else returns 0 */ -COMPILER_RT_ABI si_int -__paritydi2(di_int a) -{ - dwords x; - x.all = a; - return __paritysi2(x.s.high ^ x.s.low); +COMPILER_RT_ABI si_int __paritydi2(di_int a) { + dwords x; + x.all = a; + return __paritysi2(x.s.high ^ x.s.low); } Index: compiler-rt/trunk/lib/builtins/paritysi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/paritysi2.c +++ compiler-rt/trunk/lib/builtins/paritysi2.c @@ -15,12 +15,10 @@ /* Returns: 1 if number of bits is odd else returns 0 */ -COMPILER_RT_ABI si_int -__paritysi2(si_int a) -{ - su_int x = (su_int)a; - x ^= x >> 16; - x ^= x >> 8; - x ^= x >> 4; - return (0x6996 >> (x & 0xF)) & 1; +COMPILER_RT_ABI si_int __paritysi2(si_int a) { + su_int x = (su_int)a; + x 
^= x >> 16; + x ^= x >> 8; + x ^= x >> 4; + return (0x6996 >> (x & 0xF)) & 1; } Index: compiler-rt/trunk/lib/builtins/parityti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/parityti2.c +++ compiler-rt/trunk/lib/builtins/parityti2.c @@ -9,7 +9,7 @@ * This file implements __parityti2 for the compiler_rt library. * * ===----------------------------------------------------------------------=== - */ + */ #include "int_lib.h" @@ -17,12 +17,10 @@ /* Returns: 1 if number of bits is odd else returns 0 */ -COMPILER_RT_ABI si_int -__parityti2(ti_int a) -{ - twords x; - x.all = a; - return __paritydi2(x.s.high ^ x.s.low); +COMPILER_RT_ABI si_int __parityti2(ti_int a) { + twords x; + x.all = a; + return __paritydi2(x.s.high ^ x.s.low); } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/popcountdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/popcountdi2.c +++ compiler-rt/trunk/lib/builtins/popcountdi2.c @@ -15,21 +15,21 @@ /* Returns: count of 1 bits */ -COMPILER_RT_ABI si_int -__popcountdi2(di_int a) -{ - du_int x2 = (du_int)a; - x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); - /* Every 2 bits holds the sum of every pair of bits (32) */ - x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); - /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) */ - x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; - /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) */ - su_int x = (su_int)(x2 + (x2 >> 32)); - /* The lower 32 bits hold four 16 bit sums (5 significant bits). */ - /* Upper 32 bits are garbage */ - x = x + (x >> 16); - /* The lower 16 bits hold two 32 bit sums (6 significant bits). 
*/ - /* Upper 16 bits are garbage */ - return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */ +COMPILER_RT_ABI si_int __popcountdi2(di_int a) { + du_int x2 = (du_int)a; + x2 = x2 - ((x2 >> 1) & 0x5555555555555555uLL); + /* Every 2 bits holds the sum of every pair of bits (32) */ + x2 = ((x2 >> 2) & 0x3333333333333333uLL) + (x2 & 0x3333333333333333uLL); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (16) + */ + x2 = (x2 + (x2 >> 4)) & 0x0F0F0F0F0F0F0F0FuLL; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (8) + */ + su_int x = (su_int)(x2 + (x2 >> 32)); + /* The lower 32 bits hold four 16 bit sums (5 significant bits). */ + /* Upper 32 bits are garbage */ + x = x + (x >> 16); + /* The lower 16 bits hold two 32 bit sums (6 significant bits). */ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000007F; /* (7 significant bits) */ } Index: compiler-rt/trunk/lib/builtins/popcountsi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/popcountsi2.c +++ compiler-rt/trunk/lib/builtins/popcountsi2.c @@ -15,18 +15,16 @@ /* Returns: count of 1 bits */ -COMPILER_RT_ABI si_int -__popcountsi2(si_int a) -{ - su_int x = (su_int)a; - x = x - ((x >> 1) & 0x55555555); - /* Every 2 bits holds the sum of every pair of bits */ - x = ((x >> 2) & 0x33333333) + (x & 0x33333333); - /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */ - x = (x + (x >> 4)) & 0x0F0F0F0F; - /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */ - x = (x + (x >> 16)); - /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/ - /* Upper 16 bits are garbage */ - return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */ +COMPILER_RT_ABI si_int __popcountsi2(si_int a) { + su_int x = (su_int)a; + x = x - ((x >> 1) & 0x55555555); + /* Every 2 bits holds the sum of every pair of bits */ + x = ((x >> 2) & 0x33333333) + (x & 
0x33333333); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) */ + x = (x + (x >> 4)) & 0x0F0F0F0F; + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) */ + x = (x + (x >> 16)); + /* The lower 16 bits hold two 8 bit sums (5 significant bits).*/ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0x0000003F; /* (6 significant bits) */ } Index: compiler-rt/trunk/lib/builtins/popcountti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/popcountti2.c +++ compiler-rt/trunk/lib/builtins/popcountti2.c @@ -17,27 +17,31 @@ /* Returns: count of 1 bits */ -COMPILER_RT_ABI si_int -__popcountti2(ti_int a) -{ - tu_int x3 = (tu_int)a; - x3 = x3 - ((x3 >> 1) & (((tu_int)0x5555555555555555uLL << 64) | - 0x5555555555555555uLL)); - /* Every 2 bits holds the sum of every pair of bits (64) */ - x3 = ((x3 >> 2) & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)) - + (x3 & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)); - /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (32) */ - x3 = (x3 + (x3 >> 4)) - & (((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) | 0x0F0F0F0F0F0F0F0FuLL); - /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (16) */ - du_int x2 = (du_int)(x3 + (x3 >> 64)); - /* Every 8 bits holds the sum of every 8-set of bits (5 significant bits) (8) */ - su_int x = (su_int)(x2 + (x2 >> 32)); - /* Every 8 bits holds the sum of every 8-set of bits (6 significant bits) (4) */ - x = x + (x >> 16); - /* Every 8 bits holds the sum of every 8-set of bits (7 significant bits) (2) */ - /* Upper 16 bits are garbage */ - return (x + (x >> 8)) & 0xFF; /* (8 significant bits) */ +COMPILER_RT_ABI si_int __popcountti2(ti_int a) { + tu_int x3 = (tu_int)a; + x3 = x3 - ((x3 >> 1) & + (((tu_int)0x5555555555555555uLL << 64) | 0x5555555555555555uLL)); + /* Every 2 bits holds the sum of every pair of bits (64) */ + x3 
= ((x3 >> 2) & + (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)) + + (x3 & (((tu_int)0x3333333333333333uLL << 64) | 0x3333333333333333uLL)); + /* Every 4 bits holds the sum of every 4-set of bits (3 significant bits) (32) + */ + x3 = (x3 + (x3 >> 4)) & + (((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) | 0x0F0F0F0F0F0F0F0FuLL); + /* Every 8 bits holds the sum of every 8-set of bits (4 significant bits) (16) + */ + du_int x2 = (du_int)(x3 + (x3 >> 64)); + /* Every 8 bits holds the sum of every 8-set of bits (5 significant bits) (8) + */ + su_int x = (su_int)(x2 + (x2 >> 32)); + /* Every 8 bits holds the sum of every 8-set of bits (6 significant bits) (4) + */ + x = x + (x >> 16); + /* Every 8 bits holds the sum of every 8-set of bits (7 significant bits) (2) + */ + /* Upper 16 bits are garbage */ + return (x + (x >> 8)) & 0xFF; /* (8 significant bits) */ } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/powidf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/powidf2.c +++ compiler-rt/trunk/lib/builtins/powidf2.c @@ -15,19 +15,16 @@ /* Returns: a ^ b */ -COMPILER_RT_ABI double -__powidf2(double a, si_int b) -{ - const int recip = b < 0; - double r = 1; - while (1) - { - if (b & 1) - r *= a; - b /= 2; - if (b == 0) - break; - a *= a; - } - return recip ? 1/r : r; +COMPILER_RT_ABI double __powidf2(double a, si_int b) { + const int recip = b < 0; + double r = 1; + while (1) { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 
1 / r : r; } Index: compiler-rt/trunk/lib/builtins/powisf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/powisf2.c +++ compiler-rt/trunk/lib/builtins/powisf2.c @@ -15,19 +15,16 @@ /* Returns: a ^ b */ -COMPILER_RT_ABI float -__powisf2(float a, si_int b) -{ - const int recip = b < 0; - float r = 1; - while (1) - { - if (b & 1) - r *= a; - b /= 2; - if (b == 0) - break; - a *= a; - } - return recip ? 1/r : r; +COMPILER_RT_ABI float __powisf2(float a, si_int b) { + const int recip = b < 0; + float r = 1; + while (1) { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1 / r : r; } Index: compiler-rt/trunk/lib/builtins/powitf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/powitf2.c +++ compiler-rt/trunk/lib/builtins/powitf2.c @@ -17,21 +17,18 @@ /* Returns: a ^ b */ -COMPILER_RT_ABI long double -__powitf2(long double a, si_int b) -{ - const int recip = b < 0; - long double r = 1; - while (1) - { - if (b & 1) - r *= a; - b /= 2; - if (b == 0) - break; - a *= a; - } - return recip ? 1/r : r; +COMPILER_RT_ABI long double __powitf2(long double a, si_int b) { + const int recip = b < 0; + long double r = 1; + while (1) { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1 / r : r; } #endif Index: compiler-rt/trunk/lib/builtins/powixf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/powixf2.c +++ compiler-rt/trunk/lib/builtins/powixf2.c @@ -17,21 +17,18 @@ /* Returns: a ^ b */ -COMPILER_RT_ABI long double -__powixf2(long double a, si_int b) -{ - const int recip = b < 0; - long double r = 1; - while (1) - { - if (b & 1) - r *= a; - b /= 2; - if (b == 0) - break; - a *= a; - } - return recip ? 
1/r : r; +COMPILER_RT_ABI long double __powixf2(long double a, si_int b) { + const int recip = b < 0; + long double r = 1; + while (1) { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1 / r : r; } #endif Index: compiler-rt/trunk/lib/builtins/ppc/DD.h =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/DD.h +++ compiler-rt/trunk/lib/builtins/ppc/DD.h @@ -4,20 +4,20 @@ #include "../int_lib.h" typedef union { - long double ld; - struct { - double hi; - double lo; - }s; + long double ld; + struct { + double hi; + double lo; + } s; } DD; -typedef union { - double d; - uint64_t x; +typedef union { + double d; + uint64_t x; } doublebits; -#define LOWORDER(xy,xHi,xLo,yHi,yLo) \ - (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo)) +#define LOWORDER(xy, xHi, xLo, yHi, yLo) \ + (((((xHi) * (yHi) - (xy)) + (xHi) * (yLo)) + (xLo) * (yHi)) + (xLo) * (yLo)) static __inline ALWAYS_INLINE double local_fabs(double x) { doublebits result = {.d = x}; Index: compiler-rt/trunk/lib/builtins/ppc/divtc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/divtc3.c +++ compiler-rt/trunk/lib/builtins/ppc/divtc3.c @@ -3,8 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ -#include "DD.h" #include "../int_math.h" +#include "DD.h" // Use DOUBLE_PRECISION because the soft-fp method we use is logb (on the upper // half of the long doubles), even though this file defines complex division for // 128-bit floats. @@ -15,84 +15,83 @@ #define CRT_INFINITY HUGE_VAL #endif /* CRT_INFINITY */ -#define makeFinite(x) { \ - (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \ - (x).s.lo = 0.0; \ +#define makeFinite(x) \ + { \ + (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 
1.0 : 0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ + } + +long double _Complex __divtc3(long double a, long double b, long double c, + long double d) { + DD cDD = {.ld = c}; + DD dDD = {.ld = d}; + + int ilogbw = 0; + const double logbw = + __compiler_rt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi))); + + if (crt_isfinite(logbw)) { + ilogbw = (int)logbw; + + cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw); + cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw); + dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw); + dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw); + } + + const long double denom = + __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld)); + const long double realNumerator = + __gcc_qadd(__gcc_qmul(a, cDD.ld), __gcc_qmul(b, dDD.ld)); + const long double imagNumerator = + __gcc_qsub(__gcc_qmul(b, cDD.ld), __gcc_qmul(a, dDD.ld)); + + DD real = {.ld = __gcc_qdiv(realNumerator, denom)}; + DD imag = {.ld = __gcc_qdiv(imagNumerator, denom)}; + + real.s.hi = crt_scalbn(real.s.hi, -ilogbw); + real.s.lo = crt_scalbn(real.s.lo, -ilogbw); + imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw); + imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw); + + if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) { + DD aDD = {.ld = a}; + DD bDD = {.ld = b}; + DD rDD = {.ld = denom}; + + if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) || !crt_isnan(bDD.s.hi))) { + real.s.hi = crt_copysign(CRT_INFINITY, cDD.s.hi) * aDD.s.hi; + real.s.lo = 0.0; + imag.s.hi = crt_copysign(CRT_INFINITY, cDD.s.hi) * bDD.s.hi; + imag.s.lo = 0.0; + } + + else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) && + crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi)) { + makeFinite(aDD); + makeFinite(bDD); + real.s.hi = CRT_INFINITY * (aDD.s.hi * cDD.s.hi + bDD.s.hi * dDD.s.hi); + real.s.lo = 0.0; + imag.s.hi = CRT_INFINITY * (bDD.s.hi * cDD.s.hi - aDD.s.hi * dDD.s.hi); + imag.s.lo = 0.0; + } + + else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) && + crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi)) { + makeFinite(cDD); + makeFinite(dDD); + 
real.s.hi = + crt_copysign(0.0, (aDD.s.hi * cDD.s.hi + bDD.s.hi * dDD.s.hi)); + real.s.lo = 0.0; + imag.s.hi = + crt_copysign(0.0, (bDD.s.hi * cDD.s.hi - aDD.s.hi * dDD.s.hi)); + imag.s.lo = 0.0; + } } -long double _Complex -__divtc3(long double a, long double b, long double c, long double d) -{ - DD cDD = { .ld = c }; - DD dDD = { .ld = d }; - - int ilogbw = 0; - const double logbw = __compiler_rt_logb( - crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi))); - - if (crt_isfinite(logbw)) - { - ilogbw = (int)logbw; - - cDD.s.hi = crt_scalbn(cDD.s.hi, -ilogbw); - cDD.s.lo = crt_scalbn(cDD.s.lo, -ilogbw); - dDD.s.hi = crt_scalbn(dDD.s.hi, -ilogbw); - dDD.s.lo = crt_scalbn(dDD.s.lo, -ilogbw); - } - - const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld)); - const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld)); - const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld)); - - DD real = { .ld = __gcc_qdiv(realNumerator, denom) }; - DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) }; - - real.s.hi = crt_scalbn(real.s.hi, -ilogbw); - real.s.lo = crt_scalbn(real.s.lo, -ilogbw); - imag.s.hi = crt_scalbn(imag.s.hi, -ilogbw); - imag.s.lo = crt_scalbn(imag.s.lo, -ilogbw); - - if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) - { - DD aDD = { .ld = a }; - DD bDD = { .ld = b }; - DD rDD = { .ld = denom }; - - if ((rDD.s.hi == 0.0) && (!crt_isnan(aDD.s.hi) || - !crt_isnan(bDD.s.hi))) - { - real.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * aDD.s.hi; - real.s.lo = 0.0; - imag.s.hi = crt_copysign(CRT_INFINITY,cDD.s.hi) * bDD.s.hi; - imag.s.lo = 0.0; - } - - else if ((crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) && - crt_isfinite(cDD.s.hi) && crt_isfinite(dDD.s.hi)) - { - makeFinite(aDD); - makeFinite(bDD); - real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi); - real.s.lo = 0.0; - imag.s.hi = CRT_INFINITY * (bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi); - imag.s.lo = 0.0; - 
} - - else if ((crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) && - crt_isfinite(aDD.s.hi) && crt_isfinite(bDD.s.hi)) - { - makeFinite(cDD); - makeFinite(dDD); - real.s.hi = crt_copysign(0.0,(aDD.s.hi*cDD.s.hi + bDD.s.hi*dDD.s.hi)); - real.s.lo = 0.0; - imag.s.hi = crt_copysign(0.0,(bDD.s.hi*cDD.s.hi - aDD.s.hi*dDD.s.hi)); - imag.s.lo = 0.0; - } - } - - long double _Complex z; - __real__ z = real.ld; - __imag__ z = imag.ld; - - return z; + long double _Complex z; + __real__ z = real.ld; + __imag__ z = imag.ld; + + return z; } Index: compiler-rt/trunk/lib/builtins/ppc/fixtfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/fixtfdi.c +++ compiler-rt/trunk/lib/builtins/ppc/fixtfdi.c @@ -7,99 +7,98 @@ * This file implements the PowerPC 128-bit double-double -> int64_t conversion */ -#include "DD.h" #include "../int_math.h" +#include "DD.h" + +uint64_t __fixtfdi(long double input) { + const DD x = {.ld = input}; + const doublebits hibits = {.d = x.s.hi}; + + const uint32_t absHighWord = + (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff); + const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000); + + /* If (1.0 - tiny) <= input < 0x1.0p63: */ + if (UINT32_C(0x03f00000) > absHighWordMinusOne) { + /* Do an unsigned conversion of the absolute value, then restore the sign. + */ + const int unbiasedHeadExponent = absHighWordMinusOne >> 20; + + int64_t result = hibits.x & INT64_C(0x000fffffffffffff); /* mantissa(hi) */ + result |= INT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ + result <<= 10; /* mantissa(hi) with one zero preceding bit. */ + + const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63; + + /* If the tail is non-zero, we need to patch in the tail bits. 
*/ + if (0.0 != x.s.lo) { + const doublebits lobits = {.d = x.s.lo}; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* At this point we have the mantissa of |tail| */ + /* We need to negate it if head and tail have different signs. */ + const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63; + const int64_t negationMask = loNegationMask ^ hiNegationMask; + tailMantissa = (tailMantissa ^ negationMask) - negationMask; + + /* Now we have the mantissa of tail as a signed 2s-complement integer */ + + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + + /* Shift the tail mantissa into the right position, accounting for the + * bias of 10 that we shifted the head mantissa by. + */ + tailMantissa >>= + (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10))); + + result += tailMantissa; + } + + result >>= (62 - unbiasedHeadExponent); + + /* Restore the sign of the result and return */ + result = (result ^ hiNegationMask) - hiNegationMask; + return result; + } + + /* Edge cases handled here: */ + + /* |x| < 1, result is zero. */ + if (1.0 > crt_fabs(x.s.hi)) + return INT64_C(0); + + /* x very close to INT64_MIN, care must be taken to see which side we are on. + */ + if (x.s.hi == -0x1.0p63) { + + int64_t result = INT64_MIN; + + if (0.0 < x.s.lo) { + /* If the tail is positive, the correct result is something other than + * INT64_MIN. we'll need to figure out what it is. 
+ */ + + const doublebits lobits = {.d = x.s.lo}; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* Now we negate the tailMantissa */ + tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1); + + /* And shift it by the appropriate amount */ + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + tailMantissa >>= 1075 - biasedTailExponent; + + result -= tailMantissa; + } + + return result; + } -uint64_t __fixtfdi(long double input) -{ - const DD x = { .ld = input }; - const doublebits hibits = { .d = x.s.hi }; - - const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff); - const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000); - - /* If (1.0 - tiny) <= input < 0x1.0p63: */ - if (UINT32_C(0x03f00000) > absHighWordMinusOne) - { - /* Do an unsigned conversion of the absolute value, then restore the sign. */ - const int unbiasedHeadExponent = absHighWordMinusOne >> 20; - - int64_t result = hibits.x & INT64_C(0x000fffffffffffff); /* mantissa(hi) */ - result |= INT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ - result <<= 10; /* mantissa(hi) with one zero preceding bit. */ - - const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63; - - /* If the tail is non-zero, we need to patch in the tail bits. */ - if (0.0 != x.s.lo) - { - const doublebits lobits = { .d = x.s.lo }; - int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); - tailMantissa |= INT64_C(0x0010000000000000); - - /* At this point we have the mantissa of |tail| */ - /* We need to negate it if head and tail have different signs. 
*/ - const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63; - const int64_t negationMask = loNegationMask ^ hiNegationMask; - tailMantissa = (tailMantissa ^ negationMask) - negationMask; - - /* Now we have the mantissa of tail as a signed 2s-complement integer */ - - const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; - - /* Shift the tail mantissa into the right position, accounting for the - * bias of 10 that we shifted the head mantissa by. - */ - tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10))); - - result += tailMantissa; - } - - result >>= (62 - unbiasedHeadExponent); - - /* Restore the sign of the result and return */ - result = (result ^ hiNegationMask) - hiNegationMask; - return result; - - } - - /* Edge cases handled here: */ - - /* |x| < 1, result is zero. */ - if (1.0 > crt_fabs(x.s.hi)) - return INT64_C(0); - - /* x very close to INT64_MIN, care must be taken to see which side we are on. */ - if (x.s.hi == -0x1.0p63) { - - int64_t result = INT64_MIN; - - if (0.0 < x.s.lo) - { - /* If the tail is positive, the correct result is something other than INT64_MIN. - * we'll need to figure out what it is. 
- */ - - const doublebits lobits = { .d = x.s.lo }; - int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); - tailMantissa |= INT64_C(0x0010000000000000); - - /* Now we negate the tailMantissa */ - tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1); - - /* And shift it by the appropriate amount */ - const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; - tailMantissa >>= 1075 - biasedTailExponent; - - result -= tailMantissa; - } - - return result; - } - - /* Signed overflows, infinities, and NaNs */ - if (x.s.hi > 0.0) - return INT64_MAX; - else - return INT64_MIN; + /* Signed overflows, infinities, and NaNs */ + if (x.s.hi > 0.0) + return INT64_MAX; + else + return INT64_MIN; } Index: compiler-rt/trunk/lib/builtins/ppc/fixunstfdi.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/fixunstfdi.c +++ compiler-rt/trunk/lib/builtins/ppc/fixunstfdi.c @@ -4,57 +4,58 @@ */ /* uint64_t __fixunstfdi(long double x); */ -/* This file implements the PowerPC 128-bit double-double -> uint64_t conversion */ +/* This file implements the PowerPC 128-bit double-double -> uint64_t conversion + */ #include "DD.h" -uint64_t __fixunstfdi(long double input) -{ - const DD x = { .ld = input }; - const doublebits hibits = { .d = x.s.hi }; - - const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000); - - /* If (1.0 - tiny) <= input < 0x1.0p64: */ - if (UINT32_C(0x04000000) > highWordMinusOne) - { - const int unbiasedHeadExponent = highWordMinusOne >> 20; - - uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); /* mantissa(hi) */ - result |= UINT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ - result <<= 11; /* mantissa(hi) left aligned in the int64 field. */ - - /* If the tail is non-zero, we need to patch in the tail bits. 
*/ - if (0.0 != x.s.lo) - { - const doublebits lobits = { .d = x.s.lo }; - int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); - tailMantissa |= INT64_C(0x0010000000000000); - - /* At this point we have the mantissa of |tail| */ - - const int64_t negationMask = ((int64_t)(lobits.x)) >> 63; - tailMantissa = (tailMantissa ^ negationMask) - negationMask; - - /* Now we have the mantissa of tail as a signed 2s-complement integer */ - - const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; - - /* Shift the tail mantissa into the right position, accounting for the - * bias of 11 that we shifted the head mantissa by. - */ - tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11))); - - result += tailMantissa; - } - - result >>= (63 - unbiasedHeadExponent); - return result; - } - - /* Edge cases are handled here, with saturation. */ - if (1.0 > x.s.hi) - return UINT64_C(0); - else - return UINT64_MAX; +uint64_t __fixunstfdi(long double input) { + const DD x = {.ld = input}; + const doublebits hibits = {.d = x.s.hi}; + + const uint32_t highWordMinusOne = + (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000); + + /* If (1.0 - tiny) <= input < 0x1.0p64: */ + if (UINT32_C(0x04000000) > highWordMinusOne) { + const int unbiasedHeadExponent = highWordMinusOne >> 20; + + uint64_t result = + hibits.x & UINT64_C(0x000fffffffffffff); /* mantissa(hi) */ + result |= UINT64_C(0x0010000000000000); /* matissa(hi) with implicit bit */ + result <<= 11; /* mantissa(hi) left aligned in the int64 field. */ + + /* If the tail is non-zero, we need to patch in the tail bits. 
*/ + if (0.0 != x.s.lo) { + const doublebits lobits = {.d = x.s.lo}; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + /* At this point we have the mantissa of |tail| */ + + const int64_t negationMask = ((int64_t)(lobits.x)) >> 63; + tailMantissa = (tailMantissa ^ negationMask) - negationMask; + + /* Now we have the mantissa of tail as a signed 2s-complement integer */ + + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + + /* Shift the tail mantissa into the right position, accounting for the + * bias of 11 that we shifted the head mantissa by. + */ + tailMantissa >>= + (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11))); + + result += tailMantissa; + } + + result >>= (63 - unbiasedHeadExponent); + return result; + } + + /* Edge cases are handled here, with saturation. */ + if (1.0 > x.s.hi) + return UINT64_C(0); + else + return UINT64_MAX; } Index: compiler-rt/trunk/lib/builtins/ppc/floatditf.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/floatditf.c +++ compiler-rt/trunk/lib/builtins/ppc/floatditf.c @@ -9,29 +9,28 @@ #include "DD.h" long double __floatditf(int64_t a) { - - static const double twop32 = 0x1.0p32; - static const double twop52 = 0x1.0p52; - - doublebits low = { .d = twop52 }; - low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a. */ - - const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52; - - /* At this point, we have two double precision numbers - * high_addend and low.d, and we wish to return their sum - * as a canonicalized long double: - */ - - /* This implementation sets the inexact flag spuriously. - * This could be avoided, but at some substantial cost. 
- */ - - DD result; - - result.s.hi = high_addend + low.d; - result.s.lo = (high_addend - result.s.hi) + low.d; - - return result.ld; - + + static const double twop32 = 0x1.0p32; + static const double twop52 = 0x1.0p52; + + doublebits low = {.d = twop52}; + low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a. */ + + const double high_addend = (double)((int32_t)(a >> 32)) * twop32 - twop52; + + /* At this point, we have two double precision numbers + * high_addend and low.d, and we wish to return their sum + * as a canonicalized long double: + */ + + /* This implementation sets the inexact flag spuriously. + * This could be avoided, but at some substantial cost. + */ + + DD result; + + result.s.hi = high_addend + low.d; + result.s.lo = (high_addend - result.s.hi) + low.d; + + return result.ld; } Index: compiler-rt/trunk/lib/builtins/ppc/floatunditf.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/floatunditf.c +++ compiler-rt/trunk/lib/builtins/ppc/floatunditf.c @@ -4,39 +4,39 @@ */ /* long double __floatunditf(unsigned long long x); */ -/* This file implements the PowerPC unsigned long long -> long double conversion */ +/* This file implements the PowerPC unsigned long long -> long double conversion + */ #include "DD.h" long double __floatunditf(uint64_t a) { - - /* Begins with an exact copy of the code from __floatundidf */ - - static const double twop52 = 0x1.0p52; - static const double twop84 = 0x1.0p84; - static const double twop84_plus_twop52 = 0x1.00000001p84; - - doublebits high = { .d = twop84 }; - doublebits low = { .d = twop52 }; - - high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */ - low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */ - - const double high_addend = high.d - twop84_plus_twop52; - - /* At this point, we have two double precision numbers - * high_addend and low.d, and we wish to return their sum - * as a canonicalized long double: 
- */ - - /* This implementation sets the inexact flag spuriously. */ - /* This could be avoided, but at some substantial cost. */ - - DD result; - - result.s.hi = high_addend + low.d; - result.s.lo = (high_addend - result.s.hi) + low.d; - - return result.ld; - + + /* Begins with an exact copy of the code from __floatundidf */ + + static const double twop52 = 0x1.0p52; + static const double twop84 = 0x1.0p84; + static const double twop84_plus_twop52 = 0x1.00000001p84; + + doublebits high = {.d = twop84}; + doublebits low = {.d = twop52}; + + high.x |= a >> 32; /* 0x1.0p84 + high 32 bits of a */ + low.x |= a & UINT64_C(0x00000000ffffffff); /* 0x1.0p52 + low 32 bits of a */ + + const double high_addend = high.d - twop84_plus_twop52; + + /* At this point, we have two double precision numbers + * high_addend and low.d, and we wish to return their sum + * as a canonicalized long double: + */ + + /* This implementation sets the inexact flag spuriously. */ + /* This could be avoided, but at some substantial cost. 
*/ + + DD result; + + result.s.hi = high_addend + low.d; + result.s.lo = (high_addend - result.s.hi) + low.d; + + return result.ld; } Index: compiler-rt/trunk/lib/builtins/ppc/gcc_qadd.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/gcc_qadd.c +++ compiler-rt/trunk/lib/builtins/ppc/gcc_qadd.c @@ -10,68 +10,67 @@ #include "DD.h" -long double __gcc_qadd(long double x, long double y) -{ - static const uint32_t infinityHi = UINT32_C(0x7ff00000); - - DD dst = { .ld = x }, src = { .ld = y }; - - register double A = dst.s.hi, a = dst.s.lo, - B = src.s.hi, b = src.s.lo; - - /* If both operands are zero: */ - if ((A == 0.0) && (B == 0.0)) { - dst.s.hi = A + B; - dst.s.lo = 0.0; - return dst.ld; - } - - /* If either operand is NaN or infinity: */ - const doublebits abits = { .d = A }; - const doublebits bbits = { .d = B }; - if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || - (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { - dst.s.hi = A + B; - dst.s.lo = 0.0; - return dst.ld; - } - - /* If the computation overflows: */ - /* This may be playing things a little bit fast and loose, but it will do for a start. 
*/ - const double testForOverflow = A + (B + (a + b)); - const doublebits testbits = { .d = testForOverflow }; - if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { - dst.s.hi = testForOverflow; - dst.s.lo = 0.0; - return dst.ld; - } - - double H, h; - double T, t; - double W, w; - double Y; - - H = B + (A - (A + B)); - T = b + (a - (a + b)); - h = A + (B - (A + B)); - t = a + (b - (a + b)); - - if (local_fabs(A) <= local_fabs(B)) - w = (a + b) + h; - else - w = (a + b) + H; - - W = (A + B) + w; - Y = (A + B) - W; - Y += w; - - if (local_fabs(a) <= local_fabs(b)) - w = t + Y; - else - w = T + Y; - - dst.s.hi = Y = W + w; - dst.s.lo = (W - Y) + w; - - return dst.ld; +long double __gcc_qadd(long double x, long double y) { + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + + DD dst = {.ld = x}, src = {.ld = y}; + + register double A = dst.s.hi, a = dst.s.lo, B = src.s.hi, b = src.s.lo; + + /* If both operands are zero: */ + if ((A == 0.0) && (B == 0.0)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If either operand is NaN or infinity: */ + const doublebits abits = {.d = A}; + const doublebits bbits = {.d = B}; + if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || + (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If the computation overflows: */ + /* This may be playing things a little bit fast and loose, but it will do for + * a start. 
*/ + const double testForOverflow = A + (B + (a + b)); + const doublebits testbits = {.d = testForOverflow}; + if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = testForOverflow; + dst.s.lo = 0.0; + return dst.ld; + } + + double H, h; + double T, t; + double W, w; + double Y; + + H = B + (A - (A + B)); + T = b + (a - (a + b)); + h = A + (B - (A + B)); + t = a + (b - (a + b)); + + if (local_fabs(A) <= local_fabs(B)) + w = (a + b) + h; + else + w = (a + b) + H; + + W = (A + B) + w; + Y = (A + B) - W; + Y += w; + + if (local_fabs(a) <= local_fabs(b)) + w = t + Y; + else + w = T + Y; + + dst.s.hi = Y = W + w; + dst.s.lo = (W - Y) + w; + + return dst.ld; } Index: compiler-rt/trunk/lib/builtins/ppc/gcc_qdiv.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/gcc_qdiv.c +++ compiler-rt/trunk/lib/builtins/ppc/gcc_qdiv.c @@ -10,47 +10,45 @@ #include "DD.h" -long double __gcc_qdiv(long double a, long double b) -{ - static const uint32_t infinityHi = UINT32_C(0x7ff00000); - DD dst = { .ld = a }, src = { .ld = b }; - - register double x = dst.s.hi, x1 = dst.s.lo, - y = src.s.hi, y1 = src.s.lo; - - double yHi, yLo, qHi, qLo; - double yq, tmp, q; - - q = x / y; - - /* Detect special cases */ - if (q == 0.0) { - dst.s.hi = q; - dst.s.lo = 0.0; - return dst.ld; - } - - const doublebits qBits = { .d = q }; - if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) { - dst.s.hi = q; - dst.s.lo = 0.0; - return dst.ld; - } - - yHi = high26bits(y); - qHi = high26bits(q); - - yq = y * q; - yLo = y - yHi; - qLo = q - qHi; - - tmp = LOWORDER(yq, yHi, yLo, qHi, qLo); - tmp = (x - yq) - tmp; - tmp = ((tmp + x1) - y1 * q) / y; - x = q + tmp; - - dst.s.lo = (q - x) + tmp; - dst.s.hi = x; - +long double __gcc_qdiv(long double a, long double b) { + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + DD dst = {.ld = a}, src = {.ld = b}; + + register double x = dst.s.hi, x1 = dst.s.lo, y = src.s.hi, y1 = 
src.s.lo; + + double yHi, yLo, qHi, qLo; + double yq, tmp, q; + + q = x / y; + + /* Detect special cases */ + if (q == 0.0) { + dst.s.hi = q; + dst.s.lo = 0.0; + return dst.ld; + } + + const doublebits qBits = {.d = q}; + if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = q; + dst.s.lo = 0.0; return dst.ld; + } + + yHi = high26bits(y); + qHi = high26bits(q); + + yq = y * q; + yLo = y - yHi; + qLo = q - qHi; + + tmp = LOWORDER(yq, yHi, yLo, qHi, qLo); + tmp = (x - yq) - tmp; + tmp = ((tmp + x1) - y1 * q) / y; + x = q + tmp; + + dst.s.lo = (q - x) + tmp; + dst.s.hi = x; + + return dst.ld; } Index: compiler-rt/trunk/lib/builtins/ppc/gcc_qmul.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/gcc_qmul.c +++ compiler-rt/trunk/lib/builtins/ppc/gcc_qmul.c @@ -10,45 +10,43 @@ #include "DD.h" -long double __gcc_qmul(long double x, long double y) -{ - static const uint32_t infinityHi = UINT32_C(0x7ff00000); - DD dst = { .ld = x }, src = { .ld = y }; - - register double A = dst.s.hi, a = dst.s.lo, - B = src.s.hi, b = src.s.lo; - - double aHi, aLo, bHi, bLo; - double ab, tmp, tau; - - ab = A * B; - - /* Detect special cases */ - if (ab == 0.0) { - dst.s.hi = ab; - dst.s.lo = 0.0; - return dst.ld; - } - - const doublebits abBits = { .d = ab }; - if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) { - dst.s.hi = ab; - dst.s.lo = 0.0; - return dst.ld; - } - - /* Generic cases handled here. 
*/ - aHi = high26bits(A); - bHi = high26bits(B); - aLo = A - aHi; - bLo = B - bHi; - - tmp = LOWORDER(ab, aHi, aLo, bHi, bLo); - tmp += (A * b + a * B); - tau = ab + tmp; - - dst.s.lo = (ab - tau) + tmp; - dst.s.hi = tau; - +long double __gcc_qmul(long double x, long double y) { + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + DD dst = {.ld = x}, src = {.ld = y}; + + register double A = dst.s.hi, a = dst.s.lo, B = src.s.hi, b = src.s.lo; + + double aHi, aLo, bHi, bLo; + double ab, tmp, tau; + + ab = A * B; + + /* Detect special cases */ + if (ab == 0.0) { + dst.s.hi = ab; + dst.s.lo = 0.0; + return dst.ld; + } + + const doublebits abBits = {.d = ab}; + if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = ab; + dst.s.lo = 0.0; return dst.ld; + } + + /* Generic cases handled here. */ + aHi = high26bits(A); + bHi = high26bits(B); + aLo = A - aHi; + bLo = B - bHi; + + tmp = LOWORDER(ab, aHi, aLo, bHi, bLo); + tmp += (A * b + a * B); + tau = ab + tmp; + + dst.s.lo = (ab - tau) + tmp; + dst.s.hi = tau; + + return dst.ld; } Index: compiler-rt/trunk/lib/builtins/ppc/gcc_qsub.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/gcc_qsub.c +++ compiler-rt/trunk/lib/builtins/ppc/gcc_qsub.c @@ -10,68 +10,67 @@ #include "DD.h" -long double __gcc_qsub(long double x, long double y) -{ - static const uint32_t infinityHi = UINT32_C(0x7ff00000); - - DD dst = { .ld = x }, src = { .ld = y }; - - register double A = dst.s.hi, a = dst.s.lo, - B = -src.s.hi, b = -src.s.lo; - - /* If both operands are zero: */ - if ((A == 0.0) && (B == 0.0)) { - dst.s.hi = A + B; - dst.s.lo = 0.0; - return dst.ld; - } - - /* If either operand is NaN or infinity: */ - const doublebits abits = { .d = A }; - const doublebits bbits = { .d = B }; - if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || - (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { - dst.s.hi = A + B; - dst.s.lo = 0.0; - return 
dst.ld; - } - - /* If the computation overflows: */ - /* This may be playing things a little bit fast and loose, but it will do for a start. */ - const double testForOverflow = A + (B + (a + b)); - const doublebits testbits = { .d = testForOverflow }; - if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { - dst.s.hi = testForOverflow; - dst.s.lo = 0.0; - return dst.ld; - } - - double H, h; - double T, t; - double W, w; - double Y; - - H = B + (A - (A + B)); - T = b + (a - (a + b)); - h = A + (B - (A + B)); - t = a + (b - (a + b)); - - if (local_fabs(A) <= local_fabs(B)) - w = (a + b) + h; - else - w = (a + b) + H; - - W = (A + B) + w; - Y = (A + B) - W; - Y += w; - - if (local_fabs(a) <= local_fabs(b)) - w = t + Y; - else - w = T + Y; - - dst.s.hi = Y = W + w; - dst.s.lo = (W - Y) + w; - - return dst.ld; +long double __gcc_qsub(long double x, long double y) { + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + + DD dst = {.ld = x}, src = {.ld = y}; + + register double A = dst.s.hi, a = dst.s.lo, B = -src.s.hi, b = -src.s.lo; + + /* If both operands are zero: */ + if ((A == 0.0) && (B == 0.0)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If either operand is NaN or infinity: */ + const doublebits abits = {.d = A}; + const doublebits bbits = {.d = B}; + if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || + (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { + dst.s.hi = A + B; + dst.s.lo = 0.0; + return dst.ld; + } + + /* If the computation overflows: */ + /* This may be playing things a little bit fast and loose, but it will do for + * a start. 
*/ + const double testForOverflow = A + (B + (a + b)); + const doublebits testbits = {.d = testForOverflow}; + if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { + dst.s.hi = testForOverflow; + dst.s.lo = 0.0; + return dst.ld; + } + + double H, h; + double T, t; + double W, w; + double Y; + + H = B + (A - (A + B)); + T = b + (a - (a + b)); + h = A + (B - (A + B)); + t = a + (b - (a + b)); + + if (local_fabs(A) <= local_fabs(B)) + w = (a + b) + h; + else + w = (a + b) + H; + + W = (A + B) + w; + Y = (A + B) - W; + Y += w; + + if (local_fabs(a) <= local_fabs(b)) + w = t + Y; + else + w = T + Y; + + dst.s.hi = Y = W + w; + dst.s.lo = (W - Y) + w; + + return dst.ld; } Index: compiler-rt/trunk/lib/builtins/ppc/multc3.c =================================================================== --- compiler-rt/trunk/lib/builtins/ppc/multc3.c +++ compiler-rt/trunk/lib/builtins/ppc/multc3.c @@ -3,89 +3,84 @@ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ -#include "DD.h" #include "../int_math.h" +#include "DD.h" -#define makeFinite(x) { \ - (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 1.0 : 0.0, (x).s.hi); \ - (x).s.lo = 0.0; \ +#define makeFinite(x) \ + { \ + (x).s.hi = crt_copysign(crt_isinf((x).s.hi) ? 
1.0 : 0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ } -#define zeroNaN(x) { \ - if (crt_isnan((x).s.hi)) { \ - (x).s.hi = crt_copysign(0.0, (x).s.hi); \ - (x).s.lo = 0.0; \ - } \ +#define zeroNaN(x) \ + { \ + if (crt_isnan((x).s.hi)) { \ + (x).s.hi = crt_copysign(0.0, (x).s.hi); \ + (x).s.lo = 0.0; \ + } \ } -long double _Complex -__multc3(long double a, long double b, long double c, long double d) -{ - long double ac = __gcc_qmul(a,c); - long double bd = __gcc_qmul(b,d); - long double ad = __gcc_qmul(a,d); - long double bc = __gcc_qmul(b,c); - - DD real = { .ld = __gcc_qsub(ac,bd) }; - DD imag = { .ld = __gcc_qadd(ad,bc) }; - - if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) - { - int recalc = 0; - - DD aDD = { .ld = a }; - DD bDD = { .ld = b }; - DD cDD = { .ld = c }; - DD dDD = { .ld = d }; - - if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) - { - makeFinite(aDD); - makeFinite(bDD); - zeroNaN(cDD); - zeroNaN(dDD); - recalc = 1; - } - - if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) - { - makeFinite(cDD); - makeFinite(dDD); - zeroNaN(aDD); - zeroNaN(bDD); - recalc = 1; - } - - if (!recalc) - { - DD acDD = { .ld = ac }; - DD bdDD = { .ld = bd }; - DD adDD = { .ld = ad }; - DD bcDD = { .ld = bc }; - - if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) || - crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi)) - { - zeroNaN(aDD); - zeroNaN(bDD); - zeroNaN(cDD); - zeroNaN(dDD); - recalc = 1; - } - } - - if (recalc) - { - real.s.hi = CRT_INFINITY * (aDD.s.hi*cDD.s.hi - bDD.s.hi*dDD.s.hi); - real.s.lo = 0.0; - imag.s.hi = CRT_INFINITY * (aDD.s.hi*dDD.s.hi + bDD.s.hi*cDD.s.hi); - imag.s.lo = 0.0; - } - } - - long double _Complex z; - __real__ z = real.ld; - __imag__ z = imag.ld; - - return z; +long double _Complex __multc3(long double a, long double b, long double c, + long double d) { + long double ac = __gcc_qmul(a, c); + long double bd = __gcc_qmul(b, d); + long double ad = __gcc_qmul(a, d); + long double bc = __gcc_qmul(b, c); + + DD real = {.ld = __gcc_qsub(ac, bd)}; + DD imag = 
{.ld = __gcc_qadd(ad, bc)}; + + if (crt_isnan(real.s.hi) && crt_isnan(imag.s.hi)) { + int recalc = 0; + + DD aDD = {.ld = a}; + DD bDD = {.ld = b}; + DD cDD = {.ld = c}; + DD dDD = {.ld = d}; + + if (crt_isinf(aDD.s.hi) || crt_isinf(bDD.s.hi)) { + makeFinite(aDD); + makeFinite(bDD); + zeroNaN(cDD); + zeroNaN(dDD); + recalc = 1; + } + + if (crt_isinf(cDD.s.hi) || crt_isinf(dDD.s.hi)) { + makeFinite(cDD); + makeFinite(dDD); + zeroNaN(aDD); + zeroNaN(bDD); + recalc = 1; + } + + if (!recalc) { + DD acDD = {.ld = ac}; + DD bdDD = {.ld = bd}; + DD adDD = {.ld = ad}; + DD bcDD = {.ld = bc}; + + if (crt_isinf(acDD.s.hi) || crt_isinf(bdDD.s.hi) || + crt_isinf(adDD.s.hi) || crt_isinf(bcDD.s.hi)) { + zeroNaN(aDD); + zeroNaN(bDD); + zeroNaN(cDD); + zeroNaN(dDD); + recalc = 1; + } + } + + if (recalc) { + real.s.hi = CRT_INFINITY * (aDD.s.hi * cDD.s.hi - bDD.s.hi * dDD.s.hi); + real.s.lo = 0.0; + imag.s.hi = CRT_INFINITY * (aDD.s.hi * dDD.s.hi + bDD.s.hi * cDD.s.hi); + imag.s.lo = 0.0; + } + } + + long double _Complex z; + __real__ z = real.ld; + __imag__ z = imag.ld; + + return z; } Index: compiler-rt/trunk/lib/builtins/subdf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/subdf3.c +++ compiler-rt/trunk/lib/builtins/subdf3.c @@ -15,16 +15,13 @@ #include "fp_lib.h" // Subtraction; flip the sign bit of b and add. 
-COMPILER_RT_ABI fp_t -__subdf3(fp_t a, fp_t b) { - return __adddf3(a, fromRep(toRep(b) ^ signBit)); +COMPILER_RT_ABI fp_t __subdf3(fp_t a, fp_t b) { + return __adddf3(a, fromRep(toRep(b) ^ signBit)); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) { - return __subdf3(a, b); -} +AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) { return __subdf3(a, b); } #else AEABI_RTABI fp_t __aeabi_dsub(fp_t a, fp_t b) COMPILER_RT_ALIAS(__subdf3); #endif Index: compiler-rt/trunk/lib/builtins/subsf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/subsf3.c +++ compiler-rt/trunk/lib/builtins/subsf3.c @@ -15,16 +15,13 @@ #include "fp_lib.h" // Subtraction; flip the sign bit of b and add. -COMPILER_RT_ABI fp_t -__subsf3(fp_t a, fp_t b) { - return __addsf3(a, fromRep(toRep(b) ^ signBit)); +COMPILER_RT_ABI fp_t __subsf3(fp_t a, fp_t b) { + return __addsf3(a, fromRep(toRep(b) ^ signBit)); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) { - return __subsf3(a, b); -} +AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) { return __subsf3(a, b); } #else AEABI_RTABI fp_t __aeabi_fsub(fp_t a, fp_t b) COMPILER_RT_ALIAS(__subsf3); #endif Index: compiler-rt/trunk/lib/builtins/subtf3.c =================================================================== --- compiler-rt/trunk/lib/builtins/subtf3.c +++ compiler-rt/trunk/lib/builtins/subtf3.c @@ -18,9 +18,8 @@ COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b); // Subtraction; flip the sign bit of b and add. 
-COMPILER_RT_ABI fp_t -__subtf3(fp_t a, fp_t b) { - return __addtf3(a, fromRep(toRep(b) ^ signBit)); +COMPILER_RT_ABI fp_t __subtf3(fp_t a, fp_t b) { + return __addtf3(a, fromRep(toRep(b) ^ signBit)); } #endif Index: compiler-rt/trunk/lib/builtins/subvdi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/subvdi3.c +++ compiler-rt/trunk/lib/builtins/subvdi3.c @@ -17,19 +17,14 @@ /* Effects: aborts if a - b overflows */ -COMPILER_RT_ABI di_int -__subvdi3(di_int a, di_int b) -{ - di_int s = (du_int) a - (du_int) b; - if (b >= 0) - { - if (s > a) - compilerrt_abort(); - } - else - { - if (s <= a) - compilerrt_abort(); - } - return s; +COMPILER_RT_ABI di_int __subvdi3(di_int a, di_int b) { + di_int s = (du_int)a - (du_int)b; + if (b >= 0) { + if (s > a) + compilerrt_abort(); + } else { + if (s <= a) + compilerrt_abort(); + } + return s; } Index: compiler-rt/trunk/lib/builtins/subvsi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/subvsi3.c +++ compiler-rt/trunk/lib/builtins/subvsi3.c @@ -17,19 +17,14 @@ /* Effects: aborts if a - b overflows */ -COMPILER_RT_ABI si_int -__subvsi3(si_int a, si_int b) -{ - si_int s = (su_int) a - (su_int) b; - if (b >= 0) - { - if (s > a) - compilerrt_abort(); - } - else - { - if (s <= a) - compilerrt_abort(); - } - return s; +COMPILER_RT_ABI si_int __subvsi3(si_int a, si_int b) { + si_int s = (su_int)a - (su_int)b; + if (b >= 0) { + if (s > a) + compilerrt_abort(); + } else { + if (s <= a) + compilerrt_abort(); + } + return s; } Index: compiler-rt/trunk/lib/builtins/subvti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/subvti3.c +++ compiler-rt/trunk/lib/builtins/subvti3.c @@ -19,21 +19,16 @@ /* Effects: aborts if a - b overflows */ -COMPILER_RT_ABI ti_int -__subvti3(ti_int a, ti_int b) -{ - ti_int s = (tu_int) a - (tu_int) b; - if (b >= 0) - { - if (s > a) - 
compilerrt_abort(); - } - else - { - if (s <= a) - compilerrt_abort(); - } - return s; +COMPILER_RT_ABI ti_int __subvti3(ti_int a, ti_int b) { + ti_int s = (tu_int)a - (tu_int)b; + if (b >= 0) { + if (s > a) + compilerrt_abort(); + } else { + if (s <= a) + compilerrt_abort(); + } + return s; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/trampoline_setup.c =================================================================== --- compiler-rt/trunk/lib/builtins/trampoline_setup.c +++ compiler-rt/trunk/lib/builtins/trampoline_setup.c @@ -9,39 +9,38 @@ #include "int_lib.h" -extern void __clear_cache(void* start, void* end); +extern void __clear_cache(void *start, void *end); /* - * The ppc compiler generates calls to __trampoline_setup() when creating + * The ppc compiler generates calls to __trampoline_setup() when creating * trampoline functions on the stack for use with nested functions. - * This function creates a custom 40-byte trampoline function on the stack + * This function creates a custom 40-byte trampoline function on the stack * which loads r11 with a pointer to the outer function's locals * and then jumps to the target nested function. 
*/ #if __ppc__ && !defined(__powerpc64__) -COMPILER_RT_ABI void -__trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, - const void* realFunc, void* localsPtr) -{ - /* should never happen, but if compiler did not allocate */ - /* enough space on stack for the trampoline, abort */ - if ( trampSizeAllocated < 40 ) - compilerrt_abort(); - - /* create trampoline */ - trampOnStack[0] = 0x7c0802a6; /* mflr r0 */ - trampOnStack[1] = 0x4800000d; /* bl Lbase */ - trampOnStack[2] = (uint32_t)realFunc; - trampOnStack[3] = (uint32_t)localsPtr; - trampOnStack[4] = 0x7d6802a6; /* Lbase: mflr r11 */ - trampOnStack[5] = 0x818b0000; /* lwz r12,0(r11) */ - trampOnStack[6] = 0x7c0803a6; /* mtlr r0 */ - trampOnStack[7] = 0x7d8903a6; /* mtctr r12 */ - trampOnStack[8] = 0x816b0004; /* lwz r11,4(r11) */ - trampOnStack[9] = 0x4e800420; /* bctr */ - - /* clear instruction cache */ - __clear_cache(trampOnStack, &trampOnStack[10]); +COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, + int trampSizeAllocated, + const void *realFunc, void *localsPtr) { + /* should never happen, but if compiler did not allocate */ + /* enough space on stack for the trampoline, abort */ + if (trampSizeAllocated < 40) + compilerrt_abort(); + + /* create trampoline */ + trampOnStack[0] = 0x7c0802a6; /* mflr r0 */ + trampOnStack[1] = 0x4800000d; /* bl Lbase */ + trampOnStack[2] = (uint32_t)realFunc; + trampOnStack[3] = (uint32_t)localsPtr; + trampOnStack[4] = 0x7d6802a6; /* Lbase: mflr r11 */ + trampOnStack[5] = 0x818b0000; /* lwz r12,0(r11) */ + trampOnStack[6] = 0x7c0803a6; /* mtlr r0 */ + trampOnStack[7] = 0x7d8903a6; /* mtctr r12 */ + trampOnStack[8] = 0x816b0004; /* lwz r11,4(r11) */ + trampOnStack[9] = 0x4e800420; /* bctr */ + + /* clear instruction cache */ + __clear_cache(trampOnStack, &trampOnStack[10]); } #endif /* __ppc__ && !defined(__powerpc64__) */ Index: compiler-rt/trunk/lib/builtins/truncdfhf2.c =================================================================== --- 
compiler-rt/trunk/lib/builtins/truncdfhf2.c +++ compiler-rt/trunk/lib/builtins/truncdfhf2.c @@ -10,15 +10,11 @@ #define DST_HALF #include "fp_trunc_impl.inc" -COMPILER_RT_ABI uint16_t __truncdfhf2(double a) { - return __truncXfYf2__(a); -} +COMPILER_RT_ABI uint16_t __truncdfhf2(double a) { return __truncXfYf2__(a); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI uint16_t __aeabi_d2h(double a) { - return __truncdfhf2(a); -} +AEABI_RTABI uint16_t __aeabi_d2h(double a) { return __truncdfhf2(a); } #else AEABI_RTABI uint16_t __aeabi_d2h(double a) COMPILER_RT_ALIAS(__truncdfhf2); #endif Index: compiler-rt/trunk/lib/builtins/truncdfsf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/truncdfsf2.c +++ compiler-rt/trunk/lib/builtins/truncdfsf2.c @@ -10,15 +10,11 @@ #define DST_SINGLE #include "fp_trunc_impl.inc" -COMPILER_RT_ABI float __truncdfsf2(double a) { - return __truncXfYf2__(a); -} +COMPILER_RT_ABI float __truncdfsf2(double a) { return __truncXfYf2__(a); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI float __aeabi_d2f(double a) { - return __truncdfsf2(a); -} +AEABI_RTABI float __aeabi_d2f(double a) { return __truncdfsf2(a); } #else AEABI_RTABI float __aeabi_d2f(double a) COMPILER_RT_ALIAS(__truncdfsf2); #endif Index: compiler-rt/trunk/lib/builtins/truncsfhf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/truncsfhf2.c +++ compiler-rt/trunk/lib/builtins/truncsfhf2.c @@ -13,18 +13,14 @@ // Use a forwarding definition and noinline to implement a poor man's alias, // as there isn't a good cross-platform way of defining one. 
COMPILER_RT_ABI NOINLINE uint16_t __truncsfhf2(float a) { - return __truncXfYf2__(a); + return __truncXfYf2__(a); } -COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) { - return __truncsfhf2(a); -} +COMPILER_RT_ABI uint16_t __gnu_f2h_ieee(float a) { return __truncsfhf2(a); } #if defined(__ARM_EABI__) #if defined(COMPILER_RT_ARMHF_TARGET) -AEABI_RTABI uint16_t __aeabi_f2h(float a) { - return __truncsfhf2(a); -} +AEABI_RTABI uint16_t __aeabi_f2h(float a) { return __truncsfhf2(a); } #else AEABI_RTABI uint16_t __aeabi_f2h(float a) COMPILER_RT_ALIAS(__truncsfhf2); #endif Index: compiler-rt/trunk/lib/builtins/trunctfdf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/trunctfdf2.c +++ compiler-rt/trunk/lib/builtins/trunctfdf2.c @@ -14,8 +14,6 @@ #define DST_DOUBLE #include "fp_trunc_impl.inc" -COMPILER_RT_ABI double __trunctfdf2(long double a) { - return __truncXfYf2__(a); -} +COMPILER_RT_ABI double __trunctfdf2(long double a) { return __truncXfYf2__(a); } #endif Index: compiler-rt/trunk/lib/builtins/trunctfsf2.c =================================================================== --- compiler-rt/trunk/lib/builtins/trunctfsf2.c +++ compiler-rt/trunk/lib/builtins/trunctfsf2.c @@ -14,8 +14,6 @@ #define DST_SINGLE #include "fp_trunc_impl.inc" -COMPILER_RT_ABI float __trunctfsf2(long double a) { - return __truncXfYf2__(a); -} +COMPILER_RT_ABI float __trunctfsf2(long double a) { return __truncXfYf2__(a); } #endif Index: compiler-rt/trunk/lib/builtins/ucmpdi2.c =================================================================== --- compiler-rt/trunk/lib/builtins/ucmpdi2.c +++ compiler-rt/trunk/lib/builtins/ucmpdi2.c @@ -18,33 +18,28 @@ * if (a > b) returns 2 */ -COMPILER_RT_ABI si_int -__ucmpdi2(du_int a, du_int b) -{ - udwords x; - x.all = a; - udwords y; - y.all = b; - if (x.s.high < y.s.high) - return 0; - if (x.s.high > y.s.high) - return 2; - if (x.s.low < y.s.low) - return 0; - if (x.s.low > y.s.low) - return 2; - 
return 1; +COMPILER_RT_ABI si_int __ucmpdi2(du_int a, du_int b) { + udwords x; + x.all = a; + udwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; } #ifdef __ARM_EABI__ /* Returns: if (a < b) returns -1 -* if (a == b) returns 0 -* if (a > b) returns 1 -*/ -COMPILER_RT_ABI si_int -__aeabi_ulcmp(di_int a, di_int b) -{ - return __ucmpdi2(a, b) - 1; + * if (a == b) returns 0 + * if (a > b) returns 1 + */ +COMPILER_RT_ABI si_int __aeabi_ulcmp(di_int a, di_int b) { + return __ucmpdi2(a, b) - 1; } #endif - Index: compiler-rt/trunk/lib/builtins/ucmpti2.c =================================================================== --- compiler-rt/trunk/lib/builtins/ucmpti2.c +++ compiler-rt/trunk/lib/builtins/ucmpti2.c @@ -20,22 +20,20 @@ * if (a > b) returns 2 */ -COMPILER_RT_ABI si_int -__ucmpti2(tu_int a, tu_int b) -{ - utwords x; - x.all = a; - utwords y; - y.all = b; - if (x.s.high < y.s.high) - return 0; - if (x.s.high > y.s.high) - return 2; - if (x.s.low < y.s.low) - return 0; - if (x.s.low > y.s.low) - return 2; - return 1; +COMPILER_RT_ABI si_int __ucmpti2(tu_int a, tu_int b) { + utwords x; + x.all = a; + utwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/udivdi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/udivdi3.c +++ compiler-rt/trunk/lib/builtins/udivdi3.c @@ -15,8 +15,6 @@ /* Returns: a / b */ -COMPILER_RT_ABI du_int -__udivdi3(du_int a, du_int b) -{ - return __udivmoddi4(a, b, 0); +COMPILER_RT_ABI du_int __udivdi3(du_int a, du_int b) { + return __udivmoddi4(a, b, 0); } Index: compiler-rt/trunk/lib/builtins/udivmoddi4.c 
=================================================================== --- compiler-rt/trunk/lib/builtins/udivmoddi4.c +++ compiler-rt/trunk/lib/builtins/udivmoddi4.c @@ -19,212 +19,190 @@ /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ -COMPILER_RT_ABI du_int -__udivmoddi4(du_int a, du_int b, du_int* rem) -{ - const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; - const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; - udwords n; - n.all = a; - udwords d; - d.all = b; - udwords q; - udwords r; - unsigned sr; - /* special cases, X is unknown, K != 0 */ - if (n.s.high == 0) - { - if (d.s.high == 0) - { - /* 0 X - * --- - * 0 X - */ - if (rem) - *rem = n.s.low % d.s.low; - return n.s.low / d.s.low; - } - /* 0 X - * --- - * K X - */ - if (rem) - *rem = n.s.low; - return 0; +COMPILER_RT_ABI du_int __udivmoddi4(du_int a, du_int b, du_int *rem) { + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + udwords n; + n.all = a; + udwords d; + d.all = b; + udwords q; + udwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) { + if (d.s.high == 0) { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; } - /* n.s.high != 0 */ - if (d.s.low == 0) + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) { + if (d.s.high == 0) { + /* K X + * --- + * 0 0 + */ + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + /* d.s.high != 0 */ + if (n.s.low == 0) { + /* K 0 + * --- + * K 0 + */ + if (rem) { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ { - if (d.s.high == 0) - { - /* K X - * --- - * 0 0 - */ - if (rem) - *rem = n.s.high % d.s.low; - return n.s.high / d.s.low; - } - /* 
d.s.high != 0 */ - if (n.s.low == 0) - { - /* K 0 - * --- - * K 0 - */ - if (rem) - { - r.s.high = n.s.high % d.s.high; - r.s.low = 0; - *rem = r.all; - } - return n.s.high / d.s.high; - } - /* K K - * --- - * K 0 - */ - if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ - { - if (rem) - { - r.s.low = n.s.low; - r.s.high = n.s.high & (d.s.high - 1); - *rem = r.all; - } - return n.s.high >> __builtin_ctz(d.s.high); - } - /* K K - * --- - * K 0 - */ - sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); - /* 0 <= sr <= n_uword_bits - 2 or sr large */ - if (sr > n_uword_bits - 2) - { - if (rem) - *rem = n.all; - return 0; - } - ++sr; - /* 1 <= sr <= n_uword_bits - 1 */ - /* q.all = n.all << (n_udword_bits - sr); */ + if (rem) { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctz(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 2 or sr large */ + if (sr > n_uword_bits - 2) { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } else /* d.s.low != 0 */ + { + if (d.s.high == 0) { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctz(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); + /* 2 <= sr <= n_udword_bits - 1 + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_uword_bits) { + q.s.low = 
0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 + { q.s.low = 0; q.s.high = n.s.low << (n_uword_bits - sr); - /* r.all = n.all >> sr; */ r.s.high = n.s.high >> sr; r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 + { + q.s.low = n.s.low << (n_udword_bits - sr); + q.s.high = (n.s.high << (n_udword_bits - sr)) | + (n.s.low >> (sr - n_uword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_uword_bits); + } + } else { + /* K X + * --- + * K K + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + if (sr == n_uword_bits) { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } else { + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } } - else /* d.s.low != 0 */ - { - if (d.s.high == 0) - { - /* K X - * --- - * 0 K - */ - if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ - { - if (rem) - *rem = n.s.low & (d.s.low - 1); - if (d.s.low == 1) - return n.all; - sr = __builtin_ctz(d.s.low); - q.s.high = n.s.high >> sr; - q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); - return q.all; - } - /* K X - * --- - * 0 K - */ - sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); - /* 2 <= sr <= n_udword_bits - 1 - * q.all = n.all << (n_udword_bits - sr); - * r.all = n.all >> sr; - */ - if (sr == n_uword_bits) - { - q.s.low = 0; - q.s.high = n.s.low; - r.s.high = 0; - r.s.low = n.s.high; - } - else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 - { - q.s.low = 0; - q.s.high = n.s.low << (n_uword_bits - sr); - r.s.high = n.s.high >> sr; - r.s.low = 
(n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); - } - else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 - { - q.s.low = n.s.low << (n_udword_bits - sr); - q.s.high = (n.s.high << (n_udword_bits - sr)) | - (n.s.low >> (sr - n_uword_bits)); - r.s.high = 0; - r.s.low = n.s.high >> (sr - n_uword_bits); - } - } - else - { - /* K X - * --- - * K K - */ - sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); - /* 0 <= sr <= n_uword_bits - 1 or sr large */ - if (sr > n_uword_bits - 1) - { - if (rem) - *rem = n.all; - return 0; - } - ++sr; - /* 1 <= sr <= n_uword_bits */ - /* q.all = n.all << (n_udword_bits - sr); */ - q.s.low = 0; - if (sr == n_uword_bits) - { - q.s.high = n.s.low; - r.s.high = 0; - r.s.low = n.s.high; - } - else - { - q.s.high = n.s.low << (n_uword_bits - sr); - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); - } - } - } - /* Not a special case - * q and r are initialized with: - * q.all = n.all << (n_udword_bits - sr); - * r.all = n.all >> sr; - * 1 <= sr <= n_udword_bits - 1 + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= n_udword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } */ - su_int carry = 0; - for (; sr > 0; --sr) - { - /* r:q = ((r:q) << 1) | carry */ - r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); - r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); - q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); - q.s.low = (q.s.low << 1) | carry; - /* carry = 0; - * if (r.all >= d.all) - * { - * r.all -= d.all; - * 
carry = 1; - * } - */ - const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); - carry = s & 1; - r.all -= d.all & s; - } - q.all = (q.all << 1) | carry; - if (rem) - *rem = r.all; - return q.all; + const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; } Index: compiler-rt/trunk/lib/builtins/udivmodsi4.c =================================================================== --- compiler-rt/trunk/lib/builtins/udivmodsi4.c +++ compiler-rt/trunk/lib/builtins/udivmodsi4.c @@ -15,12 +15,8 @@ /* Returns: a / b, *rem = a % b */ -COMPILER_RT_ABI su_int -__udivmodsi4(su_int a, su_int b, su_int* rem) -{ - si_int d = __udivsi3(a,b); - *rem = a - (d*b); +COMPILER_RT_ABI su_int __udivmodsi4(su_int a, su_int b, su_int *rem) { + si_int d = __udivsi3(a, b); + *rem = a - (d * b); return d; } - - Index: compiler-rt/trunk/lib/builtins/udivmodti4.c =================================================================== --- compiler-rt/trunk/lib/builtins/udivmodti4.c +++ compiler-rt/trunk/lib/builtins/udivmodti4.c @@ -9,229 +9,207 @@ * This file implements __udivmodti4 for the compiler_rt library. 
* * ===----------------------------------------------------------------------=== - */ + */ #include "int_lib.h" #ifdef CRT_HAS_128BIT -/* Effects: if rem != 0, *rem = a % b - * Returns: a / b +/* Effects: if rem != 0, *rem = a % b + * Returns: a / b */ /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ -COMPILER_RT_ABI tu_int -__udivmodti4(tu_int a, tu_int b, tu_int* rem) -{ - const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; - const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; - utwords n; - n.all = a; - utwords d; - d.all = b; - utwords q; - utwords r; - unsigned sr; - /* special cases, X is unknown, K != 0 */ - if (n.s.high == 0) - { - if (d.s.high == 0) - { - /* 0 X - * --- - * 0 X - */ - if (rem) - *rem = n.s.low % d.s.low; - return n.s.low / d.s.low; - } - /* 0 X - * --- - * K X - */ - if (rem) - *rem = n.s.low; - return 0; +COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) { + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; + utwords n; + n.all = a; + utwords d; + d.all = b; + utwords q; + utwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) { + if (d.s.high == 0) { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) { + if (d.s.high == 0) { + /* K X + * --- + * 0 0 + */ + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; } - /* n.s.high != 0 */ - if (d.s.low == 0) + /* d.s.high != 0 */ + if (n.s.low == 0) { + /* K 0 + * --- + * K 0 + */ + if (rem) { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ { - if (d.s.high == 0) - { - /* K X - * --- - * 0 0 - */ - if (rem) - 
*rem = n.s.high % d.s.low; - return n.s.high / d.s.low; - } - /* d.s.high != 0 */ - if (n.s.low == 0) - { - /* K 0 - * --- - * K 0 - */ - if (rem) - { - r.s.high = n.s.high % d.s.high; - r.s.low = 0; - *rem = r.all; - } - return n.s.high / d.s.high; - } - /* K K - * --- - * K 0 - */ - if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ - { - if (rem) - { - r.s.low = n.s.low; - r.s.high = n.s.high & (d.s.high - 1); - *rem = r.all; - } - return n.s.high >> __builtin_ctzll(d.s.high); - } - /* K K - * --- - * K 0 - */ - sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); - /* 0 <= sr <= n_udword_bits - 2 or sr large */ - if (sr > n_udword_bits - 2) - { - if (rem) - *rem = n.all; - return 0; - } - ++sr; - /* 1 <= sr <= n_udword_bits - 1 */ - /* q.all = n.all << (n_utword_bits - sr); */ + if (rem) { + r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctzll(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); + /* 0 <= sr <= n_udword_bits - 2 or sr large */ + if (sr > n_udword_bits - 2) { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_udword_bits - 1 */ + /* q.all = n.all << (n_utword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_udword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + } else /* d.s.low != 0 */ + { + if (d.s.high == 0) { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctzll(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) - + __builtin_clzll(n.s.high); + /* 2 <= sr <= n_utword_bits - 1 + * q.all = n.all 
<< (n_utword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_udword_bits) { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 + { q.s.low = 0; q.s.high = n.s.low << (n_udword_bits - sr); - /* r.all = n.all >> sr; */ r.s.high = n.s.high >> sr; r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + } else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 + { + q.s.low = n.s.low << (n_utword_bits - sr); + q.s.high = (n.s.high << (n_utword_bits - sr)) | + (n.s.low >> (sr - n_udword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_udword_bits); + } + } else { + /* K X + * --- + * K K + */ + sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); + /*0 <= sr <= n_udword_bits - 1 or sr large */ + if (sr > n_udword_bits - 1) { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_udword_bits + * q.all = n.all << (n_utword_bits - sr); + * r.all = n.all >> sr; + */ + q.s.low = 0; + if (sr == n_udword_bits) { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } else { + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); + q.s.high = n.s.low << (n_udword_bits - sr); + } } - else /* d.s.low != 0 */ - { - if (d.s.high == 0) - { - /* K X - * --- - * 0 K - */ - if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ - { - if (rem) - *rem = n.s.low & (d.s.low - 1); - if (d.s.low == 1) - return n.all; - sr = __builtin_ctzll(d.s.low); - q.s.high = n.s.high >> sr; - q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); - return q.all; - } - /* K X - * --- - * 0 K - */ - sr = 1 + n_udword_bits + __builtin_clzll(d.s.low) - - __builtin_clzll(n.s.high); - /* 2 <= sr <= n_utword_bits - 1 - * q.all = n.all << (n_utword_bits - sr); - * r.all = n.all >> sr; - */ - if (sr == n_udword_bits) - { - q.s.low = 0; - q.s.high = n.s.low; - r.s.high = 0; - r.s.low = n.s.high; - } - else if (sr < 
n_udword_bits) // 2 <= sr <= n_udword_bits - 1 - { - q.s.low = 0; - q.s.high = n.s.low << (n_udword_bits - sr); - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); - } - else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 - { - q.s.low = n.s.low << (n_utword_bits - sr); - q.s.high = (n.s.high << (n_utword_bits - sr)) | - (n.s.low >> (sr - n_udword_bits)); - r.s.high = 0; - r.s.low = n.s.high >> (sr - n_udword_bits); - } - } - else - { - /* K X - * --- - * K K - */ - sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high); - /*0 <= sr <= n_udword_bits - 1 or sr large */ - if (sr > n_udword_bits - 1) - { - if (rem) - *rem = n.all; - return 0; - } - ++sr; - /* 1 <= sr <= n_udword_bits - * q.all = n.all << (n_utword_bits - sr); - * r.all = n.all >> sr; - */ - q.s.low = 0; - if (sr == n_udword_bits) - { - q.s.high = n.s.low; - r.s.high = 0; - r.s.low = n.s.high; - } - else - { - r.s.high = n.s.high >> sr; - r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr); - q.s.high = n.s.low << (n_udword_bits - sr); - } - } - } - /* Not a special case - * q and r are initialized with: - * q.all = n.all << (n_utword_bits - sr); - * r.all = n.all >> sr; - * 1 <= sr <= n_utword_bits - 1 + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_utword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= n_utword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_udword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } */ - su_int carry = 0; - for (; sr > 0; --sr) - { - /* r:q = ((r:q) << 1) | carry */ - r.s.high = (r.s.high << 1) | (r.s.low >> (n_udword_bits - 1)); - r.s.low = (r.s.low << 1) | (q.s.high 
>> (n_udword_bits - 1)); - q.s.high = (q.s.high << 1) | (q.s.low >> (n_udword_bits - 1)); - q.s.low = (q.s.low << 1) | carry; - /* carry = 0; - * if (r.all >= d.all) - * { - * r.all -= d.all; - * carry = 1; - * } - */ - const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); - carry = s & 1; - r.all -= d.all & s; - } - q.all = (q.all << 1) | carry; - if (rem) - *rem = r.all; - return q.all; + const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/udivsi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/udivsi3.c +++ compiler-rt/trunk/lib/builtins/udivsi3.c @@ -18,50 +18,48 @@ /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ /* This function should not call __divsi3! */ -COMPILER_RT_ABI su_int -__udivsi3(su_int n, su_int d) -{ - const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; - su_int q; - su_int r; - unsigned sr; - /* special cases */ - if (d == 0) - return 0; /* ?! 
*/ - if (n == 0) - return 0; - sr = __builtin_clz(d) - __builtin_clz(n); - /* 0 <= sr <= n_uword_bits - 1 or sr large */ - if (sr > n_uword_bits - 1) /* d > r */ - return 0; - if (sr == n_uword_bits - 1) /* d == 1 */ - return n; - ++sr; - /* 1 <= sr <= n_uword_bits - 1 */ - /* Not a special case */ - q = n << (n_uword_bits - sr); - r = n >> sr; - su_int carry = 0; - for (; sr > 0; --sr) - { - /* r:q = ((r:q) << 1) | carry */ - r = (r << 1) | (q >> (n_uword_bits - 1)); - q = (q << 1) | carry; - /* carry = 0; - * if (r.all >= d.all) - * { - * r.all -= d.all; - * carry = 1; - * } - */ - const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1); - carry = s & 1; - r -= d & s; - } +COMPILER_RT_ABI su_int __udivsi3(su_int n, su_int d) { + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + su_int q; + su_int r; + unsigned sr; + /* special cases */ + if (d == 0) + return 0; /* ?! */ + if (n == 0) + return 0; + sr = __builtin_clz(d) - __builtin_clz(n); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) /* d > r */ + return 0; + if (sr == n_uword_bits - 1) /* d == 1 */ + return n; + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* Not a special case */ + q = n << (n_uword_bits - sr); + r = n >> sr; + su_int carry = 0; + for (; sr > 0; --sr) { + /* r:q = ((r:q) << 1) | carry */ + r = (r << 1) | (q >> (n_uword_bits - 1)); q = (q << 1) | carry; - return q; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1); + carry = s & 1; + r -= d & s; + } + q = (q << 1) | carry; + return q; } #if defined(__ARM_EABI__) -AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) COMPILER_RT_ALIAS(__udivsi3); +AEABI_RTABI su_int __aeabi_uidiv(su_int n, su_int d) + COMPILER_RT_ALIAS(__udivsi3); #endif Index: compiler-rt/trunk/lib/builtins/udivti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/udivti3.c +++ 
compiler-rt/trunk/lib/builtins/udivti3.c @@ -17,10 +17,8 @@ /* Returns: a / b */ -COMPILER_RT_ABI tu_int -__udivti3(tu_int a, tu_int b) -{ - return __udivmodti4(a, b, 0); +COMPILER_RT_ABI tu_int __udivti3(tu_int a, tu_int b) { + return __udivmodti4(a, b, 0); } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/umoddi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/umoddi3.c +++ compiler-rt/trunk/lib/builtins/umoddi3.c @@ -15,10 +15,8 @@ /* Returns: a % b */ -COMPILER_RT_ABI du_int -__umoddi3(du_int a, du_int b) -{ - du_int r; - __udivmoddi4(a, b, &r); - return r; +COMPILER_RT_ABI du_int __umoddi3(du_int a, du_int b) { + du_int r; + __udivmoddi4(a, b, &r); + return r; } Index: compiler-rt/trunk/lib/builtins/umodsi3.c =================================================================== --- compiler-rt/trunk/lib/builtins/umodsi3.c +++ compiler-rt/trunk/lib/builtins/umodsi3.c @@ -15,8 +15,6 @@ /* Returns: a % b */ -COMPILER_RT_ABI su_int -__umodsi3(su_int a, su_int b) -{ - return a - __udivsi3(a, b) * b; +COMPILER_RT_ABI su_int __umodsi3(su_int a, su_int b) { + return a - __udivsi3(a, b) * b; } Index: compiler-rt/trunk/lib/builtins/umodti3.c =================================================================== --- compiler-rt/trunk/lib/builtins/umodti3.c +++ compiler-rt/trunk/lib/builtins/umodti3.c @@ -17,12 +17,10 @@ /* Returns: a % b */ -COMPILER_RT_ABI tu_int -__umodti3(tu_int a, tu_int b) -{ - tu_int r; - __udivmodti4(a, b, &r); - return r; +COMPILER_RT_ABI tu_int __umodti3(tu_int a, tu_int b) { + tu_int r; + __udivmodti4(a, b, &r); + return r; } #endif /* CRT_HAS_128BIT */ Index: compiler-rt/trunk/lib/builtins/unwind-ehabi-helpers.h =================================================================== --- compiler-rt/trunk/lib/builtins/unwind-ehabi-helpers.h +++ compiler-rt/trunk/lib/builtins/unwind-ehabi-helpers.h @@ -35,8 +35,8 @@ * those states. 
*/ -#define _URC_OK 0 -#define _URC_FAILURE 9 +#define _URC_OK 0 +#define _URC_FAILURE 9 typedef uint32_t _Unwind_State; @@ -51,4 +51,3 @@ #endif #endif - Index: compiler-rt/trunk/lib/builtins/x86_64/floatdidf.c =================================================================== --- compiler-rt/trunk/lib/builtins/x86_64/floatdidf.c +++ compiler-rt/trunk/lib/builtins/x86_64/floatdidf.c @@ -9,9 +9,6 @@ #include "../int_lib.h" -double __floatdidf(int64_t a) -{ - return (double)a; -} +double __floatdidf(int64_t a) { return (double)a; } #endif /* __x86_64__ */ Index: compiler-rt/trunk/lib/builtins/x86_64/floatdisf.c =================================================================== --- compiler-rt/trunk/lib/builtins/x86_64/floatdisf.c +++ compiler-rt/trunk/lib/builtins/x86_64/floatdisf.c @@ -7,9 +7,6 @@ #include "../int_lib.h" -float __floatdisf(int64_t a) -{ - return (float)a; -} +float __floatdisf(int64_t a) { return (float)a; } #endif /* __x86_64__ */ Index: compiler-rt/trunk/lib/builtins/x86_64/floatdixf.c =================================================================== --- compiler-rt/trunk/lib/builtins/x86_64/floatdixf.c +++ compiler-rt/trunk/lib/builtins/x86_64/floatdixf.c @@ -9,9 +9,6 @@ #include "../int_lib.h" -long double __floatdixf(int64_t a) -{ - return (long double)a; -} +long double __floatdixf(int64_t a) { return (long double)a; } #endif /* __i386__ */