Index: lib/builtins/arm/sync-ops.h
===================================================================
--- lib/builtins/arm/sync-ops.h
+++ lib/builtins/arm/sync-ops.h
@@ -10,11 +10,47 @@
  * This file implements outline macros for the __sync_fetch_and_*
  * operations.  Different instantiations will generate appropriate assembly for
  * ARM and Thumb-2 versions of the functions.
+ * These builtins are documented here:
+ * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html
  *
  *===----------------------------------------------------------------------===*/
 
 #include "../assembly.h"
 
+/*
+ * ARMv6-M has no LDREX/STREX, so the 4-byte read-modify-write below is made
+ * atomic with respect to exception handlers by masking interrupts.  PRIMASK is
+ * saved before "cpsid i" and written back afterwards, so a caller that already
+ * had interrupts masked keeps them masked.  The dmb on either side keeps the
+ * full-barrier behaviour of the LDREX/STREX variant.
+ *
+ * Registers: r0 = pointer on entry, previous value on return; r1 = operand;
+ * r2 = loaded, then updated, value; r3 = saved PRIMASK; r12 = previous value.
+ * op() is called with rD == rN because the 16-bit Thumb encodings of the
+ * logical instructions are two-operand only.
+ *
+ * NOTE(review): masking interrupts does not give atomicity against another
+ * core or bus master, and "cpsid i" is ignored in unprivileged code.  Confirm
+ * this is acceptable for every target that takes this path.
+ */
+#if defined(__ARM_ARCH_6M__)
+#define SYNC_OP_4(op) \
+        .p2align 2 ; \
+        .thumb ; \
+        .syntax unified ; \
+        DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
+        dmb ; \
+        mrs r3, PRIMASK ; \
+        cpsid i ; \
+        ldr r2, [r0] ; \
+        mov r12, r2 ; \
+        op(r2, r2, r1) ; \
+        str r2, [r0] ; \
+        msr PRIMASK, r3 ; \
+        dmb ; \
+        mov r0, r12 ; \
+        bx lr
+#else
 #define SYNC_OP_4(op) \
         .p2align 2 ; \
         .thumb ; \
@@ -30,7 +66,42 @@
         bne LOCAL_LABEL(tryatomic_ ## op) ; \
         dmb ; \
         bx lr
+#endif
 
+/*
+ * M-profile cores have no LDREXD/STREXD, so the 8-byte variant also masks
+ * interrupts around a plain load / modify / store sequence.
+ *
+ * Registers: r4 = pointer; r5 = saved PRIMASK; r0:r1 = loaded, then updated,
+ * value; r2:r3 = operand; r6:r12 = previous value, moved to r0:r1 on return.
+ *
+ * NOTE(review): the same limits apply as for the 4-byte interrupt-masking
+ * path (no protection against other bus masters; no effect when unprivileged).
+ */
+#if __ARM_ARCH_PROFILE == 'M'
+#define SYNC_OP_8(op) \
+        .p2align 2 ; \
+        .thumb ; \
+        .syntax unified ; \
+        DEFINE_COMPILERRT_THUMB_FUNCTION(__sync_fetch_and_ ## op) \
+        push {r4, r5, r6, lr} ; \
+        mov r4, r0 ; \
+        dmb ; \
+        mrs r5, PRIMASK ; \
+        cpsid i ; \
+        ldr r0, [r4, #0] ; \
+        ldr r1, [r4, #4] ; \
+        mov r6, r0 ; \
+        mov r12, r1 ; \
+        op(r0, r1, r0, r1, r2, r3) ; \
+        str r0, [r4, #0] ; \
+        str r1, [r4, #4] ; \
+        msr PRIMASK, r5 ; \
+        dmb ; \
+        mov r0, r6 ; \
+        mov r1, r12 ; \
+        pop {r4, r5, r6, pc}
+#else
 #define SYNC_OP_8(op) \
         .p2align 2 ; \
         .thumb ; \
@@ -47,6 +118,7 @@
         bne LOCAL_LABEL(tryatomic_ ## op) ; \
         dmb ; \
         pop {r4, r5, r6, pc}
+#endif
 
 #define MINMAX_4(rD, rN, rM, cmp_kind) \
         cmp rN, rM ; \
Index: lib/builtins/arm/sync_fetch_and_add_4.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_add_4.S
+++ lib/builtins/arm/sync_fetch_and_add_4.S
@@ -15,7 +15,7 @@
 #include "sync-ops.h"
 
 /* "adds" is 2 bytes shorter than "add". */
-#define add_4(rD, rN, rM)  add rD, rN, rM
+#define add_4(rD, rN, rM)  adds rD, rN, rM
 
 SYNC_OP_4(add_4)
 
Index: lib/builtins/arm/sync_fetch_and_add_8.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_add_8.S
+++ lib/builtins/arm/sync_fetch_and_add_8.S
@@ -14,13 +14,11 @@
 
 #include "sync-ops.h"
 
-#if __ARM_ARCH_PROFILE != 'M'
 #define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
     adds rD_LO, rN_LO, rM_LO ; \
-    adc rD_HI, rN_HI, rM_HI
+    adcs rD_HI, rN_HI, rM_HI
 
 SYNC_OP_8(add_8)
-#endif
 
 NO_EXEC_STACK_DIRECTIVE
 
Index: lib/builtins/arm/sync_fetch_and_and_4.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_and_4.S
+++ lib/builtins/arm/sync_fetch_and_and_4.S
@@ -14,7 +14,7 @@
 
 #include "sync-ops.h"
 
-#define and_4(rD, rN, rM)  and rD, rN, rM
+#define and_4(rD, rN, rM)  ands rD, rN, rM
 
 SYNC_OP_4(and_4)
 
Index: lib/builtins/arm/sync_fetch_and_and_8.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_and_8.S
+++ lib/builtins/arm/sync_fetch_and_and_8.S
@@ -14,13 +14,11 @@
 
 #include "sync-ops.h"
 
-#if __ARM_ARCH_PROFILE != 'M'
 #define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
-    and rD_LO, rN_LO, rM_LO ; \
-    and rD_HI, rN_HI, rM_HI
+    ands rD_LO, rN_LO, rM_LO ; \
+    ands rD_HI, rN_HI, rM_HI
 
 SYNC_OP_8(and_8)
-#endif
 
 NO_EXEC_STACK_DIRECTIVE
 
Index: lib/builtins/arm/sync_fetch_and_nand_4.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_nand_4.S
+++ lib/builtins/arm/sync_fetch_and_nand_4.S
@@ -14,7 +14,8 @@
 
 #include "sync-ops.h"
 
-#define nand_4(rD, rN, rM)  bic rD, rN, rM
+/* New value is ~(old & operand), as documented for GCC 4.4 and later. */
+#define nand_4(rD, rN, rM)  ands rD, rN, rM ; mvns rD, rD
 
 SYNC_OP_4(nand_4)
 
Index: lib/builtins/arm/sync_fetch_and_nand_8.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_nand_8.S
+++ lib/builtins/arm/sync_fetch_and_nand_8.S
@@ -14,13 +14,14 @@
 
 #include "sync-ops.h"
 
-#if __ARM_ARCH_PROFILE != 'M'
+/* New value is ~(old & operand), as documented for GCC 4.4 and later. */
 #define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
-    bic rD_LO, rN_LO, rM_LO ; \
-    bic rD_HI, rN_HI, rM_HI
+    ands rD_LO, rN_LO, rM_LO ; \
+    mvns rD_LO, rD_LO ; \
+    ands rD_HI, rN_HI, rM_HI ; \
+    mvns rD_HI, rD_HI
 
 SYNC_OP_8(nand_8)
-#endif
 
 NO_EXEC_STACK_DIRECTIVE
 
Index: lib/builtins/arm/sync_fetch_and_or_4.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_or_4.S
+++ lib/builtins/arm/sync_fetch_and_or_4.S
@@ -14,7 +14,7 @@
 
 #include "sync-ops.h"
 
-#define or_4(rD, rN, rM)  orr rD, rN, rM
+#define or_4(rD, rN, rM)  orrs rD, rN, rM
 
 SYNC_OP_4(or_4)
 
Index: lib/builtins/arm/sync_fetch_and_or_8.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_or_8.S
+++ lib/builtins/arm/sync_fetch_and_or_8.S
@@ -14,13 +14,11 @@
 
 #include "sync-ops.h"
 
-#if __ARM_ARCH_PROFILE != 'M'
 #define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
-    orr rD_LO, rN_LO, rM_LO ; \
-    orr rD_HI, rN_HI, rM_HI
+    orrs rD_LO, rN_LO, rM_LO ; \
+    orrs rD_HI, rN_HI, rM_HI
 
 SYNC_OP_8(or_8)
-#endif
 
 NO_EXEC_STACK_DIRECTIVE
 
Index: lib/builtins/arm/sync_fetch_and_sub_4.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_sub_4.S
+++ lib/builtins/arm/sync_fetch_and_sub_4.S
@@ -15,7 +15,7 @@
 #include "sync-ops.h"
 
 /* "subs" is 2 bytes shorter than "sub". */
-#define sub_4(rD, rN, rM)  sub rD, rN, rM
+#define sub_4(rD, rN, rM)  subs rD, rN, rM
 
 SYNC_OP_4(sub_4)
 
Index: lib/builtins/arm/sync_fetch_and_sub_8.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_sub_8.S
+++ lib/builtins/arm/sync_fetch_and_sub_8.S
@@ -14,13 +14,11 @@
 
 #include "sync-ops.h"
 
-#if __ARM_ARCH_PROFILE != 'M'
 #define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
     subs rD_LO, rN_LO, rM_LO ; \
-    sbc rD_HI, rN_HI, rM_HI
+    sbcs rD_HI, rN_HI, rM_HI
 
 SYNC_OP_8(sub_8)
-#endif
 
 NO_EXEC_STACK_DIRECTIVE
 
Index: lib/builtins/arm/sync_fetch_and_xor_4.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_xor_4.S
+++ lib/builtins/arm/sync_fetch_and_xor_4.S
@@ -14,7 +14,7 @@
 
 #include "sync-ops.h"
 
-#define xor_4(rD, rN, rM)  eor rD, rN, rM
+#define xor_4(rD, rN, rM)  eors rD, rN, rM
 
 SYNC_OP_4(xor_4)
 
Index: lib/builtins/arm/sync_fetch_and_xor_8.S
===================================================================
--- lib/builtins/arm/sync_fetch_and_xor_8.S
+++ lib/builtins/arm/sync_fetch_and_xor_8.S
@@ -14,13 +14,11 @@
 
 #include "sync-ops.h"
 
-#if __ARM_ARCH_PROFILE != 'M'
 #define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \
-    eor rD_LO, rN_LO, rM_LO ; \
-    eor rD_HI, rN_HI, rM_HI
+    eors rD_LO, rN_LO, rM_LO ; \
+    eors rD_HI, rN_HI, rM_HI
 
 SYNC_OP_8(xor_8)
-#endif
 
 NO_EXEC_STACK_DIRECTIVE
 