Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -1057,6 +1057,10 @@ } else { // If there's anything we can use as a barrier, go through custom lowering // for ATOMIC_FENCE. + // If target has DMB in thumb, Fences can be inserted. + if (Subtarget->hasDataBarrier()) + InsertFencesForAtomic = true; + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Subtarget->hasAnyDataBarrier() ? Custom : Expand); @@ -1075,8 +1079,10 @@ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the // Unordered/Monotonic case. - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + if (!InsertFencesForAtomic) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + } } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); @@ -12879,7 +12885,8 @@ TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); - return (Size <= (Subtarget->isMClass() ? 32U : 64U)) + bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); + return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None; } @@ -12891,7 +12898,9 @@ // on the stack and close enough to the spill slot, this can lead to a // situation where the monitor always gets cleared and the atomic operation // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead. - return getTargetMachine().getOptLevel() != 0; + bool hasAtomicCmpXchg = + !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); + return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg; } bool ARMTargetLowering::shouldInsertFencesForAtomic( Index: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp +++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp @@ -319,9 +319,7 @@ return (!isThumb() || hasThumb2()); } -bool ARMSubtarget::enableAtomicExpand() const { - return hasAnyDataBarrier() && (!isThumb() || hasV8MBaselineOps()); -} +bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { // For general targets, the prologue can grow when VFPs are allocated with Index: llvm/trunk/test/CodeGen/ARM/atomic-op.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/atomic-op.ll +++ llvm/trunk/test/CodeGen/ARM/atomic-op.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix CHECK-ARMV7 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-T2 ; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1 -; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1 +; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1-M0 ; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" @@ -30,6 +30,7 @@ ; CHECK: add ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_add_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_add_4 ; CHECK-BAREMETAL: add ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw add i32* %val1, i32 %tmp monotonic @@ -38,6 +39,7 @@ ; CHECK: sub ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_sub_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_sub_4 ; CHECK-BAREMETAL: sub ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw sub i32* %val2, i32 30 monotonic @@ -46,6 +48,7 @@ ; CHECK: add ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_add_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_add_4 ; CHECK-BAREMETAL: add ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw add i32* %val2, i32 1 monotonic @@ -54,6 +57,7 @@ ; CHECK: sub ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_sub_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_sub_4 ; CHECK-BAREMETAL: sub ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw sub i32* %val2, i32 1 monotonic @@ -62,6 +66,7 @@ ; CHECK: and ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_and_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_and_4 ; CHECK-BAREMETAL: and ; CHECK-BAREMETAL-NOT: __sync %4 = atomicrmw and i32* %andt, i32 4080 monotonic @@ -70,6 +75,7 @@ ; CHECK: or ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_or_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_or_4 ; CHECK-BAREMETAL: or ; CHECK-BAREMETAL-NOT: __sync %5 = atomicrmw or i32* %ort, i32 4080 monotonic @@ -78,6 +84,7 @@ ; CHECK: eor ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_xor_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_xor_4 ; CHECK-BAREMETAL: eor ; CHECK-BAREMETAL-NOT: __sync %6 = atomicrmw xor i32* %xort, i32 4080 monotonic @@ -86,6 +93,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_min_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_min_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %7 = atomicrmw min i32* %val2, i32 16 monotonic @@ -95,6 +103,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_min_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_min_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %8 = atomicrmw min i32* %val2, i32 %neg monotonic @@ -103,6 +112,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_max_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_max_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %9 = atomicrmw max i32* %val2, i32 1 monotonic @@ -111,6 +121,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_max_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_max_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %10 = atomicrmw max i32* %val2, i32 0 monotonic @@ -119,6 +130,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_umin_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %11 = atomicrmw umin i32* %val2, i32 16 monotonic @@ -128,6 +140,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_umin_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic @@ -136,6 +149,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_umax_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %13 = atomicrmw umax i32* %val2, i32 1 monotonic @@ -144,6 +158,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_4 + ; CHECK-T1-M0: bl ___sync_fetch_and_umax_4 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %14 = atomicrmw umax i32* %val2, i32 0 monotonic @@ -161,6 +176,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_2 + ; CHECK-T1-M0: bl ___sync_fetch_and_umin_2 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i16* %val, i16 16 monotonic @@ -170,6 +186,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_2 + ; CHECK-T1-M0: bl ___sync_fetch_and_umin_2 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw umin i16* %val, i16 %uneg monotonic @@ -178,6 +195,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_2 + ; CHECK-T1-M0: bl ___sync_fetch_and_umax_2 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw umax i16* %val, i16 1 monotonic @@ -186,6 +204,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_2 + ; CHECK-T1-M0: bl ___sync_fetch_and_umax_2 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw umax i16* %val, i16 0 monotonic @@ -202,6 +221,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_1 + ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i8* %val, i8 16 monotonic @@ -210,6 +230,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umin_1 + ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %uneg = sub i8 0, 1 @@ -219,6 +240,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_1 + ; CHECK-T1-M0: bl ___sync_fetch_and_umax_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw umax i8* %val, i8 1 monotonic @@ -227,6 +249,7 @@ ; CHECK: cmp ; CHECK: strex ; CHECK-T1: bl ___sync_fetch_and_umax_1 + ; CHECK-T1-M0: bl ___sync_fetch_and_umax_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw umax i8* %val, i8 0 monotonic @@ -319,6 +342,11 @@ ; CHECK: dmb ; CHECK: add r0, +; CHECK-T1-M0: ldr {{r[0-9]}}, [r0] +; CHECK-T1-M0: dmb +; CHECK-T1-M0: ldr {{r[0-9]}}, [r1] +; CHECK-T1-M0: dmb + ; CHECK-T1: ___sync_val_compare_and_swap_4 ; CHECK-T1: ___sync_val_compare_and_swap_4 @@ -344,6 +372,11 @@ ; CHECK-T1: ___sync_lock_test_and_set ; CHECK-T1: ___sync_lock_test_and_set +; CHECK-T1-M0: dmb +; CHECK-T1-M0: str r1, [r0] +; CHECK-T1-M0: dmb +; CHECK-T1-M0: str r3, [r2] + ; CHECK-BAREMETAL-NOT: dmb ; CHECK-BAREMTEAL: str r1, [r0] ; CHECK-BAREMETAL-NOT: dmb @@ -362,6 +395,10 @@ ; CHECK: dmb ; CHECK: str [[R0]], [r1] +; CHECK-T1-M0: ldr [[R0:r[0-9]]], [r0] +; CHECK-T1-M0: dmb +; CHECK-T1-M0: str [[R0]], [r1] + ; CHECK-T1: ldr [[R0:r[0-9]]], [{{r[0-9]+}}] ; CHECK-T1: {{dmb|bl ___sync_synchronize}} ; CHECK-T1: str [[R0]], [{{r[0-9]+}}]