Index: include/llvm/CodeGen/CommandFlags.h =================================================================== --- include/llvm/CodeGen/CommandFlags.h +++ include/llvm/CodeGen/CommandFlags.h @@ -54,6 +54,16 @@ "Relocatable external references, non-relocatable code"), clEnumValEnd)); +cl::opt<ThreadModel::Model> +TMModel("thread-model", + cl::desc("Choose threading model"), + cl::init(ThreadModel::POSIX), + cl::values(clEnumValN(ThreadModel::POSIX, "POSIX", + "POSIX thread model"), + clEnumValN(ThreadModel::Single, "Single", + "Single thread model"), + clEnumValEnd)); + cl::opt<llvm::CodeModel::Model> CMModel("code-model", cl::desc("Choose code model"), @@ -245,6 +255,8 @@ Options.MCOptions = InitMCTargetOptionsFromFlags(); Options.JTType = JTableType; + Options.ThreadModel = TMModel; + return Options; } Index: include/llvm/Target/TargetOptions.h =================================================================== --- include/llvm/Target/TargetOptions.h +++ include/llvm/Target/TargetOptions.h @@ -50,6 +50,13 @@ }; } + namespace ThreadModel { + enum Model { + POSIX, // POSIX Threads + Single // Single Threaded Environment + }; + } + class TargetOptions { public: TargetOptions() @@ -220,6 +227,10 @@ /// create for functions that have the jumptable attribute. JumpTable::JumpTableType JTType; + /// ThreadModel - This flag specifies the type of threading model to assume + /// for things like atomics + ThreadModel::Model ThreadModel; + /// Machine level options. MCTargetOptions MCOptions; }; Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -749,11 +749,18 @@ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion. 
- if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { - // ATOMIC_FENCE needs custom lowering; the others should have been expanded - // to ldrex/strex loops already. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + // the default expansion. If we are targeting a single threaded system, + // then set them all for expand so we can lower them later into their + // non-atomic form. + if ((Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) || + TM.Options.ThreadModel == ThreadModel::Single) { + if (TM.Options.ThreadModel == ThreadModel::Single) { + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + } else { + // ATOMIC_FENCE needs custom lowering; the others should have been expanded + // to ldrex/strex loops already. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + } // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. Index: lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- lib/Target/ARM/ARMTargetMachine.cpp +++ lib/Target/ARM/ARMTargetMachine.cpp @@ -160,6 +160,9 @@ void ARMPassConfig::addIRPasses() { addPass(createAtomicExpandLoadLinkedPass(TM)); + if (TM->Options.ThreadModel == ThreadModel::Single) + addPass(createLowerAtomicPass()); + // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
Index: test/CodeGen/ARM/atomic-op.ll =================================================================== --- test/CodeGen/ARM/atomic-op.ll +++ test/CodeGen/ARM/atomic-op.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s ; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1 ; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-M0 +; RUN: llc < %s -mtriple=armv4t--none-eabi -thread-model Single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL define void @func(i32 %argc, i8** %argv) nounwind { entry: @@ -28,6 +29,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_add_4 ; CHECK-M0: bl ___sync_fetch_and_add_4 + ; CHECK-BAREMETAL: add + ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw add i32* %val1, i32 %tmp monotonic store i32 %0, i32* %old ; CHECK: ldrex @@ -35,6 +38,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_sub_4 ; CHECK-M0: bl ___sync_fetch_and_sub_4 + ; CHECK-BAREMETAL: sub + ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw sub i32* %val2, i32 30 monotonic store i32 %1, i32* %old ; CHECK: ldrex @@ -42,6 +47,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_add_4 ; CHECK-M0: bl ___sync_fetch_and_add_4 + ; CHECK-BAREMETAL: add + ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw add i32* %val2, i32 1 monotonic store i32 %2, i32* %old ; CHECK: ldrex @@ -49,6 +56,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_sub_4 ; CHECK-M0: bl ___sync_fetch_and_sub_4 + ; CHECK-BAREMETAL: sub + ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw sub i32* %val2, i32 1 monotonic store i32 %3, i32* %old ; CHECK: ldrex @@ -56,6 +65,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_and_4 ; CHECK-M0: bl ___sync_fetch_and_and_4 + ; CHECK-BAREMETAL: and + ; CHECK-BAREMETAL-NOT: __sync %4 = atomicrmw and i32* %andt, i32 4080 monotonic store i32 %4, i32* %old ; CHECK: ldrex @@ -63,6 +74,8 @@ ; CHECK: strex 
; CHECK-T1: blx ___sync_fetch_and_or_4 ; CHECK-M0: bl ___sync_fetch_and_or_4 + ; CHECK-BAREMETAL: or + ; CHECK-BAREMETAL-NOT: __sync %5 = atomicrmw or i32* %ort, i32 4080 monotonic store i32 %5, i32* %old ; CHECK: ldrex @@ -70,6 +83,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_xor_4 ; CHECK-M0: bl ___sync_fetch_and_xor_4 + ; CHECK-BAREMETAL: eor + ; CHECK-BAREMETAL-NOT: __sync %6 = atomicrmw xor i32* %xort, i32 4080 monotonic store i32 %6, i32* %old ; CHECK: ldrex @@ -77,6 +92,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_min_4 ; CHECK-M0: bl ___sync_fetch_and_min_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %7 = atomicrmw min i32* %val2, i32 16 monotonic store i32 %7, i32* %old %neg = sub i32 0, 1 @@ -85,6 +102,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_min_4 ; CHECK-M0: bl ___sync_fetch_and_min_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %8 = atomicrmw min i32* %val2, i32 %neg monotonic store i32 %8, i32* %old ; CHECK: ldrex @@ -92,6 +111,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_max_4 ; CHECK-M0: bl ___sync_fetch_and_max_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %9 = atomicrmw max i32* %val2, i32 1 monotonic store i32 %9, i32* %old ; CHECK: ldrex @@ -99,6 +120,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_max_4 ; CHECK-M0: bl ___sync_fetch_and_max_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %10 = atomicrmw max i32* %val2, i32 0 monotonic store i32 %10, i32* %old ; CHECK: ldrex @@ -106,6 +129,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_4 ; CHECK-M0: bl ___sync_fetch_and_umin_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %11 = atomicrmw umin i32* %val2, i32 16 monotonic store i32 %11, i32* %old %uneg = sub i32 0, 1 @@ -114,6 +139,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_4 ; CHECK-M0: bl ___sync_fetch_and_umin_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %12 = atomicrmw umin i32* %val2, i32 %uneg 
monotonic store i32 %12, i32* %old ; CHECK: ldrex @@ -121,6 +148,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_4 ; CHECK-M0: bl ___sync_fetch_and_umax_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %13 = atomicrmw umax i32* %val2, i32 1 monotonic store i32 %13, i32* %old ; CHECK: ldrex @@ -128,6 +157,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_4 ; CHECK-M0: bl ___sync_fetch_and_umax_4 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %14 = atomicrmw umax i32* %val2, i32 0 monotonic store i32 %14, i32* %old @@ -144,6 +175,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_2 ; CHECK-M0: bl ___sync_fetch_and_umin_2 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i16* %val, i16 16 monotonic store i16 %0, i16* %old %uneg = sub i16 0, 1 @@ -152,6 +185,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_2 ; CHECK-M0: bl ___sync_fetch_and_umin_2 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %1 = atomicrmw umin i16* %val, i16 %uneg monotonic store i16 %1, i16* %old ; CHECK: ldrex @@ -159,6 +194,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_2 ; CHECK-M0: bl ___sync_fetch_and_umax_2 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw umax i16* %val, i16 1 monotonic store i16 %2, i16* %old ; CHECK: ldrex @@ -166,6 +203,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_2 ; CHECK-M0: bl ___sync_fetch_and_umax_2 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw umax i16* %val, i16 0 monotonic store i16 %3, i16* %old ret void @@ -181,6 +220,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_1 ; CHECK-M0: bl ___sync_fetch_and_umin_1 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i8* %val, i8 16 monotonic store i8 %0, i8* %old ; CHECK: ldrex @@ -188,6 +229,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umin_1 ; CHECK-M0: bl ___sync_fetch_and_umin_1 + ; CHECK-BAREMETAL: cmp + 
; CHECK-BAREMETAL-NOT: __sync %uneg = sub i8 0, 1 %1 = atomicrmw umin i8* %val, i8 %uneg monotonic store i8 %1, i8* %old @@ -196,6 +239,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_1 ; CHECK-M0: bl ___sync_fetch_and_umax_1 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %2 = atomicrmw umax i8* %val, i8 1 monotonic store i8 %2, i8* %old ; CHECK: ldrex @@ -203,6 +248,8 @@ ; CHECK: strex ; CHECK-T1: blx ___sync_fetch_and_umax_1 ; CHECK-M0: bl ___sync_fetch_and_umax_1 + ; CHECK-BAREMETAL: cmp + ; CHECK-BAREMETAL-NOT: __sync %3 = atomicrmw umax i8* %val, i8 0 monotonic store i8 %3, i8* %old ret void