Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2127,6 +2127,10 @@ HelpText<"Disallow use of CRC instructions (ARM only)">; def mno_neg_immediates: Flag<["-"], "mno-neg-immediates">, Group, HelpText<"Disallow converting instructions with negative immediates to their negation or inversion.">; +def mgm: Flag<["-"], "mgm">, Group, + HelpText<"Target has a Global Monitor, supports ldrex/strex atomic instructions.">; +def mno_gm: Flag<["-"], "mno-gm">, Group, + HelpText<"Target has no Global Monitor, does not support ldrex/strex atomic instructions.">; def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64 only)">; Index: clang/lib/Driver/ToolChains/Arch/ARM.cpp =================================================================== --- clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -557,6 +557,11 @@ if (Args.hasArg(options::OPT_mno_neg_immediates)) Features.push_back("+no-neg-immediates"); + + if (Arg *A = Args.getLastArg(options::OPT_mno_gm, options::OPT_mgm)) { + if (A->getOption().matches(options::OPT_mno_gm)) + Features.push_back("+no-gm"); + } } const std::string arm::getARMArch(StringRef Arch, const llvm::Triple &Triple) { Index: llvm/include/llvm/CodeGen/TargetSubtargetInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -208,6 +208,9 @@ /// True if the subtarget should run the atomic expansion pass. virtual bool enableAtomicExpand() const; + /// True if the subtarget has a global monitor. + virtual bool hasGlobalMonitor() const; + /// True if the subtarget should run the indirectbr expansion pass. 
virtual bool enableIndirectBrExpand() const; Index: llvm/lib/CodeGen/AtomicExpandPass.cpp =================================================================== --- llvm/lib/CodeGen/AtomicExpandPass.cpp +++ llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -236,7 +236,7 @@ continue; } } else if (RMWI) { - if (!atomicSizeSupported(TLI, RMWI)) { + if (!atomicSizeSupported(TLI, RMWI) || !TM.getSubtargetImpl(F)->hasGlobalMonitor()) { expandAtomicRMWToLibcall(RMWI); MadeChange = true; continue; Index: llvm/lib/CodeGen/TargetSubtargetInfo.cpp =================================================================== --- llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -29,6 +29,10 @@ return true; } +bool TargetSubtargetInfo::hasGlobalMonitor() const { + return true; +} + bool TargetSubtargetInfo::enableIndirectBrExpand() const { return false; } Index: llvm/lib/Target/ARM/ARM.td =================================================================== --- llvm/lib/Target/ARM/ARM.td +++ llvm/lib/Target/ARM/ARM.td @@ -95,6 +95,10 @@ "Has v8 acquire/release (lda/ldaex " " etc) instructions">; +def FeatureHasNoGlobalMonitor : SubtargetFeature<"no-gm", + "HasNoGlobalMonitor", "true", + "Has no Global Monitor, no support " + "for ldrex/strex instructions">; def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -305,6 +305,8 @@ def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">, AssemblerPredicate<"FeatureV7Clrex", "v7 clrex">; +def HasNoGlobalMonitor : Predicate<"Subtarget->hasNoGlobalMonitor()">; +def HasGlobalMonitor : Predicate<"!Subtarget->hasNoGlobalMonitor()">; def HasAcquireRelease : Predicate<"Subtarget->hasAcquireRelease()">, AssemblerPredicate<"FeatureAcquireRelease", "acquire/release">; Index: 
llvm/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -3281,17 +3281,17 @@ AddrModeNone, 4, NoItinerary, "ldrexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline, HasGlobalMonitor]>; def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline, HasGlobalMonitor]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeT2_ldrex, 4, NoItinerary, "ldrex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]> { + Requires<[IsThumb, HasV8MBaseline, HasGlobalMonitor]> { bits<4> Rt; bits<12> addr; let Inst{31-27} = 0b11101; @@ -3307,7 +3307,7 @@ AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, - Requires<[IsThumb2, IsNotMClass]> { + Requires<[IsThumb2, IsNotMClass, HasGlobalMonitor]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3315,17 +3315,17 @@ AddrModeNone, 4, NoItinerary, "ldaexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex, HasGlobalMonitor]>; def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex, HasGlobalMonitor]>; def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaex", "\t$Rt, 
$addr", "", [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex, HasGlobalMonitor]> { bits<4> Rt; bits<4> addr; let Inst{31-27} = 0b11101; @@ -3356,14 +3356,14 @@ "strexb", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline, HasGlobalMonitor]>; def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline, HasGlobalMonitor]>; def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), @@ -3371,7 +3371,7 @@ "strex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]> { + Requires<[IsThumb, HasV8MBaseline, HasGlobalMonitor]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3388,7 +3388,7 @@ AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, - Requires<[IsThumb2, IsNotMClass]> { + Requires<[IsThumb2, IsNotMClass, HasGlobalMonitor]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3399,7 +3399,7 @@ [(set rGPR:$Rd, (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex]>; + HasV7Clrex, HasGlobalMonitor]>; def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3408,7 +3408,7 @@ [(set rGPR:$Rd, (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex]>; + HasV7Clrex, HasGlobalMonitor]>; def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3433,7 +3433,7 @@ AddrModeNone, 4, NoItinerary, "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, 
Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex, IsNotMClass]> { + HasV7Clrex, IsNotMClass, HasGlobalMonitor]> { bits<4> Rt2; let Inst{11-8} = Rt2; } Index: llvm/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/lib/Target/ARM/ARMSubtarget.h +++ llvm/lib/Target/ARM/ARMSubtarget.h @@ -257,6 +257,10 @@ /// instructions bool HasAcquireRelease = false; + /// HasNoGlobalMonitor - True if the subtarget does not have a Global Monitor + /// i.e. cannot support ldrex/strex instructions + bool HasNoGlobalMonitor = false; + /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions /// over 16-bit ones. bool Pref32BitThumb = false; @@ -592,6 +596,7 @@ bool hasFullDataBarrier() const { return HasFullDataBarrier; } bool hasV7Clrex() const { return HasV7Clrex; } bool hasAcquireRelease() const { return HasAcquireRelease; } + bool hasNoGlobalMonitor() const { return HasNoGlobalMonitor; } bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); @@ -771,6 +776,10 @@ // enableAtomicExpand- True if we need to expand our atomics. bool enableAtomicExpand() const override; + /// hasGlobalMonitor - True if the processor has a Global Monitor + /// i.e. can support ldrex/strex instructions + bool hasGlobalMonitor() const override { return !hasNoGlobalMonitor(); } + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. 
const InstrItineraryData *getInstrItineraryData() const override { Index: llvm/test/CodeGen/ARM/atomic-nogm.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/atomic-nogm.ll @@ -0,0 +1,87 @@ +; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 -mattr=+no-gm %s -o - | FileCheck %s --check-prefix=CHECK-T1 +; RUN: llc -verify-machineinstrs -mtriple=thumbv7m-none-eabi -mcpu=cortex-m4 -mattr=+no-gm -mattr=-no-gm %s -o - | FileCheck %s + +; CHECK-NOT: __atomic_fetch_add_8 +; CHECK: ldrex +; CHECK: strex +; CHECK-NOT: __atomic_fetch_add_4 +; CHECK: ldrexh +; CHECK: strexh +; CHECK-NOT: __atomic_fetch_add_2 +; CHECK: ldrexb +; CHECK: strexb +; CHECK-NOT: __atomic_fetch_add_1 +; CHECK: ldrexb +; CHECK: strexb +; CHECK-NOT: __atomic_exchange_1 + +; CHECK-T1: __atomic_fetch_add_8 +; CHECK-T1-NOT: ldrex +; CHECK-T1-NOT: strex +; CHECK-T1: __atomic_fetch_add_4 +; CHECK-T1-NOT: ldrexh +; CHECK-T1-NOT: strexh +; CHECK-T1: __atomic_fetch_add_2 +; CHECK-T1-NOT: ldrexb +; CHECK-T1-NOT: strexb +; CHECK-T1: __atomic_fetch_add_1 +; CHECK-T1-NOT: ldrexb +; CHECK-T1-NOT: strexb +; CHECK-T1: __atomic_exchange_1 + +define dso_local void @atomic_function_example_64() #0 { +entry: + %v = alloca i64, align 4 + store i64 0, i64* %v, align 4 + %0 = atomicrmw add i64* %v, i64 1 seq_cst + %1 = add i64 %0, 1 + ret void +} + +define dso_local void @atomic_function_example_32() #0 { +entry: + %v = alloca i32, align 4 + store i32 0, i32* %v, align 4 + %0 = atomicrmw add i32* %v, i32 1 seq_cst + %1 = add i32 %0, 1 + ret void +} + +define dso_local void @atomic_function_example_16() #0 { +entry: + %v = alloca i16, align 4 + store i16 0, i16* %v, align 4 + %0 = atomicrmw add i16* %v, i16 1 seq_cst + %1 = add i16 %0, 1 + ret void +} + +define dso_local void @atomic_function_example_8() #0 { +entry: + %v = alloca i8, align 
4 + store i8 0, i8* %v, align 4 + %0 = atomicrmw add i8* %v, i8 1 seq_cst + %1 = add i8 %0, 1 + ret void +} + +%struct.atomic_flag = type { i8 } + +define dso_local zeroext i1 @atomic_flag_test_and_set(%struct.atomic_flag* %object) #0 { +entry: + %object.addr = alloca %struct.atomic_flag*, align 4 + %.atomictmp = alloca i8, align 1 + %atomic-temp = alloca i8, align 1 + store %struct.atomic_flag* %object, %struct.atomic_flag** %object.addr, align 4 + %0 = load %struct.atomic_flag*, %struct.atomic_flag** %object.addr, align 4 + %_Value = getelementptr inbounds %struct.atomic_flag, %struct.atomic_flag* %0, i32 0, i32 0 + store i8 1, i8* %.atomictmp, align 1 + %1 = load i8, i8* %.atomictmp, align 1 + %2 = atomicrmw volatile xchg i8* %_Value, i8 %1 seq_cst + store i8 %2, i8* %atomic-temp, align 1 + %3 = load i8, i8* %atomic-temp, align 1 + %tobool = trunc i8 %3 to i1 + ret i1 %tobool +} +