diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -716,10 +716,6 @@
     /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
     unsigned ARMPCLabelIndex;
 
-    // TODO: remove this, and have shouldInsertFencesForAtomic do the proper
-    // check.
-    bool InsertFencesForAtomic;
-
     bool HasStandaloneRem = true;
 
     void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -153,6 +153,17 @@
   ARM::R0, ARM::R1, ARM::R2, ARM::R3
 };
 
+// Prior to ARMv6, there were no LL/SC instructions available, which in general
+// means that lock-free atomics can't be supported. However, some OSes provide
+// kernel assistance for implementing a cmpxchg operation (e.g. via a
+// "Restartable Atomic Sequence" on FreeBSD, or the kuser_cmpxchg function on
+// Linux). On such OSes, we can assume that functioning lock-free __sync_*
+// atomic libcalls are available, regardless of the target CPU.
+static bool hasLockFreeCmpXChgHelpers(const ARMSubtarget &Subtarget) {
+  return (Subtarget.isTargetDarwin() || Subtarget.isTargetLinux() ||
+          Subtarget.isTargetFreeBSD() || Subtarget.isTargetNetBSD());
+}
+
 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                        MVT PromotedBitwiseVT) {
   if (VT != PromotedLdStVT) {
@@ -1279,55 +1290,47 @@
   else
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
 
-  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
-  // the default expansion.
-  InsertFencesForAtomic = false;
-  if (Subtarget->hasAnyDataBarrier() &&
-      (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
-    // ATOMIC_FENCE needs custom lowering; the others should have been expanded
-    // to ldrex/strex loops already.
-    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-    if (!Subtarget->isThumb() || !Subtarget->isMClass())
+  if (Subtarget->hasLdrex()) {
+    // If we have ldrex, we can support native 32-bit atomics; with ldrexd,
+    // also 64-bit atomics.
+    if (Subtarget->hasLdrexd()) {
+      setMaxAtomicSizeInBitsSupported(64);
       setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
-
-    // On v8, we have particularly efficient implementations of atomic fences
-    // if they can be combined with nearby atomic loads and stores.
-    if (!Subtarget->hasAcquireRelease() ||
-        getTargetMachine().getOptLevel() == 0) {
-      // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
-      InsertFencesForAtomic = true;
-    }
-  } else {
-    // If there's anything we can use as a barrier, go through custom lowering
-    // for ATOMIC_FENCE.
-    // If target has DMB in thumb, Fences can be inserted.
-    if (Subtarget->hasDataBarrier())
-      InsertFencesForAtomic = true;
-
-    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
-                       Subtarget->hasAnyDataBarrier() ? Custom : Expand);
-
-    // Set them all for expansion, which will force libcalls.
-    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
-    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+    } else
+      setMaxAtomicSizeInBitsSupported(32);
+  } else if (hasLockFreeCmpXChgHelpers(*Subtarget)) {
+    // If we're on an OS which provides kernel assistance (exposed via
+    // lock-free __sync_* libcalls), we can emit 32-bit atomic loads/stores
+    // directly, and depend on the __sync_* calls for the other operations.
+    // We don't support 64-bit in this way, because the 64-bit load/store
+    // instructions are not (always) atomic.
+
+    setMaxAtomicSizeInBitsSupported(32);
+
+    // Set everything but ATOMIC_LOAD/ATOMIC_STORE for expansion to __sync_*
+    // libcalls.
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
-    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
-    // Unordered/Monotonic case.
-    if (!InsertFencesForAtomic) {
-      setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
-      setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
-    }
+  } else {
+    // Otherwise, native atomic support cannot be guaranteed.
+    setMaxAtomicSizeInBitsSupported(0);
   }
 
+  // If there's anything we can use as a barrier, go through custom lowering
+  // for ATOMIC_FENCE. Otherwise expand to __sync_synchronize libcall.
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
+                     Subtarget->hasAnyDataBarrier() ? Custom : Expand);
+
   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
 
   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
@@ -9711,16 +9714,6 @@
   return LowerVecReduce(Op, DAG, ST);
 }
 
-static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
-  if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
-    // Acquire/Release load/store is not legal for targets without a dmb or
-    // equivalent available.
-    return SDValue();
-
-  // Monotonic load/store is legal for all targets.
-  return Op;
-}
-
 static void ReplaceREADCYCLECOUNTER(SDNode *N,
                                     SmallVectorImpl<SDValue> &Results,
                                     SelectionDAG &DAG,
@@ -9927,8 +9920,6 @@
   case ISD::VECREDUCE_FMIN:
   case ISD::VECREDUCE_FMAX:
     return LowerVecReduceF(Op, DAG, Subtarget);
-  case ISD::ATOMIC_LOAD:
-  case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
   case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
   case ISD::SDIVREM:
   case ISD::UDIVREM:       return LowerDivRem(Op, DAG);
@@ -18841,8 +18832,6 @@
   // First, if the target has no DMB, see what fallback we can use.
   if (!Subtarget->hasDataBarrier()) {
     // Some ARMv6 cpus can support data barriers with an mcr instruction.
-    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
-    // here.
     if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
       Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
       Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
@@ -18850,9 +18839,10 @@
                         Builder.getInt32(10), Builder.getInt32(5)};
       return Builder.CreateCall(MCR, args);
     } else {
-      // Instead of using barriers, atomic accesses on these subtargets use
-      // libcalls.
-      llvm_unreachable("makeDMB on a target so old that it has no barriers");
+      // Instead of using barriers, atomic accesses in Thumb1 and pre-v6 ARM
+      // mode just use a libcall to __sync_synchronize, so simply emit a
+      // fence instruction.
+      return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
     }
   } else {
     Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
@@ -18907,64 +18897,93 @@
   llvm_unreachable("Unknown fence ordering in emitTrailingFence");
 }
 
-// Loads and stores less than 64-bits are already atomic; ones above that
-// are doomed anyway, so defer to the default libcall and blame the OS when
-// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
-// anything for those.
+// Loads and stores less than 64 bits are intrinsically atomic. For 64-bit
+// operations, we can use ldrexd/strexd. We don't need to check for their
+// availability, because when they're not available, we only support 32-bit
+// lock-free atomics.
+//
+// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
+// guarantee, see DDI0406C ARM architecture reference manual, sections
+// A8.8.72-74 LDRD); on such CPUs it would be advantageous to not expand 64-bit
+// loads and stores to LL/SC sequences.
 bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
-  return (Size == 64) && !Subtarget->isMClass();
+  return Size == 64;
 }
 
-// Loads and stores less than 64-bits are already atomic; ones above that
-// are doomed anyway, so defer to the default libcall and blame the OS when
-// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
-// anything for those.
-// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
-// guarantee, see DDI0406C ARM architecture reference manual,
-// sections A8.8.72-74 LDRD)
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
-                                                  : AtomicExpansionKind::None;
+  if (Size == 64)
+    return AtomicExpansionKind::LLOnly;
+
+  return AtomicExpansionKind::None;
 }
 
-// For the real atomic operations, we have ldrex/strex up to 32 bits,
-// and up to 64 bits on the non-M profiles
+// In the following "should*Atomic*" routines, there are two cases to consider:
+// 1. We have native atomics (hasLdrex() is true). We want to expand to LL/SC.
+//
+// 2. We don't actually have native atomics, but we pretend that we do, because
+//    we're on an OS that provides a "magic" lock-free compare-and-swap
+//    routine. In this case, we rely on __sync libcall expansions for all the
+//    operations. Thus, we avoid doing expansions in IR.
+//
+// If there are neither native atomics nor special OS routines allowing
+// lock-free libcalls, these routines will not be called at all, because
+// MaxAtomicSizeInBitsSupported was set to 0.
+
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // Floating-point operations are always expanded to a cmpxchg loop, because
+  // they may trigger a trap which aborts an LLSC sequence.
   if (AI->isFloatingPointOperation())
     return AtomicExpansionKind::CmpXChg;
 
-  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
-             ? AtomicExpansionKind::LLSC
-             : AtomicExpansionKind::None;
+  if (!Subtarget->hasLdrex())
+    return AtomicExpansionKind::None;
+  return AtomicExpansionKind::LLSC;
 }
 
-// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
-// bits, and up to 64 bits on the non-M profiles.
+// Similar to shouldExpandAtomicRMWInIR, we use LL/SC when available, or a
+// __sync_* function if not.
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
+  if (!Subtarget->hasLdrex())
+    return AtomicExpansionKind::None;
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also
   // on the stack and close enough to the spill slot, this can lead to a
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
-  unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
-  bool HasAtomicCmpXchg =
-      !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
-      Size <= (Subtarget->isMClass() ? 32U : 64U))
-    return AtomicExpansionKind::LLSC;
-  return AtomicExpansionKind::None;
+  if (getTargetMachine().getOptLevel() == 0)
+    return AtomicExpansionKind::None;
+
+  return AtomicExpansionKind::LLSC;
 }
 
 bool ARMTargetLowering::shouldInsertFencesForAtomic(
     const Instruction *I) const {
-  return InsertFencesForAtomic;
+  // When we don't have ldrex, we may be emitting __sync_* libcalls. These don't
+  // need fences inserted as they already have appropriate barriers within the
+  // function. Loads and stores, however, are handled directly, and thus do
+  // require fence insertion.
+  if (!Subtarget->hasLdrex()) {
+    return isa<LoadInst>(I) || isa<StoreInst>(I);
+  }
+
+  // In -O0 mode, there's a hack in place to expand ATOMIC_CMP_SWAP in a late
+  // pseudo expansion instead of in IR. This pseudo requires fences to be
+  // emitted externally.
+  if (getTargetMachine().getOptLevel() == 0 && isa<AtomicCmpXchgInst>(I))
+    return true;
+
+  // On v8, we have additional acquire/release instructions that are more
+  // efficient than a separate fence.
+  if (Subtarget->hasAcquireRelease())
+    return false;
+
+  // Otherwise, insert fences (dmb ish) around all atomic operations.
+  return true;
 }
 
 // This has so far only been implemented for MachO.
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -739,6 +739,7 @@ bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } + bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); } bool isTargetWindows() const { return TargetTriple.isOSWindows(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } @@ -857,8 +858,16 @@ /// scheduling, DAGCombine, etc.). bool useAA() const override { return true; } - // enableAtomicExpand- True if we need to expand our atomics. - bool enableAtomicExpand() const override; + // True for targets that support atomic ldrex/strex instructions. + bool hasLdrex() const { + return HasV6Ops && (!InThumbMode || HasV8MBaselineOps); + } + + // True for targets which support atomic ldrexd/strexd instructions. + bool hasLdrexd() const { + // The Cortex-M series only support 32bit atomics. + return hasLdrex() && !isMClass(); + } /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -409,8 +409,6 @@ return !isThumb1Only(); } -bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } - bool ARMSubtarget::useStride4VFPs() const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). But WatchOS uses a compact unwind diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll --- a/llvm/test/CodeGen/ARM/atomic-64bit.ll +++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll @@ -1,12 +1,12 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE -; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE -; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE -; RUN: llc < %s -mtriple=thumbebv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-BE -; RUN: llc < %s -mtriple=armv7m--none-eabi | FileCheck %s --check-prefix=CHECK-M -; RUN: llc < %s -mtriple=armv8m--none-eabi | FileCheck %s --check-prefix=CHECK-M +; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefixes=COMMON,CHECK,CHECK-LE +; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefixes=COMMON,CHECK-THUMB,CHECK-THUMB-LE +; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefixes=COMMON,CHECK,CHECK-BE +; RUN: llc < %s -mtriple=thumbebv7-none-linux-gnueabihf | FileCheck %s --check-prefixes=COMMON,CHECK-THUMB,CHECK-THUMB-BE +; RUN: llc < %s -mtriple=armv7m--none-eabi | FileCheck %s --check-prefixes=COMMON,CHECK-M +; RUN: llc < %s -mtriple=armv8m.base--none-eabi | FileCheck %s --check-prefixes=COMMON,CHECK-M define i64 @test1(i64* %ptr, i64 %val) { -; CHECK-LABEL: test1: +; COMMON-LABEL: test1: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK-LE: adds [[REG3:(r[0-9]?[02468])]], [[REG1]] @@ -18,7 +18,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test1: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd 
[[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE: adds.w [[REG3:[a-z0-9]+]], [[REG1]] @@ -30,14 +29,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_add_8 +; CHECK-M: __atomic_fetch_add_8 %r = atomicrmw add i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test2(i64* %ptr, i64 %val) { -; CHECK-LABEL: test2: +; COMMON-LABEL: test2: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK-LE: subs [[REG3:(r[0-9]?[02468])]], [[REG1]] @@ -49,7 +48,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test2: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE: subs.w [[REG3:[a-z0-9]+]], [[REG1]] @@ -61,14 +59,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_sub_8 +; CHECK-M: __atomic_fetch_sub_8 %r = atomicrmw sub i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test3(i64* %ptr, i64 %val) { -; CHECK-LABEL: test3: +; COMMON-LABEL: test3: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK-LE-DAG: and [[REG3:(r[0-9]?[02468])]], [[REG1]] @@ -80,7 +78,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test3: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: and.w [[REG3:[a-z0-9]+]], [[REG1]] @@ -92,14 +89,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_and_8 +; CHECK-M: __atomic_fetch_and_8 %r = atomicrmw and i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test4(i64* %ptr, i64 %val) { -; CHECK-LABEL: test4: +; COMMON-LABEL: test4: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK-LE-DAG: orr [[REG3:(r[0-9]?[02468])]], [[REG1]] @@ -111,7 +108,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test4: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: orr.w [[REG3:[a-z0-9]+]], [[REG1]] @@ -123,14 +119,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_or_8 +; CHECK-M: __atomic_fetch_or_8 %r = atomicrmw or i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test5(i64* %ptr, i64 %val) { -; CHECK-LABEL: test5: +; COMMON-LABEL: test5: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK-LE-DAG: eor [[REG3:(r[0-9]?[02468])]], [[REG1]] @@ -142,7 +138,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test5: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: eor.w [[REG3:[a-z0-9]+]], [[REG1]] @@ -154,14 +149,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_xor_8 +; CHECK-M: __atomic_fetch_xor_8 %r = atomicrmw xor i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test6(i64* %ptr, i64 %val) { -; CHECK-LABEL: test6: +; COMMON-LABEL: test6: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} @@ -169,7 +164,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test6: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} @@ -177,14 +171,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_lock_test_and_set_8 +; CHECK-M: __atomic_exchange_8 %r = 
atomicrmw xchg i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) { -; CHECK-LABEL: test7: +; COMMON-LABEL: test7: ; CHECK-DAG: mov [[VAL1LO:r[0-9]+]], r1 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK-LE-DAG: eor [[MISMATCH_LO:.*]], [[REG1]], [[VAL1LO]] @@ -199,7 +193,6 @@ ; CHECK: beq ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test7: ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2 ; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3 @@ -213,7 +206,7 @@ ; CHECK-THUMB: beq ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_val_compare_and_swap_8 +; CHECK-M: __atomic_compare_exchange_8 %pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst %r = extractvalue { i64, i1 } %pair, 0 @@ -223,21 +216,20 @@ ; Compiles down to a single ldrexd, except on M class devices where ldrexd ; isn't supported. define i64 @test8(i64* %ptr) { -; CHECK-LABEL: test8: +; COMMON-LABEL: test8: ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK-NOT: strexd ; CHECK: clrex ; CHECK-NOT: strexd ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test8: ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-NOT: strexd ; CHECK-THUMB: clrex ; CHECK-THUMB-NOT: strexd ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_val_compare_and_swap_8 +; CHECK-M: __atomic_load_8 %r = load atomic i64, i64* %ptr seq_cst, align 8 ret i64 %r @@ -247,7 +239,7 @@ ; way to write it. Except on M class devices, where ldrexd/strexd aren't ; supported. define void @test9(i64* %ptr, i64 %val) { -; CHECK-LABEL: test9: +; COMMON-LABEL: test9: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}} @@ -255,7 +247,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test9: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} @@ -263,14 +254,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_lock_test_and_set_8 +; CHECK-M: __atomic_store_8 store atomic i64 %val, i64* %ptr seq_cst, align 8 ret void } define i64 @test10(i64* %ptr, i64 %val) { -; CHECK-LABEL: test10: +; COMMON-LABEL: test10: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 @@ -289,7 +280,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test10: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 @@ -308,14 +298,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_min_8 +; CHECK-M: __atomic_compare_exchange_8 %r = atomicrmw min i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test11(i64* %ptr, i64 %val) { -; CHECK-LABEL: test11: +; COMMON-LABEL: test11: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 @@ -334,7 +324,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test11: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 @@ -353,14 +342,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_umin_8 +; CHECK-M: __atomic_compare_exchange_8 %r = atomicrmw 
umin i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test12(i64* %ptr, i64 %val) { -; CHECK-LABEL: test12: +; COMMON-LABEL: test12: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 @@ -379,7 +368,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test12: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 @@ -398,14 +386,14 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_max_8 +; CHECK-M: __atomic_compare_exchange_8 %r = atomicrmw max i64* %ptr, i64 %val seq_cst ret i64 %r } define i64 @test13(i64* %ptr, i64 %val) { -; CHECK-LABEL: test13: +; COMMON-LABEL: test13: ; CHECK: dmb {{ish$}} ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]] ; CHECK: mov [[OUT_HI:[a-z0-9]+]], r2 @@ -424,7 +412,6 @@ ; CHECK: bne ; CHECK: dmb {{ish$}} -; CHECK-THUMB-LABEL: test13: ; CHECK-THUMB: dmb {{ish$}} ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB: mov [[OUT_LO:[a-z0-9]+]], r2 @@ -443,7 +430,7 @@ ; CHECK-THUMB: bne ; CHECK-THUMB: dmb {{ish$}} -; CHECK-M: __sync_fetch_and_umax_8 +; CHECK-M: __atomic_compare_exchange_8 %r = atomicrmw umax i64* %ptr, i64 %val seq_cst ret i64 %r diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll --- a/llvm/test/CodeGen/ARM/atomic-load-store.ll +++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll @@ -1,27 +1,25 @@ -; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=ARM -; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefix=ARM -; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=THUMBTWO -; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE -; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4 -; RUN: llc < %s -mtriple=armv6-apple-ios | FileCheck %s -check-prefix=ARMV6 -; RUN: llc < %s -mtriple=thumbv7m-apple-ios | FileCheck %s -check-prefix=THUMBM +; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,ARM +; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s -check-prefixes=CHECK,ARM +; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,THUMBTWO +; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefixes=CHECK,THUMBONE +; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefixes=CHECK,ARMV4 +; RUN: llc < %s -mtriple=armv6-apple-ios | FileCheck %s -check-prefixes=CHECK,ARMV6 +; RUN: llc < %s -mtriple=thumbv7m-apple-ios | FileCheck %s -check-prefixes=CHECK,THUMBM define void @test1(i32* %ptr, i32 %val1) { -; ARM-LABEL: test1 +; CHECK-LABEL: test1: ; ARM: dmb {{ish$}} ; ARM-NEXT: str ; ARM-NEXT: dmb {{ish$}} -; THUMBONE-LABEL: test1 -; THUMBONE: __sync_lock_test_and_set_4 -; THUMBTWO-LABEL: test1 +; THUMBONE: ___sync_synchronize +; THUMBONE-NEXT: str +; THUMBONE-NEXT: ___sync_synchronize ; THUMBTWO: dmb {{ish$}} ; THUMBTWO-NEXT: str ; THUMBTWO-NEXT: dmb {{ish$}} -; ARMV6-LABEL: test1 ; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5 ; ARMV6: str ; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5 -; THUMBM-LABEL: test1 ; THUMBM: dmb sy ; THUMBM: str ; THUMBM: dmb sy @@ -30,18 +28,16 @@ } define i32 @test2(i32* %ptr) { -; ARM-LABEL: test2 +; CHECK-LABEL: test2: + ; ARM: ldr ; ARM-NEXT: dmb {{ish$}} -; THUMBONE-LABEL: 
test2 -; THUMBONE: __sync_val_compare_and_swap_4 -; THUMBTWO-LABEL: test2 +; THUMBONE: ldr +; THUMBONE: __sync_synchronize ; THUMBTWO: ldr ; THUMBTWO-NEXT: dmb {{ish$}} -; ARMV6-LABEL: test2 ; ARMV6: ldr ; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5 -; THUMBM-LABEL: test2 ; THUMBM: ldr ; THUMBM: dmb sy %val = load atomic i32, i32* %ptr seq_cst, align 4 @@ -49,7 +45,8 @@ } define void @test3(i8* %ptr1, i8* %ptr2) { -; ARM-LABEL: test3 +; CHECK-LABEL: test3: + ; ARM-NOT: dmb ; ARM: ldrb ; ARM-NOT: dmb @@ -57,7 +54,6 @@ ; ARM-NOT: dmb ; ARM: bx lr -; THUMBTWO-LABEL: test3 ; THUMBTWO-NOT: dmb ; THUMBTWO: ldrb ; THUMBTWO-NOT: dmb @@ -65,16 +61,16 @@ ; THUMBTWO-NOT: dmb ; THUMBTWO: bx lr -; THUMBONE-LABEL: test3 ; THUMBONE-NOT: dmb +; THUMBONE-NOT: __sync_synchronize ; THUMBONE: ldrb ; THUMBONE-NOT: dmb +; THUMBONE-NOT: __sync_synchronize ; THUMBONE: strb ; THUMBONE-NOT: dmb +; THUMBONE-NOT: __sync_synchronize -; ARMV6-LABEL: test3 ; ARMV6-NOT: mcr -; THUMBM-LABEL: test3 ; THUMBM-NOT: dmb sy %val = load atomic i8, i8* %ptr1 unordered, align 1 store atomic i8 %val, i8* %ptr2 unordered, align 1 @@ -82,26 +78,30 @@ } define void @test4(i8* %ptr1, i8* %ptr2) { -; THUMBONE-LABEL: test4 -; THUMBONE: ___sync_val_compare_and_swap_1 -; THUMBONE: ___sync_lock_test_and_set_1 -; ARMV6-LABEL: test4 -; THUMBM-LABEL: test4 +; CHECK-LABEL: test4 + +; THUMBONE: ldrb +; THUMBONE-NEXT: ___sync_synchronize +; THUMBONE-NEXT: ___sync_synchronize +; THUMBONE-NEXT: strb +; THUMBONE-NEXT: ___sync_synchronize %val = load atomic i8, i8* %ptr1 seq_cst, align 1 store atomic i8 %val, i8* %ptr2 seq_cst, align 1 ret void } define i64 @test_old_load_64bit(i64* %p) { -; ARMV4-LABEL: test_old_load_64bit -; ARMV4: ___sync_val_compare_and_swap_8 +; CHECK-LABEL: test_old_load_64bit + +; ARMV4: ___atomic_load_8 %1 = load atomic i64, i64* %p seq_cst, align 8 ret i64 %1 } define void @test_old_store_64bit(i64* %p, i64 %v) { -; ARMV4-LABEL: test_old_store_64bit -; ARMV4: ___sync_lock_test_and_set_8 +; CHECK-LABEL: test_old_store_64bit + +; ARMV4: ___atomic_store_8 store atomic i64 %v, i64* %p seq_cst, align 8 ret void } diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll --- a/llvm/test/CodeGen/ARM/atomic-op.ll +++ b/llvm/test/CodeGen/ARM/atomic-op.ll @@ -365,8 +365,10 @@ ; CHECK-T1-M0: ldr {{r[0-9]}}, [r1] ; CHECK-T1-M0: dmb -; CHECK-T1: ___sync_val_compare_and_swap_4 -; CHECK-T1: ___sync_val_compare_and_swap_4 +; CHECK-T1: ldr {{r[0-9]}}, [{{r[0-9]}}] +; CHECK-T1: __sync_synchronize +; CHECK-T1: ldr {{r[0-9]}}, [{{r[0-9]}}] +; CHECK-T1: __sync_synchronize ; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r0] ; CHECK-BAREMETAL-NOT: dmb @@ -387,8 +389,10 @@ ; CHECK: dmb ; CHECK: str r3, [r2] -; CHECK-T1: ___sync_lock_test_and_set -; CHECK-T1: ___sync_lock_test_and_set +; CHECK-T1: __sync_synchronize +; CHECK-T1: str {{r[0-9]}}, [{{r[0-9]}}] +; CHECK-T1: __sync_synchronize +; CHECK-T1: str {{r[0-9]}}, [{{r[0-9]}}] ; CHECK-T1-M0: dmb ; CHECK-T1-M0: str r1, [r0] diff --git a/llvm/test/CodeGen/ARM/atomic-ops-m33.ll b/llvm/test/CodeGen/ARM/atomic-ops-m33.ll --- a/llvm/test/CodeGen/ARM/atomic-ops-m33.ll +++ b/llvm/test/CodeGen/ARM/atomic-ops-m33.ll @@ -71,7 +71,7 @@ define void @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64: -; CHECK: bl __sync_fetch_and_add_8 +; CHECK: bl __atomic_fetch_add_8 %old = atomicrmw add i64* @var64, i64 %offset monotonic store i64 %old, i64* @var64 ret void
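Note (illustration, not part of the patch): a minimal IR sketch of the two lowering paths produced by the hasLdrex()/hasLockFreeCmpXChgHelpers() split above, assuming a target without ldrex but with OS cmpxchg helpers (e.g. thumbv6-apple-ios, as exercised by the THUMBONE/CHECK-T1 tests). The function names are invented for the example; the expected output mirrors the updated FileCheck lines.

; Sketch, assuming a thumbv6-apple-ios style target (no ldrex, Darwin helpers).
define i32 @rmw_add(i32* %p, i32 %v) {
  ; RMW operations stay on the __sync_* path: this expands to a call to
  ; __sync_fetch_and_add_4.
  %r = atomicrmw add i32* %p, i32 %v seq_cst
  ret i32 %r
}

define i32 @load_seq_cst(i32* %p) {
  ; Atomic loads/stores are now emitted directly: a plain ldr followed by a
  ; __sync_synchronize fence, instead of a __sync_val_compare_and_swap_4 call.
  %r = load atomic i32, i32* %p seq_cst, align 4
  ret i32 %r
}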