diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18752,6 +18752,8 @@
              : AtomicExpansionKind::None;
 }
 
+// As in shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
+// bits, and up to 64 bits on the non-M profiles.
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
@@ -18759,9 +18761,11 @@
   // on the stack and close enough to the spill slot, this can lead to a
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+  unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
   bool HasAtomicCmpXchg =
       !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
+      Size <= (Subtarget->isMClass() ? 32U : 64U))
     return AtomicExpansionKind::LLSC;
   return AtomicExpansionKind::None;
 }
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -2,6 +2,8 @@
 ; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE
 ; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 ; RUN: llc < %s -mtriple=thumbebv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-BE
+; RUN: llc < %s -mtriple=armv7m--none-eabi | FileCheck %s --check-prefix=CHECK-M
+; RUN: llc < %s -mtriple=armv8m--none-eabi | FileCheck %s --check-prefix=CHECK-M
 
 define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test1:
@@ -28,6 +30,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_add_8
+
   %r = atomicrmw add i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -57,6 +61,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_sub_8
+
   %r = atomicrmw sub i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -86,6 +92,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_and_8
+
   %r = atomicrmw and i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -115,6 +123,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_or_8
+
   %r = atomicrmw or i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -144,6 +154,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_xor_8
+
   %r = atomicrmw xor i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -165,6 +177,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_lock_test_and_set_8
+
   %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -199,12 +213,15 @@
 ; CHECK-THUMB: beq
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_val_compare_and_swap_8
+
   %pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst
   %r = extractvalue { i64, i1 } %pair, 0
   ret i64 %r
 }
 
-; Compiles down to a single ldrexd
+; Compiles down to a single ldrexd, except on M-class devices, where ldrexd
+; isn't supported.
 define i64 @test8(i64* %ptr) {
 ; CHECK-LABEL: test8:
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
@@ -220,12 +237,15 @@
 ; CHECK-THUMB-NOT: strexd
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_val_compare_and_swap_8
+
   %r = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %r
 }
 
 ; Compiles down to atomicrmw xchg; there really isn't any more efficient
-; way to write it.
+; way to write it, except on M-class devices, where ldrexd/strexd aren't
+; supported.
 define void @test9(i64* %ptr, i64 %val) {
 ; CHECK-LABEL: test9:
 ; CHECK: dmb {{ish$}}
@@ -243,6 +263,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_lock_test_and_set_8
+
   store atomic i64 %val, i64* %ptr seq_cst, align 8
   ret void
 }
@@ -286,6 +308,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_min_8
+
   %r = atomicrmw min i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -329,6 +353,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_umin_8
+
   %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -372,6 +398,8 @@
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
 
+; CHECK-M: __sync_fetch_and_max_8
+
   %r = atomicrmw max i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -414,6 +442,9 @@
 ; CHECK-THUMB: cmp
 ; CHECK-THUMB: bne
 ; CHECK-THUMB: dmb {{ish$}}
+
+; CHECK-M: __sync_fetch_and_umax_8
+
   %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }