diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -1566,6 +1566,15 @@ Register DesiredReg = MI.getOperand(3).getReg(); Register NewReg = MI.getOperand(4).getReg(); + if (IsThumb) { + assert(STI->hasV8MBaselineOps() && + "CMP_SWAP not expected to be custom expanded for Thumb1"); + assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) && + "ARMv8-M.baseline does not have t2UXTB/t2UXTH"); + assert(ARM::tGPRRegClass.contains(DesiredReg) && + "DesiredReg used for UXT op must be tGPR"); + } + MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -2779,20 +2788,23 @@ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; + case ARM::tCMP_SWAP_8: + assert(STI->isThumb()); + return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB, + NextMBBI); + case ARM::tCMP_SWAP_16: + assert(STI->isThumb()); + return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH, + NextMBBI); + case ARM::CMP_SWAP_8: - if (STI->isThumb()) - return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, - ARM::tUXTB, NextMBBI); - else - return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, - ARM::UXTB, NextMBBI); + assert(!STI->isThumb()); + return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB, + NextMBBI); case ARM::CMP_SWAP_16: - if (STI->isThumb()) - return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, - ARM::tUXTH, NextMBBI); - else - return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, - ARM::UXTH, NextMBBI); + assert(!STI->isThumb()); + return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, + NextMBBI); case ARM::CMP_SWAP_32: if 
(STI->isThumb()) return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3299,9 +3299,9 @@ unsigned Opcode; EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); if (MemTy == MVT::i8) - Opcode = ARM::CMP_SWAP_8; + Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8; else if (MemTy == MVT::i16) - Opcode = ARM::CMP_SWAP_16; + Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16; else if (MemTy == MVT::i32) Opcode = ARM::CMP_SWAP_32; else diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -19359,6 +19359,14 @@ if (AI->isFloatingPointOperation()) return AtomicExpansionKind::CmpXChg; + // At -O0, fast-regalloc cannot cope with the live vregs necessary to + // implement atomicrmw without spilling. If the target address is also on the + // stack and close enough to the spill slot, this can lead to a situation + // where the monitor always gets cleared and the atomic operation can never + // succeed. So at -O0 lower this operation to a CAS loop. + if (getTargetMachine().getOptLevel() == CodeGenOpt::None) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps(); return (Size <= (Subtarget->isMClass() ? 
32U : 64U) && hasAtomicRMW) diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1766,3 +1766,21 @@ def tLDRConstPool : tAsmPseudo<"ldr${p} $Rt, $immediate", (ins tGPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; + +//===---------------------------------- +// Atomic cmpxchg for -O0 +//===---------------------------------- + +// See ARMInstrInfo.td. These two thumb specific pseudos are required to +// restrict the register class for the UXTB/UXTH ops used in the expansion. + +let Constraints = "@earlyclobber $Rd,@earlyclobber $temp", + mayLoad = 1, mayStore = 1 in { +def tCMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), + (ins GPR:$addr, tGPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; + +def tCMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), + (ins GPR:$addr, tGPR:$desired, GPR:$new), + NoItinerary, []>, Sched<[]>; +} diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll --- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll @@ -16,7 +16,7 @@ define i8 @test_xchg_i8() { ; COMMON-LABEL: test_xchg_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_lock_test_and_set_1 entry: @@ -26,7 +26,7 @@ define i8 @test_add_i8() { ; COMMON-LABEL: test_add_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_add_1 entry: @@ -36,7 +36,7 @@ define i8 @test_sub_i8() { ; COMMON-LABEL: test_sub_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_sub_1 entry: @@ -46,7 +46,7 @@ define i8 @test_and_i8() { ; COMMON-LABEL: test_and_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_and_1 
entry: @@ -56,7 +56,7 @@ define i8 @test_nand_i8() { ; COMMON-LABEL: test_nand_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_nand_1 entry: @@ -66,7 +66,7 @@ define i8 @test_or_i8() { ; COMMON-LABEL: test_or_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_or_1 entry: @@ -76,7 +76,7 @@ define i8 @test_xor_i8() { ; COMMON-LABEL: test_xor_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_xor_1 entry: @@ -86,7 +86,7 @@ define i8 @test_max_i8() { ; COMMON-LABEL: test_max_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_max_1 entry: @@ -96,7 +96,7 @@ define i8 @test_min_i8() { ; COMMON-LABEL: test_min_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_min_1 entry: @@ -106,7 +106,7 @@ define i8 @test_umax_i8() { ; COMMON-LABEL: test_umax_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_umax_1 entry: @@ -116,7 +116,7 @@ define i8 @test_umin_i8() { ; COMMON-LABEL: test_umin_i8: ; EXPAND32: ldrexb -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexb ; THUMB1: bl __sync_fetch_and_umin_1 entry: @@ -128,7 +128,7 @@ define i16 @test_xchg_i16() { ; COMMON-LABEL: test_xchg_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_lock_test_and_set_2 entry: @@ -138,7 +138,7 @@ define i16 @test_add_i16() { ; COMMON-LABEL: test_add_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_add_2 entry: @@ -148,7 +148,7 @@ define i16 @test_sub_i16() { ; COMMON-LABEL: test_sub_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_sub_2 entry: @@ -158,7 +158,7 @@ define i16 
@test_and_i16() { ; COMMON-LABEL: test_and_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_and_2 entry: @@ -168,7 +168,7 @@ define i16 @test_nand_i16() { ; COMMON-LABEL: test_nand_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_nand_2 entry: @@ -178,7 +178,7 @@ define i16 @test_or_i16() { ; COMMON-LABEL: test_or_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_or_2 entry: @@ -188,7 +188,7 @@ define i16 @test_xor_i16() { ; COMMON-LABEL: test_xor_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_xor_2 entry: @@ -198,7 +198,7 @@ define i16 @test_max_i16() { ; COMMON-LABEL: test_max_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_max_2 entry: @@ -208,7 +208,7 @@ define i16 @test_min_i16() { ; COMMON-LABEL: test_min_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_min_2 entry: @@ -218,7 +218,7 @@ define i16 @test_umax_i16() { ; COMMON-LABEL: test_umax_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_umax_2 entry: @@ -228,7 +228,7 @@ define i16 @test_umin_i16() { ; COMMON-LABEL: test_umin_i16: ; EXPAND32: ldrexh -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strexh ; THUMB1: bl __sync_fetch_and_umin_2 entry: @@ -240,7 +240,7 @@ define i32 @test_xchg_i32() { ; COMMON-LABEL: test_xchg_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_lock_test_and_set_4 entry: @@ -250,7 +250,7 @@ define i32 @test_add_i32() { ; COMMON-LABEL: test_add_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_add_4 entry: @@ -260,7 +260,7 @@ define i32 
@test_sub_i32() { ; COMMON-LABEL: test_sub_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_sub_4 entry: @@ -270,7 +270,7 @@ define i32 @test_and_i32() { ; COMMON-LABEL: test_and_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_and_4 entry: @@ -280,7 +280,7 @@ define i32 @test_nand_i32() { ; COMMON-LABEL: test_nand_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_nand_4 entry: @@ -290,7 +290,7 @@ define i32 @test_or_i32() { ; COMMON-LABEL: test_or_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_or_4 entry: @@ -300,7 +300,7 @@ define i32 @test_xor_i32() { ; COMMON-LABEL: test_xor_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_xor_4 entry: @@ -310,7 +310,7 @@ define i32 @test_max_i32() { ; COMMON-LABEL: test_max_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_max_4 entry: @@ -320,7 +320,7 @@ define i32 @test_min_i32() { ; COMMON-LABEL: test_min_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_min_4 @@ -331,7 +331,7 @@ define i32 @test_umax_i32() { ; COMMON-LABEL: test_umax_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_umax_4 entry: @@ -341,7 +341,7 @@ define i32 @test_umin_i32() { ; COMMON-LABEL: test_umin_i32: ; EXPAND32: ldrex -; EXPAND32: str +; EXPAND32-NOT: str ; EXPAND32: strex ; THUMB1: bl __sync_fetch_and_umin_4 entry: @@ -352,10 +352,10 @@ define i64 @test_xchg_i64() { ; COMMON-LABEL: test_xchg_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_lock_test_and_set_8 -; BASELINE64: bl __sync_lock_test_and_set_8 +; BASELINE64: bl 
__sync_val_compare_and_swap_8 entry: %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -363,10 +363,10 @@ define i64 @test_add_i64() { ; COMMON-LABEL: test_add_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_add_8 -; BASELINE64: bl __sync_fetch_and_add_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -374,10 +374,10 @@ define i64 @test_sub_i64() { ; COMMON-LABEL: test_sub_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_sub_8 -; BASELINE64: bl __sync_fetch_and_sub_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -385,10 +385,10 @@ define i64 @test_and_i64() { ; COMMON-LABEL: test_and_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_and_8 -; BASELINE64: bl __sync_fetch_and_and_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -396,10 +396,10 @@ define i64 @test_nand_i64() { ; COMMON-LABEL: test_nand_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_nand_8 -; BASELINE64: bl __sync_fetch_and_nand_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -407,10 +407,10 @@ define i64 @test_or_i64() { ; COMMON-LABEL: test_or_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_or_8 -; BASELINE64: bl __sync_fetch_and_or_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -418,10 +418,10 @@ define i64 @test_xor_i64() { ; COMMON-LABEL: test_xor_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; 
EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_xor_8 -; BASELINE64: bl __sync_fetch_and_xor_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -430,10 +430,10 @@ define i64 @test_max_i64() { ; COMMON-LABEL: test_max_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_max_8 -; BASELINE64: bl __sync_fetch_and_max_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw max i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -441,10 +441,10 @@ define i64 @test_min_i64() { ; COMMON-LABEL: test_min_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_min_8 -; BASELINE64: bl __sync_fetch_and_min_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw min i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -452,10 +452,10 @@ define i64 @test_umax_i64() { ; COMMON-LABEL: test_umax_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_umax_8 -; BASELINE64: bl __sync_fetch_and_umax_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw umax i64* @atomic_i64, i64 1 monotonic ret i64 %0 @@ -463,10 +463,10 @@ define i64 @test_umin_i64() { ; COMMON-LABEL: test_umin_i64: ; EXPAND64: ldrexd -; EXPAND64: str +; EXPAND64-NOT: str ; EXPAND64: strexd ; THUMB1: bl __sync_fetch_and_umin_8 -; BASELINE64: bl __sync_fetch_and_umin_8 +; BASELINE64: bl __sync_val_compare_and_swap_8 entry: %0 = atomicrmw umin i64* @atomic_i64, i64 1 monotonic ret i64 %0