diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -1026,6 +1026,11 @@
     return false;
   }

+  /// On some targets, inserting a spill between certain instructions (e.g. an
+  /// exclusive load/store pair) can break the semantics of atomic operations.
+  /// Returns a suitable point before which the spill can be inserted.
+  virtual MachineBasicBlock::iterator findSpillBefore(MachineInstr &MI) const;
+
   //===--------------------------------------------------------------------===//
   /// Debug information queries.

diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -883,8 +883,7 @@
   assert(PhysReg != 0 && "Register not assigned");
   if (LRI->Reloaded || LRI->LiveOut) {
     if (!MI.isImplicitDef()) {
-      MachineBasicBlock::iterator SpillBefore =
-          std::next((MachineBasicBlock::iterator)MI.getIterator());
+      auto SpillBefore = TRI->findSpillBefore(MI);
       LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: "
                         << LRI->Reloaded << '\n');
       bool Kill = LRI->LastUse == nullptr;
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -76,6 +76,11 @@
   return true;
 }

+MachineBasicBlock::iterator
+TargetRegisterInfo::findSpillBefore(MachineInstr &MI) const {
+  return std::next((MachineBasicBlock::iterator)MI.getIterator());
+}
+
 void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet,
                                        MCRegister Reg) const {
   for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI)
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -135,6 +135,8 @@
                       unsigned DstSubReg,
                       const TargetRegisterClass *NewRC,
                       LiveIntervals &LIS) const override;
+  MachineBasicBlock::iterator findSpillBefore(MachineInstr &MI) const override;
+
   void getOffsetOpcodes(const StackOffset &Offset,
                         SmallVectorImpl<uint64_t> &Ops) const override;
 };
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -781,3 +781,64 @@
     return false;
   return true;
 }
+
+/// Select an appropriate instruction to spill before. Normally this is the
+/// instruction right after the current one, but in the case of a load/store
+/// exclusive pair (ldxr/stxr) we must avoid introducing a store between the
+/// two instructions, as it may clear the exclusive monitor and invalidate them.
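+///
+/// For illustration only (the register names, label and stack offset below are
+/// hypothetical), the -O0 expansion of a monotonic atomicrmw add looks roughly
+/// like:
+///
+///   .LBB0_1:
+///     ldxr  x8, [x9]        // opens the exclusive monitor
+///     add   x8, x8, #1
+///     str   x8, [sp, #12]   // a spill inserted here may clear the monitor,
+///     stxr  w10, x8, [x9]   // so the store-exclusive can fail on every
+///     cbnz  w10, .LBB0_1    // iteration and the loop may never terminate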
+MachineBasicBlock::iterator
+AArch64RegisterInfo::findSpillBefore(MachineInstr &MI) const {
+  auto IsExclusiveLoad = [](const MachineInstr &MI) -> Optional<unsigned> {
+    switch (MI.getOpcode()) {
+    case AArch64::LDXPW:
+      return AArch64::STXPW;
+    case AArch64::LDXPX:
+      return AArch64::STXPX;
+    case AArch64::LDXRB:
+      return AArch64::STXRB;
+    case AArch64::LDXRH:
+      return AArch64::STXRH;
+    case AArch64::LDXRW:
+      return AArch64::STXRW;
+    case AArch64::LDXRX:
+      return AArch64::STXRX;
+    default:
+      return None;
+    }
+  };
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  const MachineBasicBlock::reverse_iterator RE = MBB.rend();
+  const MachineBasicBlock::iterator E = MBB.end();
+
+  // Default value: spill before the next instruction.
+  auto SpillBefore = std::next((MachineBasicBlock::iterator)MI.getIterator());
+
+  // If we are already at the end of the block, spill here.
+  if (SpillBefore == E)
+    return SpillBefore;
+
+  // See if we are between an exclusive load/store pair.
+  const auto LoadInst =
+      std::find_if((MachineBasicBlock::reverse_iterator)MI.getReverseIterator(),
+                   RE, IsExclusiveLoad);
+
+  // If there is no exclusive load earlier in this block, return the default.
+  if (LoadInst == RE)
+    return SpillBefore;
+
+  // Find the corresponding exclusive store.
+  const unsigned StoreOp = IsExclusiveLoad(*LoadInst).getValue();
+  while (SpillBefore != E && SpillBefore->getOpcode() != StoreOp) {
+    SpillBefore = std::next(SpillBefore);
+  }
+
+  // If we failed to find an exclusive store, return the default.
+  if (SpillBefore == E)
+    return std::next((MachineBasicBlock::iterator)MI.getIterator());
+
+  // We want to spill after the store.
+  SpillBefore = std::next(SpillBefore);
+
+  return SpillBefore;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.h b/llvm/lib/Target/ARM/ARMRegisterInfo.h
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,6 +23,7 @@
   virtual void anchor();
 public:
   ARMRegisterInfo();
+  MachineBasicBlock::iterator findSpillBefore(MachineInstr &MI) const override;
 };

 } // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -16,3 +16,68 @@
 void ARMRegisterInfo::anchor() { }

 ARMRegisterInfo::ARMRegisterInfo() : ARMBaseRegisterInfo() {}
+
+/// Select an appropriate instruction to spill before. Normally this is the
+/// instruction right after the current one, but in the case of an ldrex/strex
+/// pair we must avoid introducing a store between the exclusive instructions,
+/// as it may clear the exclusive monitor and invalidate them.
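+///
+/// For illustration only (hypothetical registers and offsets), the ARM
+/// expansion has the same shape:
+///
+///     ldrex r0, [r1]        @ opens the exclusive monitor
+///     add   r0, r0, #1
+///     str   r0, [sp, #4]    @ a spill inserted here may clear the monitor,
+///     strex r2, r0, [r1]    @ so the store-exclusive may always fail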
+MachineBasicBlock::iterator
+ARMRegisterInfo::findSpillBefore(MachineInstr &MI) const {
+  auto IsExclusiveLoad = [](const MachineInstr &MI) -> Optional<unsigned> {
+    switch (MI.getOpcode()) {
+    case ARM::t2LDREX:
+      return ARM::t2STREX;
+    case ARM::t2LDREXB:
+      return ARM::t2STREXB;
+    case ARM::t2LDREXD:
+      return ARM::t2STREXD;
+    case ARM::t2LDREXH:
+      return ARM::t2STREXH;
+    case ARM::LDREX:
+      return ARM::STREX;
+    case ARM::LDREXB:
+      return ARM::STREXB;
+    case ARM::LDREXD:
+      return ARM::STREXD;
+    case ARM::LDREXH:
+      return ARM::STREXH;
+    default:
+      return None;
+    }
+  };
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  const MachineBasicBlock::reverse_iterator RE = MBB.rend();
+  const MachineBasicBlock::iterator E = MBB.end();
+
+  // Default value: spill before the next instruction.
+  auto SpillBefore = std::next((MachineBasicBlock::iterator)MI.getIterator());
+
+  // If we are already at the end of the block, spill here.
+  if (SpillBefore == E)
+    return SpillBefore;
+
+  // See if we are between an exclusive load/store pair.
+  const auto LoadInst =
+      std::find_if((MachineBasicBlock::reverse_iterator)MI.getReverseIterator(),
+                   RE, IsExclusiveLoad);
+
+  // If there is no exclusive load earlier in this block, return the default.
+  if (LoadInst == RE)
+    return SpillBefore;
+
+  // Find the corresponding exclusive store.
+  const unsigned StoreOp = IsExclusiveLoad(*LoadInst).getValue();
+  while (SpillBefore != E && SpillBefore->getOpcode() != StoreOp) {
+    SpillBefore = std::next(SpillBefore);
+  }
+
+  // If we failed to find an exclusive store, return the default.
+  if (SpillBefore == E)
+    return std::next((MachineBasicBlock::iterator)MI.getIterator());
+
+  // We want to spill after the store.
+  SpillBefore = std::next(SpillBefore);
+
+  return SpillBefore;
+}
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor.ll
@@ -0,0 +1,578 @@
+; RUN: llc -O0 -o - %s --mtriple=armv8-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -o - %s --mtriple=aarch64-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=A64
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+@atomic_i8 = external global i8
+@atomic_i16 = external global i16
+@atomic_i32 = external global i32
+@atomic_i64 = external global i64
+
+@atomic_half = external global half
+@atomic_float = external global float
+@atomic_double = external global double
+
+
+define i8 @test_xchg_i8() {
+entry:
+  %0 = atomicrmw xchg i8* @atomic_i8, i8 1 monotonic
+  ; CHECK: test_xchg_i8:
+  ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]]
+  ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]]
+  ; CHECK-NOT: str
+  ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]]
+  ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]]
+  ret i8 %0
+}
+define i8 @test_add_i8() {
+entry:
+  %0 = atomicrmw add i8* @atomic_i8, i8 1 monotonic
+  ; CHECK: test_add_i8:
+  ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]]
+  ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]]
+  ; CHECK-NOT: str
+  ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]]
+  ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]]
+  ret i8 %0
+}
+define i8 @test_sub_i8() {
+entry:
+  %0 = atomicrmw sub i8* @atomic_i8, i8 1 monotonic
+  ; CHECK: test_sub_i8:
+  ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]]
+  ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]]
+  ; CHECK-NOT: str
+  ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]]
+  ; A64: stxrb [[RA]], {{w[0-9]+}}, 
[[ADDR]] + ret i8 %0 +} +define i8 @test_and_i8() { +entry: + %0 = atomicrmw and i8* @atomic_i8, i8 1 monotonic + ; CHECK: test_and_i8: + ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_nand_i8() { +entry: + %0 = atomicrmw nand i8* @atomic_i8, i8 1 monotonic + ; CHECK: test_nand_i8: + ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_or_i8() { +entry: + %0 = atomicrmw or i8* @atomic_i8, i8 1 monotonic + ; CHECK: test_or_i8: + ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_xor_i8() { +entry: + %0 = atomicrmw xor i8* @atomic_i8, i8 1 monotonic + ; CHECK: test_xor_i8: + ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_max_i8() { +entry: + %0 = atomicrmw max i8* @atomic_i8, i8 1 monotonic + ; CHECK: test_max_i8: + ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_min_i8() { +entry: + %0 = atomicrmw min i8* @atomic_i8, i8 1 monotonic + ; CHECK: test_min_i8: + ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_umax_i8() { +entry: + %0 = atomicrmw umax i8* @atomic_i8, i8 1 monotonic + ; CHECK: test_umax_i8: + ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} +define i8 @test_umin_i8() { +entry: + %0 = atomicrmw umin i8* @atomic_i8, i8 1 monotonic + ; CHECK: test_umin_i8: + ; ARM: ldrexb [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrb [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexb [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrb [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i8 %0 +} + + +define i16 @test_xchg_i16() { +entry: + %0 = atomicrmw xchg i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_xchg_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_add_i16() { +entry: + %0 = atomicrmw add i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_add_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_sub_i16() { +entry: + %0 = atomicrmw sub i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_sub_i16: + ; 
ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_and_i16() { +entry: + %0 = atomicrmw and i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_and_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_nand_i16() { +entry: + %0 = atomicrmw nand i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_nand_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_or_i16() { +entry: + %0 = atomicrmw or i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_or_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_xor_i16() { +entry: + %0 = atomicrmw xor i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_xor_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_max_i16() { +entry: + %0 = atomicrmw max i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_max_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_min_i16() { +entry: + %0 = atomicrmw min i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_min_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_umax_i16() { +entry: + %0 = atomicrmw umax i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_umax_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define i16 @test_umin_i16() { +entry: + %0 = atomicrmw umin i16* @atomic_i16, i16 1 monotonic + ; CHECK: test_umin_i16: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxrh [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i16 %0 +} +define half @test_fadd_half() { +entry: + %0 = atomicrmw fadd half* @atomic_half, half 1.0 monotonic + ; CHECK: test_fadd_half: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldaxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stlxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ; the strex ends up in the next machine basic block, so regs will be different but address the same + ret half %0 +} +define half @test_fsub_half() { +entry: + %0 = atomicrmw fsub half* @atomic_half, half 1.0 
monotonic + ; CHECK: test_fsub_half: + ; ARM: ldrexh [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldaxrh [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexh {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stlxrh {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ; the strex ends up in the next machine basic block, so regs will be different but address the same + ret half %0 +} + + +define i32 @test_xchg_i32() { +entry: + %0 = atomicrmw xchg i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_xchg_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_add_i32() { +entry: + %0 = atomicrmw add i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_add_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_sub_i32() { +entry: + %0 = atomicrmw sub i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_sub_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_and_i32() { +entry: + %0 = atomicrmw and i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_and_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_nand_i32() { +entry: + %0 = atomicrmw nand i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_nand_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_or_i32() { +entry: + %0 = atomicrmw or i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_or_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_xor_i32() { +entry: + %0 = atomicrmw xor i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_xor_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_max_i32() { +entry: + %0 = atomicrmw max i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_max_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_min_i32() { +entry: + %0 = atomicrmw min i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_min_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_umax_i32() { +entry: + %0 = atomicrmw umax i32* @atomic_i32, i32 1 monotonic + ; 
CHECK: test_umax_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define i32 @test_umin_i32() { +entry: + %0 = atomicrmw umin i32* @atomic_i32, i32 1 monotonic + ; CHECK: test_umin_i32: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strex [[RA]], {{r[0-9]+}}, [[ADDR]] + ; A64: stxr [[RA]], {{w[0-9]+}}, [[ADDR]] + ret i32 %0 +} +define float @test_fadd_float() { +entry: + %0 = atomicrmw fadd float* @atomic_float, float 1.0 monotonic + ; CHECK: test_fadd_float: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldaxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; the strex ends up in the next machine basic block, so regs will be different but address the same + ; ARM: strex {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stlxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret float %0 +} +define float @test_fsub_float() { +entry: + %0 = atomicrmw fsub float* @atomic_float, float 1.0 monotonic + ; CHECK: test_fsub_float: + ; ARM: ldrex [[RA:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldaxr [[RA:w[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; the strex ends up in the next machine basic block, so regs will be different but address the same + ; ARM: strex {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stlxr {{w[0-9]+}}, {{w[0-9]+}}, [[ADDR]] + ret float %0 +} + + + + +define i64 @test_xchg_i64() { +entry: + %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_xchg_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_add_i64() { +entry: + %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_add_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_sub_i64() { +entry: + %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_sub_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_and_i64() { +entry: + %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_and_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_nand_i64() { +entry: + %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_nand_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_or_i64() { +entry: + %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic + 
; CHECK: test_or_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_xor_i64() { +entry: + %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_xor_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_max_i64() { +entry: + %0 = atomicrmw max i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_max_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_min_i64() { +entry: + %0 = atomicrmw min i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_min_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_umax_i64() { +entry: + %0 = atomicrmw umax i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_umax_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define i64 @test_umin_i64() { +entry: + %0 = atomicrmw umin i64* @atomic_i64, i64 1 monotonic + ; CHECK: test_umin_i64: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret i64 %0 +} +define double @test_fadd_double() { +entry: + %0 = atomicrmw fadd double* @atomic_double, double 1.0 monotonic + ; CHECK: test_fadd_double: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldaxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; the strex ends up in the next machine basic block, so regs will be different but address the same + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stlxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret double %0 +} +define double @test_fsub_double() { +entry: + %0 = atomicrmw fsub double* @atomic_double, double 1.0 monotonic + ; CHECK: test_fsub_double: + ; ARM: ldrexd [[RA:r[0-9]+]], [[RB:r[0-9]+]], [[ADDR:.r[0-9]+.]] + ; A64: ldaxr [[RA:x[0-9]+]], [[ADDR:.x[0-9]+.]] + ; CHECK-NOT: str + ; the strex ends up in the next machine basic block, so regs will be different but address the same + ; ARM: strexd {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}, [[ADDR]] + ; A64: stlxr {{w[0-9]+}}, {{x[0-9]+}}, [[ADDR]] + ret double %0 +}