diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -2255,6 +2255,11 @@
   case ISD::FREM:
   case ISD::FSUB:      R = PromoteFloatRes_BinOp(N); break;
+  case ISD::ATOMIC_LOAD_FADD:
+  case ISD::ATOMIC_LOAD_FSUB:
+    R = PromoteFloatRes_ATOMIC_LOAD_FXXX(N);
+    break;
+
   case ISD::FMA:       // FMA is same as FMAD
   case ISD::FMAD:      R = PromoteFloatRes_FMAD(N); break;
@@ -2453,6 +2458,21 @@
   return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round);
 }
 
+SDValue DAGTypeLegalizer::PromoteFloatRes_ATOMIC_LOAD_FXXX(SDNode *N) {
+  AtomicSDNode *A = cast<AtomicSDNode>(N);
+  EVT VT = N->getValueType(0);
+
+  // Load the value as an integer value with the same number of bits.
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+  SDValue PromotedVal = GetPromotedFloat(A->getVal());
+  SDValue NewA =
+      DAG.getAtomic(A->getOpcode(), SDLoc(N), IVT, A->getChain(),
+                    A->getBasePtr(), PromotedVal, A->getMemOperand());
+  ReplaceValueWith(SDValue(A, 1), NewA.getValue(1));
+
+  return NewA;
+}
+
 SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
   LoadSDNode *L = cast<LoadSDNode>(N);
   EVT VT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -671,6 +671,7 @@
   SDValue PromoteFloatRes_FMAD(SDNode *N);
   SDValue PromoteFloatRes_FPOWI(SDNode *N);
   SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
+  SDValue PromoteFloatRes_ATOMIC_LOAD_FXXX(SDNode *N);
   SDValue PromoteFloatRes_LOAD(SDNode *N);
   SDValue PromoteFloatRes_SELECT(SDNode *N);
   SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -928,6 +928,25 @@
     return;
   }
 
+  // Handle DPR to/from GPRPair
+  const auto *TRI = &getRegisterInfo();
+  if (ARM::DPRRegClass.contains(SrcReg) &&
+      ARM::GPRPairRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(ARM::VMOVRRD))
+        .addReg(TRI->getSubReg(DestReg, ARM::gsub_0), RegState::Define)
+        .addReg(TRI->getSubReg(DestReg, ARM::gsub_1), RegState::Define)
+        .addReg(SrcReg, getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  } else if (ARM::GPRPairRegClass.contains(SrcReg) &&
+             ARM::DPRRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(ARM::VMOVDRR), DestReg)
+        .addReg(TRI->getSubReg(SrcReg, ARM::gsub_0), getKillRegState(KillSrc))
+        .addReg(TRI->getSubReg(SrcReg, ARM::gsub_1), getKillRegState(KillSrc))
+        .add(predOps(ARMCC::AL));
+    return;
+  }
+
   // Handle register classes that require multiple instructions.
   unsigned BeginIdx = 0;
   unsigned SubRegs = 0;
@@ -1013,7 +1032,6 @@
   assert(Opc && "Impossible reg-to-reg copy");
 
-  const TargetRegisterInfo *TRI = &getRegisterInfo();
   MachineInstrBuilder Mov;
 
   // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -107,6 +107,10 @@
                         MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
                         unsigned StrexOp, unsigned UxtOp,
                         MachineBasicBlock::iterator &NextMBBI);
+    bool ExpandAtomicOp(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI, const int Size,
+                        unsigned PseudoOp,
+                        MachineBasicBlock::iterator &NextMBBI);
 
     bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
@@ -1657,16 +1661,270 @@
 /// ARM's ldrexd/strexd take a consecutive register pair (represented as a
 /// single GPRPair register), Thumb's take two separate registers so we need to
 /// extract the subregs from the pair.
-static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
+static void addExclusiveRegPair(MachineInstrBuilder &MIB, Register Reg,
                                 unsigned Flags, bool IsThumb,
                                 const TargetRegisterInfo *TRI) {
   if (IsThumb) {
-    Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
-    Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
+    Register RegLo = TRI->getSubReg(Reg, ARM::gsub_0);
+    Register RegHi = TRI->getSubReg(Reg, ARM::gsub_1);
     MIB.addReg(RegLo, Flags);
     MIB.addReg(RegHi, Flags);
   } else
-    MIB.addReg(Reg.getReg(), Flags);
+    MIB.addReg(Reg, Flags);
+}
+
+static void
+makeAtomicUpdateInstrs(const unsigned PseudoOp, MachineBasicBlock *LoadStoreBB,
+                       const DebugLoc &DL, const ARMBaseInstrInfo *TII,
+                       const Register DestReg, const Register ValReg) {
+
+  auto BasicOp = [&](unsigned Opcode) {
+    auto MIB = BuildMI(LoadStoreBB, DL, TII->get(Opcode), DestReg)
+                   .addReg(DestReg, RegState::Kill)
+                   .addReg(ValReg)
+                   .add(predOps(ARMCC::AL));
+    if (Opcode != ARM::VADDS && Opcode != ARM::VSUBS && Opcode != ARM::VADDD &&
+        Opcode != ARM::VSUBD)
+      // Floating point operations don't have this.
+      // Add 's' bit operand (always reg0 for this)
+      MIB.addReg(0);
+  };
+  auto MinMax = [&](ARMCC::CondCodes Condition) {
+    BuildMI(LoadStoreBB, DL, TII->get(ARM::CMPrr), DestReg)
+        .addReg(ValReg)
+        .add(predOps(ARMCC::AL));
+    BuildMI(LoadStoreBB, DL, TII->get(ARM::MOVr), DestReg)
+        .addReg(ValReg)
+        .add(predOps(Condition))
+        .add(condCodeOp()); // 's' bit
+  };
+
+  switch (PseudoOp) {
+  // No operations (swaps)
+  case ARM::ATOMIC_SWAP_8:
+  case ARM::ATOMIC_SWAP_16:
+  case ARM::ATOMIC_SWAP_32:
+  case ARM::ATOMIC_SWAP_64:
+    llvm_unreachable("Swap should be handled at call site.");
+    return;
+
+  // Basic binary operation
+  case ARM::ATOMIC_LOAD_ADD_8:
+  case ARM::ATOMIC_LOAD_ADD_16:
+  case ARM::ATOMIC_LOAD_ADD_32:
+  case ARM::ATOMIC_LOAD_ADD_64:
+    return BasicOp(ARM::ADDrr);
+  case ARM::ATOMIC_LOAD_SUB_8:
+  case ARM::ATOMIC_LOAD_SUB_16:
+  case ARM::ATOMIC_LOAD_SUB_32:
+  case ARM::ATOMIC_LOAD_SUB_64:
+    return BasicOp(ARM::SUBrr);
+  case ARM::ATOMIC_LOAD_AND_8:
+  case ARM::ATOMIC_LOAD_AND_16:
+  case ARM::ATOMIC_LOAD_AND_32:
+  case ARM::ATOMIC_LOAD_AND_64:
+    return BasicOp(ARM::ANDrr);
+  case ARM::ATOMIC_LOAD_OR_8:
+  case ARM::ATOMIC_LOAD_OR_16:
+  case ARM::ATOMIC_LOAD_OR_32:
+  case ARM::ATOMIC_LOAD_OR_64:
+    return BasicOp(ARM::ORRrr);
+  case ARM::ATOMIC_LOAD_XOR_8:
+  case ARM::ATOMIC_LOAD_XOR_16:
+  case ARM::ATOMIC_LOAD_XOR_32:
+  case ARM::ATOMIC_LOAD_XOR_64:
+    return BasicOp(ARM::EORrr);
+  case ARM::ATOMIC_LOAD_FADD_16:
+  case ARM::ATOMIC_LOAD_FADD_32:
+    return BasicOp(ARM::VADDS);
+  case ARM::ATOMIC_LOAD_FADD_64:
+    return BasicOp(ARM::VADDD);
+  case ARM::ATOMIC_LOAD_FSUB_16:
+  case ARM::ATOMIC_LOAD_FSUB_32:
+    return BasicOp(ARM::VSUBS);
+  case ARM::ATOMIC_LOAD_FSUB_64:
+    return BasicOp(ARM::VSUBD);
+
+  // Minimum or maximum operations
+  case ARM::ATOMIC_LOAD_MAX_8:
+  case ARM::ATOMIC_LOAD_MAX_16:
+  case ARM::ATOMIC_LOAD_MAX_32:
+  case ARM::ATOMIC_LOAD_MAX_64:
+  case ARM::ATOMIC_LOAD_UMAX_8:
+  case ARM::ATOMIC_LOAD_UMAX_16:
+  case ARM::ATOMIC_LOAD_UMAX_32:
+  case ARM::ATOMIC_LOAD_UMAX_64:
+    return MinMax(ARMCC::LE);
+  case ARM::ATOMIC_LOAD_MIN_8:
+  case ARM::ATOMIC_LOAD_MIN_16:
+  case ARM::ATOMIC_LOAD_MIN_32:
+  case ARM::ATOMIC_LOAD_MIN_64:
+  case ARM::ATOMIC_LOAD_UMIN_8:
+  case ARM::ATOMIC_LOAD_UMIN_16:
+  case ARM::ATOMIC_LOAD_UMIN_32:
+  case ARM::ATOMIC_LOAD_UMIN_64:
+    return MinMax(ARMCC::GE);
+
+  // NAND
+  case ARM::ATOMIC_LOAD_NAND_8:
+  case ARM::ATOMIC_LOAD_NAND_16:
+  case ARM::ATOMIC_LOAD_NAND_32:
+  case ARM::ATOMIC_LOAD_NAND_64:
+    BuildMI(LoadStoreBB, DL, TII->get(ARM::ANDrr), DestReg)
+        .addReg(DestReg, RegState::Kill)
+        .addReg(ValReg)
+        .add(predOps(ARMCC::AL))
+        .addReg(0); // 's' bit
+    BuildMI(LoadStoreBB, DL, TII->get(ARM::MVNr), DestReg)
+        .addReg(DestReg, RegState::Kill)
+        .add(predOps(ARMCC::AL))
+        .addReg(0); // 's' bit
+    return;
+  }
+
+  llvm_unreachable("unexpected opcode");
+}
+
+bool ARMExpandPseudo::ExpandAtomicOp(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     const int Size, const unsigned PseudoOp,
+                                     MachineBasicBlock::iterator &NextMBBI) {
+  assert(!STI->isThumb() && "atomic pseudo-instructions are ARM only");
+
+  unsigned LdrexOp;
+  unsigned StrexOp;
+  switch (Size) {
+  case 8:
+    LdrexOp = ARM::LDREXB;
+    StrexOp = ARM::STREXB;
+    break;
+  case 16:
+    LdrexOp = ARM::LDREXH;
+    StrexOp = ARM::STREXH;
+    break;
+  case 32:
+    LdrexOp = ARM::LDREX;
+    StrexOp = ARM::STREX;
+    break;
+  case 64:
+    LdrexOp = ARM::LDREXD;
+    StrexOp = ARM::STREXD;
+    break;
+  default:
+    llvm_unreachable("Invalid Size");
+  }
+
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineOperand &Dest = MI.getOperand(0);
+  MachineOperand &Temp = MI.getOperand(1);
+  // If Temp is a GPRPair, MiniTempReg is the first of the pair
+  Register MiniTempReg =
+      ARM::GPRPairRegClass.contains(Temp.getReg())
+          ? (Register)TRI->getSubReg(Temp.getReg(), ARM::gsub_0)
+          : Temp.getReg();
+  assert(ARM::GPRRegClass.contains(MiniTempReg));
+  Register AddrReg = MI.getOperand(2).getReg();
+  Register ValReg = MI.getOperand(3).getReg();
+
+  // TempReg is GPR and is used for load/store operations.
+  // DestReg is either GPR or DPR and is used for arithmetic operations.
+
+  // LoadStoreBB:
+  //   TempReg = LoadExclusive [AddrReg]
+  //   DestReg = mov TempReg
+  //   if xchg:
+  //     TempReg = mov ValReg
+  //   else:
+  //     DestReg = Operation DestReg, ValReg
+  //     TempReg = mov DestReg
+  //   MiniTempReg = StoreExclusive TempReg, [AddrReg]
+  //   cmp MiniTempReg, #0
+  //   bne LoadStoreBB
+  //   b DoneBB
+  // DoneBB:
+  //   bx lr
+
+  MachineFunction *MF = MBB.getParent();
+  auto *LoadStoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto *DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoadStoreBB);
+  MF->insert(++LoadStoreBB->getIterator(), DoneBB);
+
+  MachineInstrBuilder MIB;
+  // LoadExclusive into temporary general purpose register (pair)
+  MIB = BuildMI(LoadStoreBB, DL, TII->get(LdrexOp));
+  addExclusiveRegPair(MIB, Temp.getReg(), RegState::Define, STI->isThumb(),
+                      TRI);
+  MIB.addReg(AddrReg);
+  MIB.add(predOps(ARMCC::AL));
+
+  // Copy Temp into Dest. For floating point operations this is GPR -> DPR.
+  TII->copyPhysReg(*LoadStoreBB, LoadStoreBB->end(), DL, Dest.getReg(),
+                   Temp.getReg(), true /* KillSrc */);
+
+  const bool IsXchg =
+      PseudoOp == ARM::ATOMIC_SWAP_8 || PseudoOp == ARM::ATOMIC_SWAP_16 ||
+      PseudoOp == ARM::ATOMIC_SWAP_32 || PseudoOp == ARM::ATOMIC_SWAP_64;
+
+  if (IsXchg) {
+    // Copy ValReg into Temp. For floating point operations this is DPR -> GPR.
+    TII->copyPhysReg(*LoadStoreBB, LoadStoreBB->end(), DL, Temp.getReg(),
+                     ValReg, false /* KillSrc */);
+  } else {
+    // Update the value in Dest with the results of the operation
+    makeAtomicUpdateInstrs(PseudoOp, LoadStoreBB, DL, TII, Dest.getReg(),
+                           ValReg);
+
+    // Copy Dest into Temp. For floating point operations this is DPR -> GPR.
+    TII->copyPhysReg(*LoadStoreBB, LoadStoreBB->end(), DL, Temp.getReg(),
+                     Dest.getReg(), false /* KillSrc */);
+  }
+
+  // StoreExclusive Temp to Addr, store success in Temp (or MiniTempReg)
+  MIB = BuildMI(LoadStoreBB, DL, TII->get(StrexOp));
+  addExclusiveRegPair(MIB, MiniTempReg, RegState::Define, STI->isThumb(), TRI);
+  MIB.addReg(Temp.getReg(), RegState::Kill);
+  MIB.addReg(AddrReg);
+  MIB.add(predOps(ARMCC::AL));
+
+  // Compare to zero
+  BuildMI(LoadStoreBB, DL, TII->get(ARM::CMPri))
+      .addReg(MiniTempReg, RegState::Kill)
+      .addImm(0)
+      .add(predOps(ARMCC::AL));
+
+  // Branch to LoadStoreBB if failed
+  BuildMI(LoadStoreBB, DL, TII->get(ARM::Bcc))
+      .addMBB(LoadStoreBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+
+  // Branch to DoneBB if success
+  BuildMI(LoadStoreBB, DL, TII->get(ARM::B)).addMBB(DoneBB);
+
+  LoadStoreBB->addSuccessor(LoadStoreBB);
+  LoadStoreBB->addSuccessor(DoneBB);
+
+  // Copy remaining instructions in MBB into DoneBB
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+
+  MBB.addSuccessor(LoadStoreBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  // Recompute livein lists.
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+  computeAndAddLiveIns(LiveRegs, *LoadStoreBB);
+  // Do an extra pass around the loop to get loop carried registers right.
+  LoadStoreBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *LoadStoreBB);
+
+  return true;
 }
 
 /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
@@ -1708,7 +1966,7 @@
   unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
   MachineInstrBuilder MIB;
   MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
-  addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
+  addExclusiveRegPair(MIB, Dest.getReg(), RegState::Define, IsThumb, TRI);
   MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
 
   unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
@@ -1737,7 +1995,7 @@
   unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
   MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
   unsigned Flags = getKillRegState(New.isDead());
-  addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
+  addExclusiveRegPair(MIB, New.getReg(), Flags, IsThumb, TRI);
   MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
 
   unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
@@ -2803,6 +3061,114 @@
     case ARM::CMP_SWAP_64:
       return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
 
+    case ARM::ATOMIC_LOAD_ADD_8:
+    case ARM::ATOMIC_LOAD_AND_8:
+    case ARM::ATOMIC_LOAD_MAX_8:
+    case ARM::ATOMIC_LOAD_MIN_8:
+    case ARM::ATOMIC_LOAD_NAND_8:
+    case ARM::ATOMIC_LOAD_OR_8:
+    case ARM::ATOMIC_LOAD_SUB_8:
+    case ARM::ATOMIC_LOAD_UMAX_8:
+    case ARM::ATOMIC_LOAD_UMIN_8:
+    case ARM::ATOMIC_LOAD_XOR_8:
+    case ARM::ATOMIC_SWAP_8:
+      return ExpandAtomicOp(MBB, MBBI, 8, Opcode, NextMBBI);
+
+    case ARM::ATOMIC_LOAD_ADD_16:
+    case ARM::ATOMIC_LOAD_AND_16:
+    case ARM::ATOMIC_LOAD_FADD_16:
+    case ARM::ATOMIC_LOAD_FSUB_16:
+    case ARM::ATOMIC_LOAD_MAX_16:
+    case ARM::ATOMIC_LOAD_MIN_16:
+    case ARM::ATOMIC_LOAD_NAND_16:
+    case ARM::ATOMIC_LOAD_OR_16:
+    case ARM::ATOMIC_LOAD_SUB_16:
+    case ARM::ATOMIC_LOAD_UMAX_16:
+    case ARM::ATOMIC_LOAD_UMIN_16:
+    case ARM::ATOMIC_LOAD_XOR_16:
+    case ARM::ATOMIC_SWAP_16:
+      return ExpandAtomicOp(MBB, MBBI, 16, Opcode, NextMBBI);
+
+    case ARM::ATOMIC_LOAD_ADD_32:
+    case ARM::ATOMIC_LOAD_AND_32:
+    case ARM::ATOMIC_LOAD_FADD_32:
+    case ARM::ATOMIC_LOAD_FSUB_32:
+    case ARM::ATOMIC_LOAD_MAX_32:
+    case ARM::ATOMIC_LOAD_MIN_32:
+    case ARM::ATOMIC_LOAD_NAND_32:
+    case ARM::ATOMIC_LOAD_OR_32:
+    case ARM::ATOMIC_LOAD_SUB_32:
+    case ARM::ATOMIC_LOAD_UMAX_32:
+    case ARM::ATOMIC_LOAD_UMIN_32:
+    case ARM::ATOMIC_LOAD_XOR_32:
+    case ARM::ATOMIC_SWAP_32:
+      return ExpandAtomicOp(MBB, MBBI, 32, Opcode, NextMBBI);
+
+    case ARM::ATOMIC_LOAD_ADD_64:
+    case ARM::ATOMIC_LOAD_AND_64:
+    case ARM::ATOMIC_LOAD_FADD_64:
+    case ARM::ATOMIC_LOAD_FSUB_64:
+    case ARM::ATOMIC_LOAD_MAX_64:
+    case ARM::ATOMIC_LOAD_MIN_64:
+    case ARM::ATOMIC_LOAD_NAND_64:
+    case ARM::ATOMIC_LOAD_OR_64:
+    case ARM::ATOMIC_LOAD_SUB_64:
+    case ARM::ATOMIC_LOAD_UMAX_64:
+    case ARM::ATOMIC_LOAD_UMIN_64:
+    case ARM::ATOMIC_LOAD_XOR_64:
+    case ARM::ATOMIC_SWAP_64:
+      return ExpandAtomicOp(MBB, MBBI, 64, Opcode, NextMBBI);
+
+    case ARM::CallWrapped: {
+      MachineFunction &MF = *MI.getParent()->getParent();
+      DebugLoc dl = MI.getDebugLoc();
+      MIBundleBuilder Bundler(MBB, MI);
+      unsigned OpIdx = 0;
+
+      // If we have immediates before the callee turn them into a FIXED_INST
+      if (MI.getOperand(OpIdx).isImm()) {
+        Bundler.append(BuildMI(MF, dl, TII->get(ARM::FIXED_INST))
+                           .addImm(MI.getOperand(OpIdx).getImm())
+                           .addImm(MI.getOperand(OpIdx + 1).getImm()));
+        OpIdx += 2;
+      }
+
+      // Figure out what kind of call instruction we need
+      unsigned newOpc;
+      if (AFI->isThumbFunction()) {
+        if (MI.getOperand(OpIdx).isReg())
+          newOpc = STI->hasV5TOps() ? ARM::tBLXr : ARM::tBX_CALL;
+        else
+          newOpc = ARM::tBL;
+      } else {
+        if (MI.getOperand(OpIdx).isReg())
+          newOpc = STI->hasV5TOps() ? ARM::BLX : ARM::BX_CALL;
+        else
+          newOpc = ARM::BL;
+      }
+
+      // Emit the call
+      MachineInstrBuilder MIB = BuildMI(MF, dl, TII->get(newOpc));
+      if (AFI->isThumbFunction())
+        MIB.add(predOps(ARMCC::AL));
+      MIB.add(MI.getOperand(OpIdx));
+      Bundler.append(MIB);
+      OpIdx++;
+
+      // If we have immediates after the callee turn them into a FIXED_INST
+      if (MI.getOperand(OpIdx).isImm()) {
+        Bundler.append(BuildMI(MF, dl, TII->get(ARM::FIXED_INST))
+                           .addImm(MI.getOperand(OpIdx).getImm())
+                           .addImm(MI.getOperand(OpIdx + 1).getImm()));
+        OpIdx += 2;
+      }
+
+      finalizeBundle(MBB, Bundler.begin(), Bundler.end());
+      MF.eraseCallSiteInfo(&MI);
+      MI.eraseFromParent();
+      return true;
+    }
+
     case ARM::tBL_PUSHLR:
     case ARM::BL_PUSHLR: {
       const bool Thumb = Opcode == ARM::tBL_PUSHLR;
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -310,6 +310,7 @@
   void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
 
   void SelectCMP_SWAP(SDNode *N);
+  void SelectAtomicOp(SDNode *N);
 
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
@@ -3318,6 +3319,142 @@
   CurDAG->RemoveDeadNode(N);
 }
 
+/// Expand atomic operations to size- and type-specific pseudo-instructions
+void ARMDAGToDAGISel::SelectAtomicOp(SDNode *N) {
+  EVT MemTy = cast<AtomicSDNode>(N)->getMemoryVT();
+  const unsigned Opcode = [&]() {
+    switch (N->getOpcode()) {
+    case ISD::ATOMIC_SWAP:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_SWAP_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_SWAP_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_SWAP_32;
+      break;
+    case ISD::ATOMIC_LOAD_ADD:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_ADD_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_ADD_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_ADD_32;
+      break;
+    case ISD::ATOMIC_LOAD_SUB:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_SUB_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_SUB_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_SUB_32;
+      break;
+    case ISD::ATOMIC_LOAD_AND:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_AND_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_AND_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_AND_32;
+      break;
+    case ISD::ATOMIC_LOAD_CLR:
+      llvm_unreachable("ATOMIC_LOAD_CLR in SelectAtomicOp");
+      break;
+    case ISD::ATOMIC_LOAD_OR:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_OR_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_OR_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_OR_32;
+      break;
+    case ISD::ATOMIC_LOAD_XOR:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_XOR_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_XOR_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_XOR_32;
+      break;
+    case ISD::ATOMIC_LOAD_NAND:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_NAND_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_NAND_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_NAND_32;
+      break;
+    case ISD::ATOMIC_LOAD_MIN:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_MIN_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_MIN_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_MIN_32;
+      break;
+    case ISD::ATOMIC_LOAD_MAX:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_MAX_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_MAX_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_MAX_32;
+      break;
+    case ISD::ATOMIC_LOAD_UMIN:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_UMIN_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_UMIN_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_UMIN_32;
+      break;
+    case ISD::ATOMIC_LOAD_UMAX:
+      if (MemTy == MVT::i8)
+        return ARM::ATOMIC_LOAD_UMAX_8;
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_UMAX_16;
+      if (MemTy == MVT::i32)
+        return ARM::ATOMIC_LOAD_UMAX_32;
+      break;
+    case ISD::ATOMIC_LOAD_FADD:
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_FADD_16; // f16 promoted to f32
+      if (MemTy == MVT::f16)
+        return ARM::ATOMIC_LOAD_FADD_16;
+      if (MemTy == MVT::f32)
+        return ARM::ATOMIC_LOAD_FADD_32;
+      if (MemTy == MVT::f64)
+        return ARM::ATOMIC_LOAD_FADD_64;
+      break;
+    case ISD::ATOMIC_LOAD_FSUB:
+      if (MemTy == MVT::i16)
+        return ARM::ATOMIC_LOAD_FSUB_16; // f16 promoted to f32
+      if (MemTy == MVT::f16)
+        return ARM::ATOMIC_LOAD_FSUB_16;
+      if (MemTy == MVT::f32)
+        return ARM::ATOMIC_LOAD_FSUB_32;
+      if (MemTy == MVT::f64)
+        return ARM::ATOMIC_LOAD_FSUB_64;
+      break;
+    }
+    llvm_unreachable("Unknown AtomicOp type");
+    return ARM::INSTRUCTION_LIST_END;
+  }();
+
+  SDValue Chain = N->getOperand(0);
+  SDValue Addr = N->getOperand(1);
+  SDValue Value = N->getOperand(2);
+  SDNode *Swap = CurDAG->getMachineNode(
+      Opcode, SDLoc(N), CurDAG->getVTList(Value.getValueType(), MVT::Other),
+      {Chain, Addr, Value});
+
+  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Swap), {MemOp});
+
+  ReplaceUses(SDValue(N, 0), SDValue(Swap, 0)); // Result
+  ReplaceUses(SDValue(N, 1), SDValue(Swap, 1)); // Chain
+  CurDAG->RemoveDeadNode(N);
+}
+
 static Optional<std::pair<unsigned, unsigned>>
 getContiguousRangeOfSetBits(const APInt &A) {
   unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
@@ -5028,6 +5165,39 @@
   case ISD::ATOMIC_CMP_SWAP:
     SelectCMP_SWAP(N);
     return;
+
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_CLR:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD_FADD:
+  case ISD::ATOMIC_LOAD_FSUB:
+  case ISD::ATOMIC_SWAP:
+    SelectAtomicOp(N);
+    return;
+
+  case ARMISD::CALL_WRAPPED: {
+    std::vector<SDValue> Ops;
+    // First append the normal operands
+    SDNode *Glue = N->getGluedNode();
+    for (unsigned int i = 1; i < N->getNumOperands() - (Glue ? 1 : 0); i++) {
+      Ops.push_back(N->getOperand(i));
+    }
+    // Then the chain operand
+    Ops.push_back(N->getOperand(0));
+    // Then the glue operand
+    if (Glue)
+      Ops.push_back(N->getOperand(N->getNumOperands() - 1));
+    CurDAG->SelectNodeTo(N, ARM::CallWrapped, N->getVTList(), Ops);
+    return;
+  }
   }
 
   SelectCode(N);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -19001,6 +19001,11 @@
 // and up to 64 bits on the non-M profiles
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // At -O0, expand atomicrmw to pseudo-instructions that are lowered after
+  // register allocation, so no spills are inserted between ldrex/strex.
+  if (getTargetMachine().getOptLevel() == 0 && !Subtarget->isThumb())
+    return AtomicExpansionKind::None;
+
   if (AI->isFloatingPointOperation())
     return AtomicExpansionKind::CmpXChg;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -6424,6 +6424,37 @@
                              NoItinerary, []>, Sched<[]>;
 }
 
+let Constraints = "@earlyclobber $Rd,@earlyclobber $temp",
+    mayLoad = 1, mayStore = 1 in
+multiclass AtomicRMW {
+  def _8 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$new), NoItinerary, []>, Sched<[]>;
+  def _16 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$new), NoItinerary, []>, Sched<[]>;
+  def _32 : PseudoInst<(outs GPR:$Rd, GPR:$temp), (ins GPR:$addr, GPR:$new), NoItinerary, []>, Sched<[]>;
+  def _64 : PseudoInst<(outs GPRPair:$Rd, GPR:$temp), (ins GPR:$addr, GPRPair:$new), NoItinerary, []>, Sched<[]>;
+}
+defm ATOMIC_SWAP : AtomicRMW;
+defm ATOMIC_LOAD_ADD : AtomicRMW;
+defm ATOMIC_LOAD_SUB : AtomicRMW;
+defm ATOMIC_LOAD_AND : AtomicRMW;
+defm ATOMIC_LOAD_CLR : AtomicRMW;
+defm ATOMIC_LOAD_OR : AtomicRMW;
+defm ATOMIC_LOAD_XOR : AtomicRMW;
+defm ATOMIC_LOAD_NAND : AtomicRMW;
+defm ATOMIC_LOAD_MIN : AtomicRMW;
+defm ATOMIC_LOAD_MAX : AtomicRMW;
+defm ATOMIC_LOAD_UMIN : AtomicRMW;
+defm ATOMIC_LOAD_UMAX : AtomicRMW;
+// FADD and FSUB keep the ldrex/strex data and the strex status in a GPR temporary (a GPRPair for the 64-bit variant), but produce their result in an SPR/DPR.
+let Constraints = "@earlyclobber $Rd,@earlyclobber $temp",
+    mayLoad = 1, mayStore = 1 in
+multiclass AtomicRMWFloat {
+  def _16 : PseudoInst<(outs SPR:$Rd, GPR:$temp), (ins GPR:$addr, SPR:$new), NoItinerary, []>, Sched<[]>;
+  def _32 : PseudoInst<(outs SPR:$Rd, GPR:$temp), (ins GPR:$addr, SPR:$new), NoItinerary, []>, Sched<[]>;
+  def _64 : PseudoInst<(outs DPR:$Rd, GPRPair:$temp), (ins GPR:$addr, DPR:$new), NoItinerary, []>, Sched<[]>;
+}
+defm ATOMIC_LOAD_FADD : AtomicRMWFloat;
+defm ATOMIC_LOAD_FSUB : AtomicRMWFloat;
+
 def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary,
                                  [(atomic_fence timm:$ordering, 0)]> {
   let hasSideEffects = 1;
diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_all.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_all.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_all.ll
@@ -0,0 +1,1024 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; Test the instruction sequences produced by atomicrmw instructions. In
+; particular, ensure there are no stores/spills inserted between the exclusive
+; load and stores, which would invalidate the exclusive monitor.
+
+; atomicrmw xchg for floating-point types is not implemented yet, so those
+; tests are commented out.
+ +; RUN: llc -O0 -o - %s | FileCheck %s --check-prefix=CHECK +target triple = "armv7-none-eabi" + +@atomic_i8 = external global i8 +@atomic_i16 = external global i16 +@atomic_i32 = external global i32 +@atomic_i64 = external global i64 + +@atomic_half = external global half +@atomic_float = external global float +@atomic_double = external global double + +define i8 @test_xchg_i8() { +; CHECK-LABEL: test_xchg_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB0_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .LBB0_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw xchg i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_add_i8() { +; CHECK-LABEL: test_add_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB1_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: add r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB1_1 +; CHECK-NEXT: b .LBB1_2 +; CHECK-NEXT: .LBB1_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw add i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_sub_i8() { +; CHECK-LABEL: test_sub_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB2_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: sub r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB2_1 +; CHECK-NEXT: b .LBB2_2 +; CHECK-NEXT: .LBB2_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw sub i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_and_i8() { +; CHECK-LABEL: test_and_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB3_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: and r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB3_1 +; CHECK-NEXT: b .LBB3_2 +; CHECK-NEXT: .LBB3_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw and i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_nand_i8() { +; CHECK-LABEL: test_nand_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB4_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: and r0, r0, r3 +; CHECK-NEXT: mvn r0, r0 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB4_1 +; CHECK-NEXT: b .LBB4_2 +; CHECK-NEXT: .LBB4_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw nand i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 
@test_or_i8() { +; CHECK-LABEL: test_or_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB5_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: orr r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB5_1 +; CHECK-NEXT: b .LBB5_2 +; CHECK-NEXT: .LBB5_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw or i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_xor_i8() { +; CHECK-LABEL: test_xor_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB6_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: eor r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB6_1 +; CHECK-NEXT: b .LBB6_2 +; CHECK-NEXT: .LBB6_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw xor i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_max_i8() { +; CHECK-LABEL: test_max_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB7_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movle r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB7_1 +; CHECK-NEXT: b .LBB7_2 +; CHECK-NEXT: .LBB7_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw max i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_min_i8() { +; CHECK-LABEL: test_min_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB8_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movge r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB8_1 +; CHECK-NEXT: b .LBB8_2 +; CHECK-NEXT: .LBB8_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw min i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_umax_i8() { +; CHECK-LABEL: test_umax_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB9_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movle r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB9_1 +; CHECK-NEXT: b .LBB9_2 +; CHECK-NEXT: .LBB9_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw umax i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} +define i8 @test_umin_i8() { +; CHECK-LABEL: test_umin_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i8 +; CHECK-NEXT: movt r2, :upper16:atomic_i8 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB10_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexb r1, 
[r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movge r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexb r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB10_1 +; CHECK-NEXT: b .LBB10_2 +; CHECK-NEXT: .LBB10_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw umin i8* @atomic_i8, i8 1 monotonic + ret i8 %0 +} + + +define i16 @test_xchg_i16() { +; CHECK-LABEL: test_xchg_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB11_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB11_1 +; CHECK-NEXT: b .LBB11_2 +; CHECK-NEXT: .LBB11_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw xchg i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_add_i16() { +; CHECK-LABEL: test_add_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB12_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: add r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB12_1 +; CHECK-NEXT: b .LBB12_2 +; CHECK-NEXT: .LBB12_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw add i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_sub_i16() { +; CHECK-LABEL: test_sub_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB13_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: sub r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB13_1 +; CHECK-NEXT: b .LBB13_2 +; CHECK-NEXT: .LBB13_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw sub i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_and_i16() { +; CHECK-LABEL: test_and_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB14_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: and r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB14_1 +; CHECK-NEXT: b .LBB14_2 +; CHECK-NEXT: .LBB14_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw and i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_nand_i16() { +; CHECK-LABEL: test_nand_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB15_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: and r0, r0, r3 +; CHECK-NEXT: mvn r0, r0 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB15_1 +; CHECK-NEXT: b .LBB15_2 +; CHECK-NEXT: .LBB15_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw 
nand i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_or_i16() { +; CHECK-LABEL: test_or_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB16_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: orr r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB16_1 +; CHECK-NEXT: b .LBB16_2 +; CHECK-NEXT: .LBB16_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw or i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_xor_i16() { +; CHECK-LABEL: test_xor_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB17_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: eor r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB17_1 +; CHECK-NEXT: b .LBB17_2 +; CHECK-NEXT: .LBB17_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw xor i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_max_i16() { +; CHECK-LABEL: test_max_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB18_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movle r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB18_1 +; CHECK-NEXT: b .LBB18_2 +; CHECK-NEXT: .LBB18_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw max i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_min_i16() { +; CHECK-LABEL: test_min_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB19_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movge r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB19_1 +; CHECK-NEXT: b .LBB19_2 +; CHECK-NEXT: .LBB19_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw min i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_umax_i16() { +; CHECK-LABEL: test_umax_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB20_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movle r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB20_1 +; CHECK-NEXT: b .LBB20_2 +; CHECK-NEXT: .LBB20_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw umax i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} +define i16 @test_umin_i16() { +; CHECK-LABEL: test_umin_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i16 +; CHECK-NEXT: movt r2, :upper16:atomic_i16 +; 
CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB21_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movge r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strexh r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB21_1 +; CHECK-NEXT: b .LBB21_2 +; CHECK-NEXT: .LBB21_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw umin i16* @atomic_i16, i16 1 monotonic + ret i16 %0 +} + + +define i32 @test_xchg_i32() { +; CHECK-LABEL: test_xchg_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB22_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB22_1 +; CHECK-NEXT: b .LBB22_2 +; CHECK-NEXT: .LBB22_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw xchg i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_add_i32() { +; CHECK-LABEL: test_add_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB23_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: add r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB23_1 +; CHECK-NEXT: b .LBB23_2 +; CHECK-NEXT: .LBB23_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw add i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_sub_i32() { +; CHECK-LABEL: test_sub_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB24_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: sub r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB24_1 +; CHECK-NEXT: b .LBB24_2 +; CHECK-NEXT: .LBB24_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw sub i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_and_i32() { +; CHECK-LABEL: test_and_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB25_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: and r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB25_1 +; CHECK-NEXT: b .LBB25_2 +; CHECK-NEXT: .LBB25_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw and i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_nand_i32() { +; CHECK-LABEL: test_nand_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB26_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: and r0, r0, r3 +; CHECK-NEXT: mvn r0, r0 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; 
CHECK-NEXT: bne .LBB26_1 +; CHECK-NEXT: b .LBB26_2 +; CHECK-NEXT: .LBB26_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw nand i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_or_i32() { +; CHECK-LABEL: test_or_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB27_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: orr r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB27_1 +; CHECK-NEXT: b .LBB27_2 +; CHECK-NEXT: .LBB27_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw or i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_xor_i32() { +; CHECK-LABEL: test_xor_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB28_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: eor r0, r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB28_1 +; CHECK-NEXT: b .LBB28_2 +; CHECK-NEXT: .LBB28_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw xor i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_max_i32() { +; CHECK-LABEL: test_max_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB29_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movle r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB29_1 +; CHECK-NEXT: b .LBB29_2 +; CHECK-NEXT: .LBB29_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw max i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_min_i32() { +; CHECK-LABEL: test_min_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB30_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movge r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB30_1 +; CHECK-NEXT: b .LBB30_2 +; CHECK-NEXT: .LBB30_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw min i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_umax_i32() { +; CHECK-LABEL: test_umax_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB31_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movle r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB31_1 +; CHECK-NEXT: b .LBB31_2 +; CHECK-NEXT: .LBB31_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw umax i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} +define i32 @test_umin_i32() { +; CHECK-LABEL: test_umin_i32: 
+; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_i32 +; CHECK-NEXT: movt r2, :upper16:atomic_i32 +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: .LBB32_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r1, [r2] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: cmp r0, r3 +; CHECK-NEXT: movge r0, r3 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: strex r1, r1, [r2] +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: bne .LBB32_1 +; CHECK-NEXT: b .LBB32_2 +; CHECK-NEXT: .LBB32_2: @ %entry +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw umin i32* @atomic_i32, i32 1 monotonic + ret i32 %0 +} + + + +define i64 @test_xchg_i64() { +; CHECK-LABEL: test_xchg_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-NEXT: mov r2, #1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __sync_lock_test_and_set_8 +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw xchg i64* @atomic_i64, i64 1 monotonic + ret i64 %0 +} +define i64 @test_add_i64() { +; CHECK-LABEL: test_add_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-NEXT: mov r2, #1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __sync_fetch_and_add_8 +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw add i64* @atomic_i64, i64 1 monotonic + ret i64 %0 +} +define i64 @test_sub_i64() { +; CHECK-LABEL: test_sub_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-NEXT: mov r2, #1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __sync_fetch_and_sub_8 +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw sub i64* @atomic_i64, i64 1 monotonic + ret i64 %0 +} +define i64 @test_and_i64() { +; CHECK-LABEL: test_and_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-NEXT: mov r2, #1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __sync_fetch_and_and_8 +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw and i64* @atomic_i64, i64 1 monotonic + ret i64 %0 +} +define i64 @test_nand_i64() { +; CHECK-LABEL: test_nand_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-NEXT: mov r2, #1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __sync_fetch_and_nand_8 +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw nand i64* @atomic_i64, i64 1 monotonic + ret i64 %0 +} +define i64 @test_or_i64() { +; CHECK-LABEL: test_or_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-NEXT: mov r2, #1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __sync_fetch_and_or_8 +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw or i64* @atomic_i64, i64 1 monotonic + ret i64 %0 +} +define i64 @test_xor_i64() { +; CHECK-LABEL: test_xor_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r0, :lower16:atomic_i64 +; CHECK-NEXT: movt r0, :upper16:atomic_i64 +; CHECK-NEXT: mov r2, #1 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __sync_fetch_and_xor_8 +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw xor i64* @atomic_i64, i64 1 monotonic + ret i64 %0 +} + + +; ; Test floats +; define half @test_xchg_half() { +; entry: +; %0 = 
atomicrmw xchg half* @atomic_half, half 1.0 monotonic +; ret half %0 +; } +define half @test_fadd_half() { +; CHECK-LABEL: test_fadd_half: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r1, :lower16:atomic_half +; CHECK-NEXT: movt r1, :upper16:atomic_half +; CHECK-NEXT: vmov.f32 s2, #1.000000e+00 +; CHECK-NEXT: .LBB40_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r0, [r1] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vadd.f32 s0, s0, s2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: strexh r0, r0, [r1] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bne .LBB40_1 +; CHECK-NEXT: b .LBB40_2 +; CHECK-NEXT: .LBB40_2: @ %entry +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw fadd half* @atomic_half, half 1.0 monotonic + ret half %0 +} +define half @test_fsub_half() { +; CHECK-LABEL: test_fsub_half: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: movw r1, :lower16:atomic_half +; CHECK-NEXT: movt r1, :upper16:atomic_half +; CHECK-NEXT: vmov.f32 s2, #1.000000e+00 +; CHECK-NEXT: .LBB41_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexh r0, [r1] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vsub.f32 s0, s0, s2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: strexh r0, r0, [r1] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bne .LBB41_1 +; CHECK-NEXT: b .LBB41_2 +; CHECK-NEXT: .LBB41_2: @ %entry +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: pop {r11, pc} +entry: + %0 = atomicrmw fsub half* @atomic_half, half 1.0 monotonic + ret half %0 +} +; define float @test_xchg_float() { +; entry: +; %0 = atomicrmw xchg float* @atomic_float, float 1.0 monotonic +; ret float %0 +; } +define float @test_fadd_float() { +; CHECK-LABEL: test_fadd_float: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r1, :lower16:atomic_float +; CHECK-NEXT: movt r1, :upper16:atomic_float +; CHECK-NEXT: vmov.f32 s2, #1.000000e+00 +; CHECK-NEXT: .LBB42_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r0, [r1] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vadd.f32 s0, s0, s2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: strex r0, r0, [r1] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bne .LBB42_1 +; CHECK-NEXT: b .LBB42_2 +; CHECK-NEXT: .LBB42_2: @ %entry +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw fadd float* @atomic_float, float 1.0 monotonic + ret float %0 +} +define float @test_fsub_float() { +; CHECK-LABEL: test_fsub_float: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r1, :lower16:atomic_float +; CHECK-NEXT: movt r1, :upper16:atomic_float +; CHECK-NEXT: vmov.f32 s2, #1.000000e+00 +; CHECK-NEXT: .LBB43_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrex r0, [r1] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vsub.f32 s0, s0, s2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: strex r0, r0, [r1] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bne .LBB43_1 +; CHECK-NEXT: b .LBB43_2 +; CHECK-NEXT: .LBB43_2: @ %entry +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw fsub float* @atomic_float, float 1.0 monotonic + ret float %0 +} +; define double @test_xchg_double() { +; entry: +; %0 = atomicrmw xchg double* @atomic_double, double 1.0 monotonic +; ret double %0 +; } +define double @test_fadd_double() { +; CHECK-LABEL: test_fadd_double: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_double +; CHECK-NEXT: movt r2, :upper16:atomic_double +; 
CHECK-NEXT: vmov.f64 d17, #1.000000e+00 +; CHECK-NEXT: .LBB44_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexd r0, r1, [r2] +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vadd.f64 d16, d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: strexd r0, r0, r1, [r2] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bne .LBB44_1 +; CHECK-NEXT: b .LBB44_2 +; CHECK-NEXT: .LBB44_2: @ %entry +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw fadd double* @atomic_double, double 1.0 monotonic + ret double %0 +} +define double @test_fsub_double() { +; CHECK-LABEL: test_fsub_double: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: movw r2, :lower16:atomic_double +; CHECK-NEXT: movt r2, :upper16:atomic_double +; CHECK-NEXT: vmov.f64 d17, #1.000000e+00 +; CHECK-NEXT: .LBB45_1: @ %entry +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrexd r0, r1, [r2] +; CHECK-NEXT: vmov d16, r0, r1 +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: strexd r0, r0, r1, [r2] +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bne .LBB45_1 +; CHECK-NEXT: b .LBB45_2 +; CHECK-NEXT: .LBB45_2: @ %entry +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr +entry: + %0 = atomicrmw fsub double* @atomic_double, double 1.0 monotonic + ret double %0 +} diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll --- a/llvm/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll +++ b/llvm/test/Transforms/AtomicExpand/ARM/atomicrmw-fp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s +; RUN: opt -O1 -S -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s define float @test_atomicrmw_fadd_f32(float* %ptr, float %value) { ; CHECK-LABEL: @test_atomicrmw_fadd_f32(
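
Note (commentary, not part of the patch): a minimal standalone reproducer can be used to exercise the new -O0 lowering outside the included regression tests. The file and symbol names below are illustrative only. Built with llc -O0 -mtriple=armv7-none-eabi, each atomicrmw should expand to a single ldrex/strex (or ldrexd/strexd) loop with no spills between the exclusive load and store, matching the CHECK lines above:

; repro.ll -- illustrative example, not part of this change
target triple = "armv7-none-eabi"

@counter = global i32 0
@accum = global double 0.0

define i32 @bump() {
entry:
  ; selected as the ATOMIC_LOAD_ADD_32 pseudo, expanded after RA
  ; to a ldrex/add/strex loop
  %old = atomicrmw add i32* @counter, i32 1 monotonic
  ret i32 %old
}

define double @accumulate(double %x) {
entry:
  ; selected as the ATOMIC_LOAD_FADD_64 pseudo, expanded after RA
  ; to a ldrexd/vadd.f64/strexd loop
  %old = atomicrmw fadd double* @accum, double %x monotonic
  ret double %old
}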