diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -161,6 +161,9 @@
 def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
                                              "HasPartwordAtomics", "true",
                                              "Enable l[bh]arx and st[bh]cx.">;
+def FeatureQuadwordAtomic : SubtargetFeature<"quadword-atomics",
+                                             "HasQuadwordAtomics", "true",
+                                             "Enable lqarx and stqcx.">;
 def FeatureInvariantFunctionDescriptors :
   SubtargetFeature<"invariant-function-descriptors",
                    "HasInvariantFunctionDescriptors", "true",
@@ -327,6 +330,7 @@
     FeatureDirectMove,
     FeatureICBT,
     FeaturePartwordAtomic,
+    FeatureQuadwordAtomic,
     FeaturePredictableSelectIsExpensive
   ];
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -573,6 +573,8 @@
     ATOMIC_CMP_SWAP_8,
     ATOMIC_CMP_SWAP_16,
 
+    ATOMIC_SWAP_128,
+
     /// GPRC = TOC_ENTRY GA, TOC
     /// Loads the entry for GA from the TOC, where the TOC base is given by
     /// the last operand.
@@ -879,6 +881,11 @@
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
                               MachineBasicBlock *MBB) const override;
+  MachineBasicBlock *EmitQuadwordAtomicBinary(MachineInstr &MI,
+                                              MachineBasicBlock *MBB,
+                                              unsigned BinOpcode,
+                                              unsigned CmpOpcode = 0,
+                                              unsigned CmpPred = 0) const;
   MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
                                       MachineBasicBlock *MBB,
                                       unsigned AtomicSize,
@@ -1224,6 +1231,7 @@
    SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+   SDValue LowerQuadwordAtomicOps(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1280,6 +1280,10 @@
     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
   }
 
+  if (isPPC64 && Subtarget.hasQuadwordAtomics() && Subtarget.isAIXABI()) {
+    setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom);
+  }
+
   setBooleanContents(ZeroOrOneBooleanContent);
 
   if (Subtarget.hasAltivec()) {
@@ -1603,6 +1607,7 @@
   case PPCISD::TOC_ENTRY:          return "PPCISD::TOC_ENTRY";
   case PPCISD::ATOMIC_CMP_SWAP_8:  return "PPCISD::ATOMIC_CMP_SWAP_8";
   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
+  case PPCISD::ATOMIC_SWAP_128:    return "PPCISD::ATOMIC_SWAP_128";
   case PPCISD::DYNALLOC:           return "PPCISD::DYNALLOC";
   case PPCISD::DYNAREAOFFSET:      return "PPCISD::DYNAREAOFFSET";
   case PPCISD::PROBED_ALLOCA:      return "PPCISD::PROBED_ALLOCA";
@@ -10466,6 +10471,34 @@
   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
 }
 
+SDValue PPCTargetLowering::LowerQuadwordAtomicOps(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
+  EVT MemVT = N->getMemoryVT();
+  assert(MemVT.getSimpleVT() == MVT::i128 &&
+         "Expect quadword atomic operations");
+  SDLoc dl(N);
+  unsigned Opc = N->getOpcode();
+  switch (Opc) {
+  case ISD::ATOMIC_SWAP: {
+    SDValue Incr = N->getOperand(2);
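+    // EXTRACT_ELEMENT with a constant index of 1 yields the high i64 half
+    // of the i128 value, and an index of 0 the low half; the target node
+    // built below carries the two halves as explicit operands and results.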
+    SDValue RHSHi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i64, Incr,
+                                DAG.getIntPtrConstant(1, dl));
+    SDValue RHSLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i64, Incr,
+                                DAG.getIntPtrConstant(0, dl));
+    SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
+    SmallVector<SDValue, 4> Ops = {N->getOperand(0), N->getOperand(1), RHSHi,
+                                   RHSLo};
+    MachineMemOperand *MMO = N->getMemOperand();
+    SDValue Res = DAG.getMemIntrinsicNode(PPCISD::ATOMIC_SWAP_128, dl, Tys,
+                                          Ops, MemVT, MMO);
+    return Res;
+  }
+  default:
+    llvm_unreachable("Unhandled quadword atomic operation");
+  }
+}
+
 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -10867,6 +10900,16 @@
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Do not know how to custom type legalize this operation!");
+  case ISD::ATOMIC_SWAP: {
+    SDValue Res = LowerQuadwordAtomicOps(SDValue(N, 0), DAG);
+    if (Res) {
+      // BUILD_PAIR takes (lo, hi); the custom node returns (hi, lo, chain).
+      SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i128,
+                                 Res.getValue(1), Res);
+      Results.push_back(Pair);
+      Results.push_back(Res.getValue(2));
+    }
+    return;
+  }
   case ISD::READCYCLECOUNTER: {
     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
@@ -10981,6 +11024,199 @@
   return nullptr;
 }
 
+MachineBasicBlock *PPCTargetLowering::EmitQuadwordAtomicBinary(
+    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
+    unsigned CmpOpcode, unsigned CmpPred) const {
+  assert(Subtarget.isPPC64() &&
+         "Quadword lock-free atomic operations are only supported on PPC64");
+  // This also handles ATOMIC_SWAP, indicated by BinOpcode == 0.
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  auto LoadMnemonic = PPC::LQARX;
+  auto StoreMnemonic = PPC::STQCX;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const BasicBlock *BB = MBB->getBasicBlock();
+  MachineFunction::iterator It = ++MBB->getIterator();
+  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *CmpFallThroughMBB =
+      CmpOpcode ? MF->CreateMachineBasicBlock(BB) : nullptr;
+  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(It, LoopMBB);
+  if (CmpFallThroughMBB)
+    MF->insert(It, CmpFallThroughMBB);
+  MF->insert(It, ExitMBB);
+  ExitMBB->splice(ExitMBB->begin(), MBB, std::next(MI.getIterator()),
+                  MBB->end());
+  ExitMBB->transferSuccessorsAndUpdatePHIs(MBB);
+  // For min/max binary operations
+  // MBB:
+  //   ...
+  // LoopMBB:
+  //   lqarx in, ptr
+  //   cmpld in.sub_x0, op.sub_x0
+  //   %hi_eq = copy cr0.sub_eq
+  //   %cmphi = crandc cr0.sub_lt, cr0.sub_eq
+  //   cmpld in.sub_x1, op.sub_x1
+  //   %cmplow = crand %hi_eq, cr0.sub_lt
+  //   %res = cror %cmphi, %cmplow
+  //   bc %res, ExitMBB
+  // CmpFallThroughMBB:
+  //   stqcx op, ptr
+  //   bne- LoopMBB
+  // ExitMBB:
+  //   ...
+
+  // Otherwise
+  // MBB:
+  //   ...
+  // LoopMBB:
+  //   lqarx in, ptr
+  //   addc out.sub_x1, in.sub_x1, op.sub_x1
+  //   adde out.sub_x0, in.sub_x0, op.sub_x0
+  //   stqcx out, ptr
+  //   bne- LoopMBB
+  // ExitMBB:
+  //   ...
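+
+  // For example, IR like
+  //   %old = atomicrmw umax i128* %p, i128 %v monotonic
+  // would take the min/max shape above once an inserter case is wired up
+  // for it; this patch only routes ATOMIC_SWAP_I128 here (BinOpcode == 0,
+  // CmpOpcode == 0).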
+  MBB->addSuccessor(LoopMBB);
+  MachineBasicBlock *CurrentMBB = LoopMBB;
+  Register OutRegSub0 = MI.getOperand(0).getReg();
+  Register OutRegSub1 = MI.getOperand(1).getReg();
+  Register RAReg = MI.getOperand(2).getReg();
+  Register RBReg = MI.getOperand(3).getReg();
+  Register DeltaSub0 = MI.getOperand(4).getReg();
+  Register DeltaSub1 = MI.getOperand(5).getReg();
+  Register PairedLoadReg = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+  Register PairedStoreReg = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+
+  BuildMI(CurrentMBB, DL, TII->get(LoadMnemonic), PairedLoadReg)
+      .addReg(RAReg)
+      .addReg(RBReg);
+
+  if (BinOpcode == 0) {
+    // The return value of swap and min/max is the original value that was
+    // stored in memory before the operation.
+    // See https://llvm.org/docs/LangRef.html#atomicrmw-instruction
+    BuildMI(CurrentMBB, DL, TII->get(TargetOpcode::COPY))
+        .addDef(OutRegSub0)
+        .addUse(PairedLoadReg, 0, PPC::sub_gp8_x0);
+    BuildMI(CurrentMBB, DL, TII->get(TargetOpcode::COPY))
+        .addDef(OutRegSub1)
+        .addUse(PairedLoadReg, 0, PPC::sub_gp8_x1);
+    if (CmpOpcode == PPC::CMPD) {
+      // Signed comparison: the high doublewords are compared signed, while
+      // equality and the low doublewords are compared unsigned.
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CMPLD))
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x0)
+          .addUse(DeltaSub0);
+      Register HiEq = MRI.createVirtualRegister(&PPC::CRBITRCRegClass);
+      BuildMI(CurrentMBB, DL, TII->get(TargetOpcode::COPY), HiEq)
+          .addUse(PPC::CR0, 0, PPC::sub_eq);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CMPD))
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x0)
+          .addUse(DeltaSub0);
+      Register CmpHi = MRI.createVirtualRegister(&PPC::CRBITRCRegClass);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CRANDC), CmpHi)
+          .addUse(PPC::CR0, 0, CmpPred)
+          .addUse(HiEq);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CMPLD))
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x1)
+          .addUse(DeltaSub1);
+      Register CmpLow = MRI.createVirtualRegister(&PPC::CRBITRCRegClass);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CRAND), CmpLow)
+          .addUse(HiEq)
+          .addUse(PPC::CR0, 0, CmpPred);
+      Register Res = MRI.createVirtualRegister(&PPC::CRBITRCRegClass);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CROR), Res)
+          .addReg(CmpHi)
+          .addReg(CmpLow);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::BC)).addReg(Res).addMBB(ExitMBB);
+    } else if (CmpOpcode == PPC::CMPLD) {
+      // Unsigned comparison.
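+      // The 128-bit compare decomposes into CR-bit logic:
+      //   cmp(a, b) = cmp(a.hi, b.hi) | (eq(a.hi, b.hi) & cmp(a.lo, b.lo))
+      // where cmp is the CR bit selected by CmpPred; it is assembled below
+      // from two cmpld records plus crandc/crand/cror.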
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CMPLD))
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x0)
+          .addUse(DeltaSub0);
+      Register HiEq = MRI.createVirtualRegister(&PPC::CRBITRCRegClass);
+      BuildMI(CurrentMBB, DL, TII->get(TargetOpcode::COPY), HiEq)
+          .addUse(PPC::CR0, 0, PPC::sub_eq);
+      Register CmpHi = MRI.createVirtualRegister(&PPC::CRBITRCRegClass);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CRANDC), CmpHi)
+          .addUse(PPC::CR0, 0, CmpPred)
+          .addUse(HiEq);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CMPLD))
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x1)
+          .addUse(DeltaSub1);
+      Register CmpLow = MRI.createVirtualRegister(&PPC::CRBITRCRegClass);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CRAND), CmpLow)
+          .addUse(HiEq)
+          .addUse(PPC::CR0, 0, CmpPred);
+      Register Res = MRI.createVirtualRegister(&PPC::CRBITRCRegClass);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::CROR), Res)
+          .addReg(CmpHi)
+          .addReg(CmpLow);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::BC)).addReg(Res).addMBB(ExitMBB);
+    }
+    if (CmpOpcode) {
+      CurrentMBB->addSuccessor(ExitMBB);
+      CurrentMBB->addSuccessor(CmpFallThroughMBB);
+      CurrentMBB = CmpFallThroughMBB;
+    }
+    BuildMI(CurrentMBB, DL, TII->get(PPC::BUILD_QUADWORD), PairedStoreReg)
+        .addReg(DeltaSub0)
+        .addReg(DeltaSub1);
+  } else {
+    // Like swap, the binary flavors return the value that was in memory
+    // before the operation (see LangRef), so forward the loaded halves to
+    // the results and compute the new value into scratch registers.
+    BuildMI(CurrentMBB, DL, TII->get(TargetOpcode::COPY))
+        .addDef(OutRegSub0)
+        .addUse(PairedLoadReg, 0, PPC::sub_gp8_x0);
+    BuildMI(CurrentMBB, DL, TII->get(TargetOpcode::COPY))
+        .addDef(OutRegSub1)
+        .addUse(PairedLoadReg, 0, PPC::sub_gp8_x1);
+    Register NewHi = MRI.createVirtualRegister(&PPC::G8RCRegClass);
+    Register NewLo = MRI.createVirtualRegister(&PPC::G8RCRegClass);
+    switch (BinOpcode) {
+    case PPC::ADD8:
+      // Low doubleword first to generate the carry, then the high one.
+      BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), NewLo)
+          .addUse(DeltaSub1)
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x1);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), NewHi)
+          .addUse(DeltaSub0)
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x0);
+      break;
+    case PPC::SUBF8:
+      BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), NewLo)
+          .addUse(DeltaSub1)
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x1);
+      BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), NewHi)
+          .addUse(DeltaSub0)
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x0);
+      break;
+    case PPC::AND8:
+    case PPC::XOR8:
+    case PPC::OR8:
+      BuildMI(CurrentMBB, DL, TII->get(BinOpcode), NewHi)
+          .addUse(DeltaSub0)
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x0);
+      BuildMI(CurrentMBB, DL, TII->get(BinOpcode), NewLo)
+          .addUse(DeltaSub1)
+          .addUse(PairedLoadReg, 0, PPC::sub_gp8_x1);
+      break;
+    default:
+      llvm_unreachable("Unexpected quadword atomic binary opcode");
+    }
+    BuildMI(CurrentMBB, DL, TII->get(PPC::BUILD_QUADWORD), PairedStoreReg)
+        .addReg(NewHi)
+        .addReg(NewLo);
+  }
+  BuildMI(CurrentMBB, DL, TII->get(StoreMnemonic))
+      .addReg(PairedStoreReg)
+      .addReg(RAReg)
+      .addReg(RBReg);
+  BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE)
+      .addReg(PPC::CR0)
+      .addMBB(LoopMBB);
+  CurrentMBB->addSuccessor(LoopMBB);
+  CurrentMBB->addSuccessor(ExitMBB);
+  return ExitMBB;
+}
+
 MachineBasicBlock *
 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                     unsigned AtomicSize,
@@ -12372,7 +12608,9 @@
     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
         .addReg(TmpReg)
         .addReg(ShiftReg);
-  } else if (MI.getOpcode() == PPC::FADDrtz) {
+  } else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I128) {
+    BB = EmitQuadwordAtomicBinary(MI, BB, 0, 0, 0);
+  } else if (MI.getOpcode() == PPC::FADDrtz) {
     // This pseudo performs an FADD with rounding mode temporarily forced
     // to round-to-zero. We emit this via custom inserter since the FPSCR
     // is not modeled at the SelectionDAG level.
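
Note: EmitQuadwordAtomicBinary encodes a plain swap as BinOpcode == 0 with CmpOpcode == 0, hence the zeros in the dispatch above. Extending the dispatch to the remaining atomicrmw flavors would add sibling cases; a sketch, using hypothetical pseudo names this patch does not define (only ATOMIC_SWAP_I128 exists so far), with the umax predicate bit inferred from the min/max comment in the inserter:

    } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I128) {
      BB = EmitQuadwordAtomicBinary(MI, BB, PPC::ADD8);
    } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I128) {
      BB = EmitQuadwordAtomicBinary(MI, BB, 0, PPC::CMPLD, PPC::sub_gt);
    }
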
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -265,6 +265,13 @@
   def ATOMIC_SWAP_I64 : PPCCustomInserterPseudo<
     (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
     [(set i64:$dst, (atomic_swap_64 ForceXForm:$ptr, i64:$new))]>;
+
+  def ATOMIC_SWAP_I128 : PPCCustomInserterPseudo<
+    (outs g8rc:$res_hi, g8rc:$res_lo),
+    (ins memrr:$ptr, g8rc:$incr_hi, g8rc:$incr_lo),
+    "#ATOMIC_SWAP_I128",
+    [(set i64:$res_hi, i64:$res_lo,
+          (PPCatomicSwap_128 ForceXForm:$ptr, i64:$incr_hi, i64:$incr_lo))]>;
 }
 
 // Instructions to support atomic operations
@@ -274,7 +281,8 @@
 // TODO: Add scheduling info.
 let hasNoSchedulingInfo = 1 in
 def LQARX : XForm_1_memOp<31, 276, (outs g8prc:$RTp), (ins memrr:$ptr),
-                          "lqarx $RTp, $ptr", IIC_LdStLQARX, []>;
+                          "lqarx $RTp, $ptr", IIC_LdStLQARX, []>,
+            Requires<[HasQuadwordAtomics]>;
 
 // Instruction to support lock versions of atomics
 // (EH=1 - see Power ISA 2.07 Book II 4.4.2)
@@ -297,7 +305,8 @@
 // TODO: Add scheduling info.
 let hasNoSchedulingInfo = 1 in
 def STQCX : XForm_1_memOp<31, 182, (outs), (ins g8prc:$RSp, memrr:$dst),
-                          "stqcx. $RSp, $dst", IIC_LdStSTQCX, []>, isRecordForm;
+                          "stqcx. $RSp, $dst", IIC_LdStSTQCX, []>, isRecordForm,
+            Requires<[HasQuadwordAtomics]>;
 }
 
 let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
@@ -1242,6 +1251,11 @@
            Requires<[IsISA3_0]>;
 }
 
+def BUILD_QUADWORD : PPCPostRAExpPseudo<(outs g8prc:$RTp),
+                                        (ins g8rc:$src0, g8rc:$src1),
+                                        "#BUILD_QUADWORD", []>,
+                     isPPC64;
+
 let mayLoad = 1, hasNoSchedulingInfo = 1 in {
 // Full 16-byte load.
 // Early clobber $RTp to avoid it being assigned the same register as RA.
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3114,6 +3114,39 @@
     MI.RemoveOperand(0);
     return true;
   }
+  case PPC::BUILD_QUADWORD: {
+    // G8p registers are even/odd GPR pairs: pair N covers X(2N) and X(2N+1).
+    unsigned DestIdx = MI.getOperand(0).getReg() - PPC::G8p0;
+    Register DestX0 = PPC::X0 + 2 * DestIdx;
+    Register DestX1 = DestX0 + 1;
+    // Take care not to clobber a Src register before it has been read.
+    Register Src0 = MI.getOperand(1).getReg();
+    Register Src1 = MI.getOperand(2).getReg();
+    // The trickiest case: the destination halves exactly swap the sources.
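+    // Use the classic three-XOR swap, which exchanges the two registers in
+    // place without a scratch register:
+    //   x ^= y; y ^= x; x ^= y;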
+    if (DestX0 == Src1 && DestX1 == Src0) {
+      BuildMI(MBB, MI, DL, get(PPC::XOR8), DestX0)
+          .addReg(DestX0)
+          .addReg(DestX1);
+      BuildMI(MBB, MI, DL, get(PPC::XOR8), DestX1)
+          .addReg(DestX0)
+          .addReg(DestX1);
+      BuildMI(MBB, MI, DL, get(PPC::XOR8), DestX0)
+          .addReg(DestX0)
+          .addReg(DestX1);
+    } else if (DestX0 != Src0 || DestX1 != Src1) {
+      // Copy in an order that never overwrites a source that is still
+      // needed: DestX1 == Src0 is the only case that forces DestX0 first.
+      if (DestX0 == Src1 || DestX1 != Src0) {
+        BuildMI(MBB, MI, DL, get(PPC::OR8), DestX1).addReg(Src1).addReg(Src1);
+        BuildMI(MBB, MI, DL, get(PPC::OR8), DestX0).addReg(Src0).addReg(Src0);
+      } else {
+        BuildMI(MBB, MI, DL, get(PPC::OR8), DestX0).addReg(Src0).addReg(Src0);
+        BuildMI(MBB, MI, DL, get(PPC::OR8), DestX1).addReg(Src1).addReg(Src1);
+      }
+    }
+    MI.setDesc(get(PPC::UNENCODED_NOP));
+    MI.RemoveOperand(2);
+    MI.RemoveOperand(1);
+    MI.RemoveOperand(0);
+    return true;
+  }
   }
   return false;
 }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -121,6 +121,15 @@
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
 ]>;
 
+def SDT_PPCI128Atomic2 : SDTypeProfile<2, 3, [
+  SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>, SDTCisSameAs<0, 3>,
+  SDTCisSameAs<3, 4>
+]>;
+
+def SDT_PPCI128Atomic3 : SDTypeProfile<2, 5, [
+  SDTCisSameAs<0, 1>, SDTCisPtrTy<2>, SDTCisSameAs<3, 4>, SDTCisSameAs<5, 6>
+]>;
+
 //===----------------------------------------------------------------------===//
 // PowerPC specific DAG Nodes.
 //
@@ -357,6 +366,9 @@
                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
                       [SDNPHasChain, SDNPMayStore]>;
+def PPCatomicSwap_128 :
+  SDNode<"PPCISD::ATOMIC_SWAP_128", SDT_PPCI128Atomic2,
+         [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
 
 // Instructions to set/unset CR bit 6 for SVR4 vararg calls
 def PPCcr6set   : SDNode<"PPCISD::CR6SET", SDTNone,
@@ -1170,6 +1182,7 @@
 def HasSPE  : Predicate<"Subtarget->hasSPE()">;
 def HasICBT : Predicate<"Subtarget->hasICBT()">;
 def HasPartwordAtomics : Predicate<"Subtarget->hasPartwordAtomics()">;
+def HasQuadwordAtomics : Predicate<"Subtarget->hasQuadwordAtomics()">;
 def NoNaNsFPMath
     : Predicate<"Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
 def NaNsFPMath
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -139,6 +139,7 @@
   bool HasICBT;
   bool HasInvariantFunctionDescriptors;
   bool HasPartwordAtomics;
+  bool HasQuadwordAtomics;
   bool HasDirectMove;
   bool HasHTM;
   bool HasFloat128;
@@ -301,6 +302,7 @@
   bool usePPCPreRASchedStrategy() const { return UsePPCPreRASchedStrategy; }
   bool usePPCPostRASchedStrategy() const { return UsePPCPostRASchedStrategy; }
   bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+  bool hasQuadwordAtomics() const { return HasQuadwordAtomics; }
   bool hasDirectMove() const { return HasDirectMove; }
 
   Align getPlatformStackAlignment() const {
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -119,6 +119,7 @@
   HasICBT = false;
   HasInvariantFunctionDescriptors = false;
   HasPartwordAtomics = false;
+  HasQuadwordAtomics = false;
   HasDirectMove = false;
   HasHTM = false;
   HasFloat128 = false;
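
Note: FeatureQuadwordAtomic is only enabled by default from pwr8 on (lqarx/stqcx. arrived with ISA 2.07), but the attribute can also be toggled by hand; a hypothetical invocation sketch, spelling the flag after the SubtargetFeature key defined above:

    llc -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr7 -mattr=+quadword-atomics input.ll
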
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN:   < %s | FileCheck %s
+
+define i128 @swap(i128* %a, i128 %x) {
+; CHECK-LABEL: swap:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sync
+; CHECK-NEXT:  L..BB0_1: # %entry
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    lqarx 6, 0, 3
+; CHECK-NEXT:    mr 9, 5
+; CHECK-NEXT:    mr 8, 4
+; CHECK-NEXT:    stqcx. 8, 0, 3
+; CHECK-NEXT:    bne 0, L..BB0_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    mr 3, 6
+; CHECK-NEXT:    mr 4, 7
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    blr
+entry:
+  %0 = atomicrmw xchg i128* %a, i128 %x seq_cst, align 16
+  ret i128 %0
+}
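
Note: only ISD::ATOMIC_SWAP is marked Custom in this patch, so xchg is the one flavor the test can cover; the other atomicrmw operations still take the default lowering even though EmitQuadwordAtomicBinary already contains machinery for them. A follow-up test might start from a sketch like this (CHECK lines omitted, to be autogenerated once the lowering is wired up):

    define i128 @add(i128* %a, i128 %x) {
    entry:
      %0 = atomicrmw add i128* %a, i128 %x seq_cst, align 16
      ret i128 %0
    }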