diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1525,3 +1525,22 @@
       Intrinsic<[],[],[]>;
 }
+//===----------------------------------------------------------------------===//
+// PowerPC Atomic Intrinsic Definitions.
+let TargetPrefix = "ppc" in {
+  class AtomicRMW128Intrinsic
+    : Intrinsic<[llvm_i64_ty, llvm_i64_ty],
+                [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+                [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+  def int_ppc_atomicrmw_xchg_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_add_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_sub_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_and_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_or_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_xor_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_nand_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_max_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_min_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_umax_i128 : AtomicRMW128Intrinsic;
+  def int_ppc_atomicrmw_umin_i128 : AtomicRMW128Intrinsic;
+}
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -689,6 +689,7 @@
   if (PMV.ValueType == PMV.WordType) {
     PMV.AlignedAddr = Addr;
     PMV.AlignedAddrAlignment = AddrAlign;
+    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
     return PMV;
   }
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -27,6 +27,7 @@
   PPCCallingConv.cpp
   PPCCCState.cpp
   PPCCTRLoops.cpp
+  PPCExpandAtomicPseudoInsts.cpp
   PPCHazardRecognizers.cpp
   PPCInstrInfo.cpp
   PPCISelDAGToDAG.cpp
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -52,6 +52,7 @@
   FunctionPass *createPPCBoolRetToIntPass();
   FunctionPass *createPPCExpandISELPass();
   FunctionPass *createPPCPreEmitPeepholePass();
+  FunctionPass *createPPCExpandAtomicPseudoPass();
   void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP);
   bool LowerPPCMachineOperandToMCOperand(const MachineOperand &MO,
@@ -75,6 +76,7 @@
   void initializePPCPreEmitPeepholePass(PassRegistry &);
   void initializePPCTLSDynamicCallPass(PassRegistry &);
   void initializePPCMIPeepholePass(PassRegistry&);
+  void initializePPCExpandAtomicPseudoPass(PassRegistry&);
 
   extern char &PPCVSXFMAMutateID;
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -161,6 +161,9 @@
 def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
                                              "HasPartwordAtomics", "true",
                                              "Enable l[bh]arx and st[bh]cx.">;
+def FeatureQuadwordAtomic : SubtargetFeature<"quadword-atomics",
+                                             "HasQuadwordAtomics", "true",
+                                             "Enable lqarx and stqcx.">;
 def FeatureInvariantFunctionDescriptors :
   SubtargetFeature<"invariant-function-descriptors",
                    "HasInvariantFunctionDescriptors", "true",
@@ -327,6 +330,7 @@
     FeatureDirectMove,
     FeatureICBT,
     FeaturePartwordAtomic,
+    FeatureQuadwordAtomic,
     FeaturePredictableSelectIsExpensive
   ];
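Note for reviewers: each int_ppc_atomicrmw_*_i128 intrinsic declared in IntrinsicsPowerPC.td above takes the 16-byte-aligned address plus the low and high 64-bit halves of the operand, and returns the low and high halves of the previously stored value. At the IR level the add variant, for example, has roughly this shape (sketch only; the other operations look the same):

  declare { i64, i64 } @llvm.ppc.atomicrmw.add.i128(i8*, i64, i64)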
diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -0,0 +1,214 @@
+//===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands atomic pseudo instructions into
+// target instructions after register allocation. Expanding them post-RA keeps
+// each LL/SC loop together as a single blob, so spills are unlikely to be
+// inserted inside the loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-atomic-expand"
+
+namespace {
+
+class PPCExpandAtomicPseudo : public MachineFunctionPass {
+public:
+  const PPCInstrInfo *TII;
+  const PPCRegisterInfo *TRI;
+  static char ID;
+
+  PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {
+    initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
+                MachineBasicBlock::iterator &NMBBI);
+  bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI,
+                          MachineBasicBlock::iterator &NMBBI);
+};
+
+static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+                       Register Dest0, Register Dest1, Register Src0,
+                       Register Src1) {
+  const MCInstrDesc &OR = TII->get(PPC::OR8);
+  const MCInstrDesc &XOR = TII->get(PPC::XOR8);
+  if (Dest0 == Src1 && Dest1 == Src0) {
+    // The trickiest case: the two registers of the pair must be swapped.
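+    // With no free register available after RA, exchange the two values in
+    // place using the classic three-XOR swap.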
+    BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
+    BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1);
+    BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
+  } else if (Dest0 != Src0 || Dest1 != Src1) {
+    if (Dest0 == Src1 || Dest1 != Src0) {
+      BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
+      BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
+    } else {
+      BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
+      BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
+    }
+  }
+}
+
+bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+  TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  TRI = &TII->getRegisterInfo();
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock &MBB = *I;
+    for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
+         MBBI != MBBE;) {
+      MachineInstr &MI = *MBBI;
+      MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+      Changed |= expandMI(MBB, MI, NMBBI);
+      MBBI = NMBBI;
+    }
+  }
+  if (Changed)
+    MF.RenumberBlocks();
+  return Changed;
+}
+
+bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
+                                     MachineBasicBlock::iterator &NMBBI) {
+  switch (MI.getOpcode()) {
+  case PPC::ATOMIC_SWAP_I128:
+  case PPC::ATOMIC_LOAD_ADD_I128:
+  case PPC::ATOMIC_LOAD_SUB_I128:
+  case PPC::ATOMIC_LOAD_XOR_I128:
+  case PPC::ATOMIC_LOAD_NAND_I128:
+  case PPC::ATOMIC_LOAD_AND_I128:
+  case PPC::ATOMIC_LOAD_OR_I128:
+    return expandAtomicRMW128(MBB, MI, NMBBI);
+  default:
+    return false;
+  }
+}
+
+bool PPCExpandAtomicPseudo::expandAtomicRMW128(
+    MachineBasicBlock &MBB, MachineInstr &MI,
+    MachineBasicBlock::iterator &NMBBI) {
+  const MCInstrDesc &LL = TII->get(PPC::LQARX);
+  const MCInstrDesc &SC = TII->get(PPC::STQCX);
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  const BasicBlock *BB = MBB.getBasicBlock();
+  // Create layout of control flow.
+  MachineFunction::iterator MFI = ++MBB.getIterator();
+  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB);
+  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
+  MF->insert(MFI, LoopMBB);
+  MF->insert(MFI, ExitMBB);
+  ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
+                  MBB.end());
+  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+  MBB.addSuccessor(LoopMBB);
+
+  // For non-min/max operations, the generated control flow looks like:
+  // MBB:
+  //   ...
+  // LoopMBB:
+  //   lqarx in, ptr
+  //   addc out.sub_x1, in.sub_x1, op.sub_x1
+  //   adde out.sub_x0, in.sub_x0, op.sub_x0
+  //   stqcx out, ptr
+  //   bne- LoopMBB
+  // ExitMBB:
+  //   ...
+  Register Old = MI.getOperand(0).getReg();
+  Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
+  Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
+  Register Scratch = MI.getOperand(1).getReg();
+  Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
+  Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
+  Register RA = MI.getOperand(2).getReg();
+  Register RB = MI.getOperand(3).getReg();
+  Register IncrLo = MI.getOperand(4).getReg();
+  Register IncrHi = MI.getOperand(5).getReg();
+  unsigned RMWOpcode = MI.getOpcode();
+
+  MachineBasicBlock *CurrentMBB = &MBB;
+  PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
+             IncrHi, IncrLo);
+  CurrentMBB = LoopMBB;
+  BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
+
+  switch (RMWOpcode) {
+  case PPC::ATOMIC_SWAP_I128:
+    // Nothing to do here; the value to store was already copied into the
+    // scratch pair in `MBB`.
+    break;
+  case PPC::ATOMIC_LOAD_ADD_I128:
+    BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo)
+        .addReg(ScratchLo)
+        .addReg(OldLo);
+    BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi)
+        .addReg(ScratchHi)
+        .addReg(OldHi);
+    break;
+  case PPC::ATOMIC_LOAD_SUB_I128:
+    BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo)
+        .addReg(ScratchLo)
+        .addReg(OldLo);
+    BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi)
+        .addReg(ScratchHi)
+        .addReg(OldHi);
+    break;
+
+#define TRIVIAL_ATOMICRMW(Opcode, Instr)                                       \
+  case Opcode:                                                                 \
+    BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo)                      \
+        .addReg(ScratchLo)                                                     \
+        .addReg(OldLo);                                                        \
+    BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi)                      \
+        .addReg(ScratchHi)                                                     \
+        .addReg(OldHi);                                                        \
+    break
+
+    TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8);
+    TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8);
+    TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8);
+    TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8);
+#undef TRIVIAL_ATOMICRMW
+  default:
+    llvm_unreachable("Unhandled atomic RMW operation");
+  }
+  BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
+  BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
+      .addImm(PPC::PRED_NE)
+      .addReg(PPC::CR0)
+      .addMBB(LoopMBB);
+  CurrentMBB->addSuccessor(LoopMBB);
+  CurrentMBB->addSuccessor(ExitMBB);
+  recomputeLiveIns(*LoopMBB);
+  recomputeLiveIns(*ExitMBB);
+  NMBBI = MBB.end();
+  MI.eraseFromParent();
+  return true;
+}
+
+} // namespace
+
+INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic",
+                false, false)
+
+char PPCExpandAtomicPseudo::ID = 0;
+FunctionPass *llvm::createPPCExpandAtomicPseudoPass() {
+  return new PPCExpandAtomicPseudo();
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -876,6 +876,14 @@
     Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                    AtomicOrdering Ord) const override;
 
+    TargetLowering::AtomicExpansionKind
+    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+    Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
+                                        AtomicRMWInst *AI, Value *AlignedAddr,
+                                        Value *Incr, Value *Mask,
+                                        Value *ShiftAmt,
+                                        AtomicOrdering Ord) const override;
+
     MachineBasicBlock *
     EmitInstrWithCustomInserter(MachineInstr &MI,
                                 MachineBasicBlock *MBB) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1282,6 +1282,9 @@
     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
   }
 
+  if (Subtarget.hasQuadwordAtomics())
+    setMaxAtomicSizeInBitsSupported(128);
+
   setBooleanContents(ZeroOrOneBooleanContent);
 
   if (Subtarget.hasAltivec()) {
@@ -12622,6 +12625,17 @@
   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
     return emitProbedAlloca(MI, BB);
+  } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
+    DebugLoc DL = MI.getDebugLoc();
+    Register Src = MI.getOperand(2).getReg();
+    Register Lo = MI.getOperand(0).getReg();
+    Register Hi = MI.getOperand(1).getReg();
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
+        .addDef(Lo)
+        .addUse(Src, 0, PPC::sub_gp8_x1);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
+        .addDef(Hi)
+        .addUse(Src, 0, PPC::sub_gp8_x0);
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
@@ -16002,6 +16016,25 @@
                                            MachineFunction &MF,
                                            unsigned Intrinsic) const {
   switch (Intrinsic) {
+  case Intrinsic::ppc_atomicrmw_xchg_i128:
+  case Intrinsic::ppc_atomicrmw_add_i128:
+  case Intrinsic::ppc_atomicrmw_sub_i128:
+  case Intrinsic::ppc_atomicrmw_nand_i128:
+  case Intrinsic::ppc_atomicrmw_and_i128:
+  case Intrinsic::ppc_atomicrmw_or_i128:
+  case Intrinsic::ppc_atomicrmw_xor_i128:
+  case Intrinsic::ppc_atomicrmw_max_i128:
+  case Intrinsic::ppc_atomicrmw_min_i128:
+  case Intrinsic::ppc_atomicrmw_umax_i128:
+  case Intrinsic::ppc_atomicrmw_umin_i128:
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::i128;
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Info.align = Align(16);
+    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
+                 MachineMemOperand::MOVolatile;
+    return true;
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
   case Intrinsic::ppc_altivec_lvebx:
@@ -17402,3 +17435,67 @@
     return CC_PPC64_ELF_FIS;
   }
 }
+
+TargetLowering::AtomicExpansionKind
+PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  if (AI->isFloatingPointOperation())
+    return AtomicExpansionKind::None;
+  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+  if (Subtarget.isAIXABI() && Subtarget.hasQuadwordAtomics() && Size == 128)
+    return AtomicExpansionKind::MaskedIntrinsic;
+  return AtomicExpansionKind::None;
+}
+
+static Intrinsic::ID
+getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Xchg:
+    return Intrinsic::ppc_atomicrmw_xchg_i128;
+  case AtomicRMWInst::Add:
+    return Intrinsic::ppc_atomicrmw_add_i128;
+  case AtomicRMWInst::Sub:
+    return Intrinsic::ppc_atomicrmw_sub_i128;
+  case AtomicRMWInst::And:
+    return Intrinsic::ppc_atomicrmw_and_i128;
+  case AtomicRMWInst::Or:
+    return Intrinsic::ppc_atomicrmw_or_i128;
+  case AtomicRMWInst::Xor:
+    return Intrinsic::ppc_atomicrmw_xor_i128;
+  case AtomicRMWInst::Nand:
+    return Intrinsic::ppc_atomicrmw_nand_i128;
+  case AtomicRMWInst::Max:
+    return Intrinsic::ppc_atomicrmw_max_i128;
+  case AtomicRMWInst::Min:
+    return Intrinsic::ppc_atomicrmw_min_i128;
+  case AtomicRMWInst::UMax:
+    return Intrinsic::ppc_atomicrmw_umax_i128;
+  case AtomicRMWInst::UMin:
+    return Intrinsic::ppc_atomicrmw_umin_i128;
+  }
+}
+
+Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
+    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+  assert(Subtarget.hasQuadwordAtomics() && "Only support quadword now");
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Type *ValTy = cast<PointerType>(AlignedAddr->getType())->getElementType();
+  assert(ValTy->getPrimitiveSizeInBits() == 128);
+  Function *RMW = Intrinsic::getDeclaration(
+      M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
+  Type *Int64Ty = Type::getInt64Ty(M->getContext());
+  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
+  Value *IncrHi =
+      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
+  Value *Addr =
+      Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
+  Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
+  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+  return Builder.CreateOr(
+      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
+}
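Reviewer note: combined with the AtomicExpandPass change above, emitMaskedAtomicRMWIntrinsic rewrites a quadword RMW into a call to the new intrinsic. For a seq_cst add, the expansion should look roughly like the following IR (a hand-written sketch, not generated output; value names are illustrative, and the leading sync / trailing lwsync from emitLeadingFence/emitTrailingFence are omitted):

  %incr_lo = trunc i128 %x to i64
  %shr     = lshr i128 %x, 64
  %incr_hi = trunc i128 %shr to i64
  %addr    = bitcast i128* %a to i8*
  %lohi    = call { i64, i64 } @llvm.ppc.atomicrmw.add.i128(i8* %addr, i64 %incr_lo, i64 %incr_hi)
  %lo      = extractvalue { i64, i64 } %lohi, 0
  %hi      = extractvalue { i64, i64 } %lohi, 1
  %lo.ext  = zext i64 %lo to i128
  %hi.ext  = zext i64 %hi to i128
  %hi.shl  = shl i128 %hi.ext, 64
  %old     = or i128 %lo.ext, %hi.shl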
"val64"); +} diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -304,6 +304,100 @@ isPPC64, isRecordForm; } +let mayStore = 1, mayLoad = 1 in { +def SPLIT_QUADWORD : PPCCustomInserterPseudo<(outs g8rc:$lo, g8rc:$hi), + (ins g8prc:$src), + "#SPLIT_QUADWORD", []>; +let Defs = [CR0] in { +// Atomic pseudo instructions expanded post-ra. +def ATOMIC_LOAD_ADD_I128 : PPCPostRAExpPseudo<(outs g8prc:$RTp, g8prc:$scratch), + (ins memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi), + "#ATOMIC_LOAD_ADD_I128", + []>; +def ATOMIC_LOAD_SUB_I128 : PPCPostRAExpPseudo<(outs g8prc:$RTp, g8prc:$scratch), + (ins memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi), + "#ATOMIC_LOAD_SUB_I128", + []>; +def ATOMIC_LOAD_XOR_I128 : PPCPostRAExpPseudo<(outs g8prc:$RTp, g8prc:$scratch), + (ins memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi), + "#ATOMIC_LOAD_XOR_I128", + []>; +def ATOMIC_LOAD_AND_I128 : PPCPostRAExpPseudo<(outs g8prc:$RTp, g8prc:$scratch), + (ins memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi), + "#ATOMIC_LOAD_AND_I128", + []>; +def ATOMIC_LOAD_OR_I128 : PPCPostRAExpPseudo<(outs g8prc:$RTp, g8prc:$scratch), + (ins memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi), + "#ATOMIC_LOAD_OR_I128", + []>; +def ATOMIC_LOAD_NAND_I128 : PPCPostRAExpPseudo<(outs g8prc:$RTp, g8prc:$scratch), + (ins memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi), + "#ATOMIC_LOAD_NAND_I128", + []>; +def ATOMIC_SWAP_I128 : PPCPostRAExpPseudo<(outs g8prc:$RTp, g8prc:$scratch), + (ins memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi), + "#ATOMIC_SWAP_I128", + []>; +} +} + +def : Pat<(int_ppc_atomicrmw_add_i128 ForceXForm:$ptr, + i64:$incr_lo, + i64:$incr_hi), + (SPLIT_QUADWORD (ATOMIC_LOAD_ADD_I128 memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi))>; +def : Pat<(int_ppc_atomicrmw_sub_i128 ForceXForm:$ptr, + i64:$incr_lo, + i64:$incr_hi), + (SPLIT_QUADWORD (ATOMIC_LOAD_SUB_I128 memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi))>; +def : Pat<(int_ppc_atomicrmw_xor_i128 ForceXForm:$ptr, + i64:$incr_lo, + i64:$incr_hi), + (SPLIT_QUADWORD (ATOMIC_LOAD_XOR_I128 memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi))>; +def : Pat<(int_ppc_atomicrmw_and_i128 ForceXForm:$ptr, + i64:$incr_lo, + i64:$incr_hi), + (SPLIT_QUADWORD (ATOMIC_LOAD_AND_I128 memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi))>; +def : Pat<(int_ppc_atomicrmw_nand_i128 ForceXForm:$ptr, + i64:$incr_lo, + i64:$incr_hi), + (SPLIT_QUADWORD (ATOMIC_LOAD_NAND_I128 memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi))>; +def : Pat<(int_ppc_atomicrmw_or_i128 ForceXForm:$ptr, + i64:$incr_lo, + i64:$incr_hi), + (SPLIT_QUADWORD (ATOMIC_LOAD_OR_I128 memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi))>; +def : Pat<(int_ppc_atomicrmw_xchg_i128 ForceXForm:$ptr, + i64:$incr_lo, + i64:$incr_hi), + (SPLIT_QUADWORD (ATOMIC_SWAP_I128 memrr:$ptr, + g8rc:$incr_lo, + g8rc:$incr_hi))>; + let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC), "stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1170,6 +1170,7 @@ def HasSPE : Predicate<"Subtarget->hasSPE()">; def HasICBT : Predicate<"Subtarget->hasICBT()">; def HasPartwordAtomics : Predicate<"Subtarget->hasPartwordAtomics()">; +def HasQuadwordAtomics : Predicate<"Subtarget->hasQuadwordAtomics()">; def 
 def NoNaNsFPMath
     : Predicate<"Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
 def NaNsFPMath
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -139,6 +139,7 @@
   bool HasICBT;
   bool HasInvariantFunctionDescriptors;
   bool HasPartwordAtomics;
+  bool HasQuadwordAtomics;
   bool HasDirectMove;
   bool HasHTM;
   bool HasFloat128;
@@ -301,6 +302,7 @@
   bool usePPCPreRASchedStrategy() const { return UsePPCPreRASchedStrategy; }
   bool usePPCPostRASchedStrategy() const { return UsePPCPostRASchedStrategy; }
   bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+  bool hasQuadwordAtomics() const { return HasQuadwordAtomics; }
   bool hasDirectMove() const { return HasDirectMove; }
 
   Align getPlatformStackAlignment() const {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -123,6 +123,7 @@
   initializePPCTLSDynamicCallPass(PR);
   initializePPCMIPeepholePass(PR);
   initializePPCLowerMASSVEntriesPass(PR);
+  initializePPCExpandAtomicPseudoPass(PR);
   initializeGlobalISel(PR);
 }
@@ -397,6 +398,7 @@
   void addPreRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
+  void addPreEmitPass2() override;
   // GlobalISEL
   bool addIRTranslator() override;
   bool addLegalizeMachineIR() override;
@@ -539,6 +541,13 @@
   addPass(createPPCBranchSelectionPass());
 }
 
+void PPCPassConfig::addPreEmitPass2() {
+  // Schedule the expansion of AMOs at the last possible moment, avoiding the
+  // possibility for other passes to break the requirements for forward
+  // progress in the LL/SC block.
+  addPass(createPPCExpandAtomicPseudoPass());
+}
+
 TargetTransformInfo
 PPCTargetMachine::getTargetTransformInfo(const Function &F) {
   return TargetTransformInfo(PPCTTIImpl(this, F));
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-i128.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-i128.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-i128.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr8 \
+; RUN:   -ppc-track-subreg-liveness < %s | FileCheck %s
+
+define i128 @swap(i128* %a, i128 %x) {
+; CHECK-LABEL: swap:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    mr 6, 4
+; CHECK-NEXT:  L..BB0_1: # %entry
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lqarx 4, 0, 3
+; CHECK-NEXT:    stqcx. 6, 0, 3
+; CHECK-NEXT:    bne 0, L..BB0_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    mr 4, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = atomicrmw xchg i128* %a, i128 %x seq_cst, align 16
+  ret i128 %0
+}
+
+define i128 @add(i128* %a, i128 %x) {
+; CHECK-LABEL: add:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    mr 6, 4
+; CHECK-NEXT:  L..BB1_1: # %entry
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lqarx 4, 0, 3
+; CHECK-NEXT:    addc 7, 7, 5
+; CHECK-NEXT:    adde 6, 6, 4
+; CHECK-NEXT:    stqcx. 6, 0, 3
+; CHECK-NEXT:    bne 0, L..BB1_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    mr 4, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = atomicrmw add i128* %a, i128 %x seq_cst, align 16
+  ret i128 %0
+}
+
+define i128 @sub(i128* %a, i128 %x) {
+; CHECK-LABEL: sub:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    mr 6, 4
+; CHECK-NEXT:  L..BB2_1: # %entry
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lqarx 4, 0, 3
+; CHECK-NEXT:    subc 7, 5, 7
+; CHECK-NEXT:    subfe 6, 6, 4
+; CHECK-NEXT:    stqcx. 6, 0, 3
+; CHECK-NEXT:    bne 0, L..BB2_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    mr 4, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = atomicrmw sub i128* %a, i128 %x seq_cst, align 16
+  ret i128 %0
+}
+
+define i128 @and(i128* %a, i128 %x) {
+; CHECK-LABEL: and:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    mr 6, 4
+; CHECK-NEXT:  L..BB3_1: # %entry
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lqarx 4, 0, 3
+; CHECK-NEXT:    and 7, 7, 5
+; CHECK-NEXT:    and 6, 6, 4
+; CHECK-NEXT:    stqcx. 6, 0, 3
+; CHECK-NEXT:    bne 0, L..BB3_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    mr 4, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = atomicrmw and i128* %a, i128 %x seq_cst, align 16
+  ret i128 %0
+}
+
+define i128 @or(i128* %a, i128 %x) {
+; CHECK-LABEL: or:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    mr 6, 4
+; CHECK-NEXT:  L..BB4_1: # %entry
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lqarx 4, 0, 3
+; CHECK-NEXT:    or 7, 7, 5
+; CHECK-NEXT:    or 6, 6, 4
+; CHECK-NEXT:    stqcx. 6, 0, 3
+; CHECK-NEXT:    bne 0, L..BB4_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    mr 4, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = atomicrmw or i128* %a, i128 %x seq_cst, align 16
+  ret i128 %0
+}
+
+define i128 @xor(i128* %a, i128 %x) {
+; CHECK-LABEL: xor:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    mr 6, 4
+; CHECK-NEXT:  L..BB5_1: # %entry
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lqarx 4, 0, 3
+; CHECK-NEXT:    xor 7, 7, 5
+; CHECK-NEXT:    xor 6, 6, 4
+; CHECK-NEXT:    stqcx. 6, 0, 3
+; CHECK-NEXT:    bne 0, L..BB5_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    mr 4, 5
+; CHECK-NEXT:    blr
+entry:
+  %0 = atomicrmw xor i128* %a, i128 %x seq_cst, align 16
+  ret i128 %0
+}
+
+define i128 @nand(i128* %a, i128 %x) {
+; CHECK-LABEL: nand:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    mr 6, 4
+; CHECK-NEXT:  L..BB6_1: # %entry
+; CHECK-NEXT:    #
+; CHECK-NEXT:    lqarx 4, 0, 3
+; CHECK-NEXT:    nand 7, 7, 5
+; CHECK-NEXT:    nand 6, 6, 4
+; CHECK-NEXT:    stqcx. 6, 0, 3
+; CHECK-NEXT:    bne 0, L..BB6_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    mr 3, 4
+; CHECK-NEXT:    mr 4, 5
+; CHECK-NEXT:    blr
entry:
+  %0 = atomicrmw nand i128* %a, i128 %x seq_cst, align 16
+  ret i128 %0
+}