diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -561,6 +561,9 @@
   // instruction, so we say that ctlz is cheap to speculate.
   bool isCheapToSpeculateCtlz() const override { return true; }
 
+  AtomicExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
 private:
   const NVPTXSubtarget &STI; // cache the subtarget here
   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -5125,6 +5125,46 @@
   }
 }
 
+NVPTXTargetLowering::AtomicExpansionKind
+NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  Type *Ty = AI->getValOperand()->getType();
+  if (AI->isFloatingPointOperation()) {
+    if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) {
+      if (Ty->isFloatTy())
+        return AtomicExpansionKind::None;
+      if (Ty->isDoubleTy() && STI.hasAtomAddF64())
+        return AtomicExpansionKind::None;
+    }
+    return AtomicExpansionKind::CmpXChg;
+  }
+
+  switch (AI->getOperation()) {
+  default:
+    return AtomicExpansionKind::CmpXChg;
+  case AtomicRMWInst::BinOp::Add:
+  case AtomicRMWInst::BinOp::Sub:
+  case AtomicRMWInst::BinOp::Max:
+  case AtomicRMWInst::BinOp::Min:
+  case AtomicRMWInst::BinOp::UMax:
+  case AtomicRMWInst::BinOp::UMin:
+  case AtomicRMWInst::BinOp::Xchg:
+  case AtomicRMWInst::BinOp::And:
+  case AtomicRMWInst::BinOp::Or:
+  case AtomicRMWInst::BinOp::Xor:
+    assert(Ty->isIntegerTy() && "Ty should be integer at this point");
+    switch (cast<IntegerType>(Ty)->getBitWidth()) {
+    case 32:
+      return AtomicExpansionKind::None;
+    case 64:
+      return AtomicExpansionKind::None;
+    default:
+      return AtomicExpansionKind::CmpXChg;
+    }
+  }
+
+  return AtomicExpansionKind::CmpXChg;
+}
+
 // Pin NVPTXTargetObjectFile's vtables to this file.
 NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -330,6 +330,8 @@
     addStraightLineScalarOptimizationPasses();
   }
 
+  addPass(createAtomicExpandPass());
+
   // === LSR and other generic IR passes ===
   TargetPassConfig::addIRPasses();
   // EarlyCSE is not always strong enough to clean up what LSR produces. For
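
Note (not part of the patch): with this hook in place, AtomicExpandPass keeps 32- and 64-bit integer atomicrmw, f32 fadd, and f64 fadd (the latter only when the subtarget reports hasAtomAddF64) as native atomics, and asks for a compare-exchange expansion of everything else. As a rough illustration of what AtomicExpansionKind::CmpXChg means, the expansion is morally equivalent to the standalone C++ sketch below of a 16-bit atomic add, which this patch routes through the cmpxchg path; the helper name atomic_add_u16 is ours, purely for illustration.

// Hypothetical illustration only -- not part of the patch and not NVPTX code.
// AtomicExpansionKind::CmpXChg tells AtomicExpandPass to rewrite an atomicrmw
// into a load + compare-exchange retry loop; this is the same shape in C++.
#include <atomic>
#include <cstdint>
#include <iostream>

// 16-bit atomic add expressed as a CAS loop (the hook above only keeps 32- and
// 64-bit integer atomicrmw as native, so a 16-bit add takes this path).
uint16_t atomic_add_u16(std::atomic<uint16_t> &A, uint16_t V) {
  uint16_t Old = A.load(std::memory_order_relaxed);
  // Retry until no other thread has modified A between our read and our CAS;
  // on failure, compare_exchange_weak refreshes Old with the current value.
  while (!A.compare_exchange_weak(Old, static_cast<uint16_t>(Old + V)))
    ;
  return Old; // like atomicrmw, yield the value observed before the update
}

int main() {
  std::atomic<uint16_t> A{40};
  std::cout << atomic_add_u16(A, 2) << ' ' << A.load() << '\n'; // prints 40 42
}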