diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -561,6 +561,17 @@ // instruction, so we say that ctlz is cheap to speculate. bool isCheapToSpeculateCtlz() const override { return true; } + AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override { + return AtomicExpansionKind::None; + } + + AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { + return AtomicExpansionKind::None; + } + + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + private: const NVPTXSubtarget &STI; // cache the subtarget here SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -5125,6 +5125,46 @@ } } +NVPTXTargetLowering::AtomicExpansionKind +NVPTXTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + Type *Ty = AI->getValOperand()->getType(); + if (AI->isFloatingPointOperation()) { + if (AI->getOperation() == AtomicRMWInst::BinOp::FAdd) { + if (Ty->isFloatTy()) + return AtomicExpansionKind::None; + if (Ty->isDoubleTy() && STI.hasAtomAddF64()) + return AtomicExpansionKind::None; + } + return AtomicExpansionKind::CmpXChg; + } + + switch (AI->getOperation()) { + default: + return AtomicExpansionKind::CmpXChg; + case AtomicRMWInst::BinOp::Add: + case AtomicRMWInst::BinOp::Sub: + case AtomicRMWInst::BinOp::Max: + case AtomicRMWInst::BinOp::Min: + case AtomicRMWInst::BinOp::UMax: + case AtomicRMWInst::BinOp::UMin: + case AtomicRMWInst::BinOp::Xchg: + case AtomicRMWInst::BinOp::And: + case AtomicRMWInst::BinOp::Or: + case AtomicRMWInst::BinOp::Xor: + assert(Ty->isIntegerTy()); + switch (cast(Ty)->getBitWidth()) { + case 32: + return AtomicExpansionKind::None; + case 64: + return AtomicExpansionKind::None; + default: + return AtomicExpansionKind::CmpXChg; + } + } + + return AtomicExpansionKind::CmpXChg; +} + // Pin NVPTXTargetObjectFile's vtables to this file. NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default; diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -330,6 +330,8 @@ addStraightLineScalarOptimizationPasses(); } + addPass(createAtomicExpandPass()); + // === LSR and other generic IR passes === TargetPassConfig::addIRPasses(); // EarlyCSE is not always strong enough to clean up what LSR produces. For diff --git a/llvm/test/CodeGen/NVPTX/atomic-expand.ll.ll b/llvm/test/CodeGen/NVPTX/atomic-expand.ll.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/atomic-expand.ll.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=CHECK-SM20 +; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s --check-prefix=CHECK-SM60 +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_60 | %ptxas-verify %} + +; CHECK-LABEL: foo +define void @foo(ptr %0, double %1) { +entry: + ; CHECK-SM20: atom.cas.b64 + ; CHECK-SM60: atom.add.f64 + %2 = atomicrmw fadd ptr %0, double %1 monotonic, align 8 + ret void +}