diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -12,6 +12,7 @@
 
 set(NVPTXCodeGen_sources
   NVPTXAllocaHoisting.cpp
+  NVPTXAtomicLower.cpp
   NVPTXAsmPrinter.cpp
   NVPTXAssignValidGlobalNames.cpp
   NVPTXFrameLowering.cpp
diff --git a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h
@@ -0,0 +1,22 @@
+//===-- NVPTXAtomicLower.h - Lower atomics of local memory ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower atomics of local memory to simple load/stores
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H
+
+namespace llvm {
+class FunctionPass;
+
+extern FunctionPass *createNVPTXAtomicLowerPass();
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp
@@ -0,0 +1,92 @@
+//===-- NVPTXAtomicLower.cpp - Lower atomics of local memory ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower atomics of local memory to simple load/stores
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAtomicLower.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+using namespace llvm;
+
+namespace {
+// Rewrites monotonic `atomicrmw fadd` instructions whose pointer operand is in
+// ADDRESS_SPACE_LOCAL into an explicit load / fadd / store sequence.
+class NVPTXAtomicLower : public FunctionPass {
+public:
+  static char ID; // Pass ID
+  NVPTXAtomicLower() : FunctionPass(ID) {}
+
+  // Instructions are only replaced inside their block; no blocks or edges
+  // are added or removed.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+
+  StringRef getPassName() const override {
+    return "NVPTX lower atomics of local memory";
+  }
+
+  // Returns true if at least one instruction in F was rewritten.
+  bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+bool NVPTXAtomicLower::runOnFunction(Function &F) {
+  // Collect the candidates first: rewriting erases instructions, which must
+  // not happen while the blocks are still being iterated.
+  SmallVector<AtomicRMWInst *> LocalAtomics;
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB)
+      if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+        if (RMW->getPointerAddressSpace() == ADDRESS_SPACE_LOCAL &&
+            RMW->getOrdering() == AtomicOrdering::Monotonic)
+          LocalAtomics.push_back(RMW);
+
+  bool Changed = false;
+  for (AtomicRMWInst *RMW : LocalAtomics) {
+    // Only fadd is lowered here; every other operation is left untouched.
+    if (RMW->getOperation() != AtomicRMWInst::FAdd)
+      continue;
+
+    IRBuilder<> B(RMW);
+    Value *Ptr = RMW->getPointerOperand();
+    LoadInst *OldVal = B.CreateLoad(RMW->getType(), Ptr);
+    OldVal->setAtomic(RMW->getOrdering(), RMW->getSyncScopeID());
+    Value *Sum = B.CreateFAdd(OldVal, RMW->getValOperand());
+    StoreInst *NewStore = B.CreateStore(Sum, Ptr);
+    NewStore->setAtomic(RMW->getOrdering(), RMW->getSyncScopeID());
+
+    // atomicrmw yields the value that was in memory before the operation, so
+    // its users must be redirected to the load before it is erased.
+    RMW->replaceAllUsesWith(OldVal);
+    RMW->eraseFromParent();
+    Changed = true;
+  }
+  return Changed;
+}
+
+char NVPTXAtomicLower::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAtomicLowerPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(NVPTXAtomicLower, "nvptx-atomic-lower",
+                "Lower atomics of local memory to simple load/stores", false,
+                false)
+
+FunctionPass *llvm::createNVPTXAtomicLowerPass() {
+  return new NVPTXAtomicLower();
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -13,6 +13,7 @@
 #include "NVPTXTargetMachine.h"
 #include "NVPTX.h"
 #include "NVPTXAllocaHoisting.h"
+#include "NVPTXAtomicLower.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXTargetObjectFile.h"
 #include "NVPTXTargetTransformInfo.h"
@@ -252,6 +253,7 @@
   addPass(createSROAPass());
   addPass(createNVPTXLowerAllocaPass());
   addPass(createInferAddressSpacesPass());
+  addPass(createNVPTXAtomicLowerPass());
 }
 
 void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {