diff --git a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h b/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h
--- a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
 #define LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
 
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
@@ -24,6 +25,11 @@
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
   static bool isRequired() { return true; }
 };
+
+/// Convert the given atomicrmw instruction \p RMWI into primitive
+/// (non-atomic) loads and stores, assuming that doing so is legal.
+/// Return true if the lowering succeeds.
+bool LowerAtomicRMWInst(AtomicRMWInst *RMWI);
 }
 
 #endif // LLVM_TRANSFORMS_SCALAR_LOWERATOMIC_H
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -12,6 +12,7 @@
 
 set(NVPTXCodeGen_sources
   NVPTXAllocaHoisting.cpp
+  NVPTXAtomicLower.cpp
   NVPTXAsmPrinter.cpp
   NVPTXAssignValidGlobalNames.cpp
   NVPTXFrameLowering.cpp
diff --git a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.h
@@ -0,0 +1,22 @@
+//===-- NVPTXAtomicLower.h - Lower atomics of local memory ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower atomics of local memory to simple load/stores
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXATOMICLOWER_H
+
+namespace llvm {
+class FunctionPass;
+
+extern FunctionPass *createNVPTXAtomicLowerPass();
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp
@@ -0,0 +1,81 @@
+//===-- NVPTXAtomicLower.cpp - Lower atomics of local memory ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Lower atomics of local memory to simple load/stores
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAtomicLower.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Scalar/LowerAtomic.h"
+
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+using namespace llvm;
+
+namespace {
+/// Function pass that hands every atomicrmw instruction whose pointer operand
+/// is in ADDRESS_SPACE_LOCAL to llvm::LowerAtomicRMWInst, which rewrites it
+/// into primitive loads and stores. Other atomic instructions (e.g. cmpxchg)
+/// and atomicrmw on any other address space are left untouched.
+class NVPTXAtomicLower : public FunctionPass {
+public:
+  static char ID; // Pass ID
+  NVPTXAtomicLower() : FunctionPass(ID) {}
+
+  /// This pass declares that it leaves the CFG intact.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+  }
+
+  StringRef getPassName() const override {
+    return "NVPTX lower atomics of local memory";
+  }
+
+  /// Lower all local-memory atomicrmw instructions in \p F. Returns true if
+  /// at least one lowering reported success.
+  bool runOnFunction(Function &F) override;
+};
+} // namespace
+
+bool NVPTXAtomicLower::runOnFunction(Function &F) {
+  // Collect the candidates first and rewrite them afterwards, so that the
+  // instruction lists are not modified while they are being iterated.
+  SmallVector<AtomicRMWInst *, 8> LocalMemoryAtomics;
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
+        if (RMWI->getPointerAddressSpace() == ADDRESS_SPACE_LOCAL) {
+          LocalMemoryAtomics.push_back(RMWI);
+        }
+      }
+    }
+  }
+  bool Changed = false;
+  for (AtomicRMWInst *RMWI : LocalMemoryAtomics) {
+    Changed |= LowerAtomicRMWInst(RMWI);
+  }
+  return Changed;
+}
+
+char NVPTXAtomicLower::ID = 0;
+
+namespace llvm {
+void initializeNVPTXAtomicLowerPass(PassRegistry &);
+}
+
+INITIALIZE_PASS(NVPTXAtomicLower, "nvptx-atomic-lower",
+                "Lower atomics of local memory to simple load/stores", false,
+                false)
+
+FunctionPass *llvm::createNVPTXAtomicLowerPass() {
+  return new NVPTXAtomicLower();
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -13,6 +13,7 @@
 #include "NVPTXTargetMachine.h"
 #include "NVPTX.h"
 #include "NVPTXAllocaHoisting.h"
+#include "NVPTXAtomicLower.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXTargetObjectFile.h"
 #include "NVPTXTargetTransformInfo.h"
@@ -65,6 +66,7 @@
 void initializeNVVMReflectPass(PassRegistry&);
 void initializeGenericToNVVMPass(PassRegistry&);
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
+void initializeNVPTXAtomicLowerPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void
initializeNVPTXLowerArgsPass(PassRegistry &);
@@ -86,6 +88,7 @@
   initializeGenericToNVVMPass(PR);
   initializeNVPTXAllocaHoistingPass(PR);
   initializeNVPTXAssignValidGlobalNamesPass(PR);
+  initializeNVPTXAtomicLowerPass(PR);
   initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
@@ -252,6 +255,9 @@
   addPass(createSROAPass());
   addPass(createNVPTXLowerAllocaPass());
   addPass(createInferAddressSpacesPass());
+  // Scheduled after address space inference, since the pass recognizes local
+  // memory solely by the address space of the atomicrmw pointer operand.
+  addPass(createNVPTXAtomicLowerPass());
 }
 
 void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
diff --git a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
--- a/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -40,7 +40,7 @@
   return true;
 }
 
-static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
+bool llvm::LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
   IRBuilder<> Builder(RMWI);
   Value *Ptr = RMWI->getPointerOperand();
   Value *Val = RMWI->getValOperand();
diff --git a/llvm/test/CodeGen/NVPTX/atomic-lower.ll b/llvm/test/CodeGen/NVPTX/atomic-lower.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/atomic-lower.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -S -nvptx-atomic-lower | FileCheck %s
+
+; An atomicrmw on an addrspace(5) pointer must be rewritten into a plain
+; load / fadd / store sequence whose result is the originally loaded value.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+define double @kernel(double addrspace(5)* %ptr, double %val) {
+; CHECK-LABEL: @kernel(
+; CHECK-NEXT: [[OLD:%.*]] = load double, double addrspace(5)* %ptr, align 8
+; CHECK-NEXT: [[NEW:%.*]] = fadd double [[OLD]], %val
+; CHECK-NEXT: store double [[NEW]], double addrspace(5)* %ptr, align 8
+; CHECK-NEXT: ret double [[OLD]]
+  %res = atomicrmw fadd double addrspace(5)* %ptr, double %val monotonic, align 8
+  ret double %res
+}
+