diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -37,6 +37,7 @@
   NVVMIntrRange.cpp
   NVVMReflect.cpp
   NVPTXProxyRegErasure.cpp
+  NVPTXCtorDtorLowering.cpp
   )
 
 add_llvm_target(NVPTXCodeGen
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -39,6 +39,7 @@
                                  llvm::CodeGenOpt::Level OptLevel);
 ModulePass *createNVPTXAssignValidGlobalNamesPass();
 ModulePass *createGenericToNVVMLegacyPass();
+ModulePass *createNVPTXCtorDtorLoweringLegacyPass();
 FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
 FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
 MachineFunctionPass *createNVPTXPrologEpilogPass();
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -788,16 +788,6 @@
     report_fatal_error("Module has aliases, which NVPTX does not support.");
     return true; // error
   }
-  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
-    report_fatal_error(
-        "Module has a nontrivial global ctor, which NVPTX does not support.");
-    return true; // error
-  }
-  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
-    report_fatal_error(
-        "Module has a nontrivial global dtor, which NVPTX does not support.");
-    return true; // error
-  }
 
   // We need to call the parent's one explicitly.
   bool Result = AsmPrinter::doInitialization(M);
diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
@@ -0,0 +1,30 @@
+//===-- NVPTXCtorDtorLowering.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+class PassRegistry;
+
+extern char &NVPTXCtorDtorLoweringLegacyPassID;
+extern void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
+
+/// Lower llvm.global_ctors and llvm.global_dtors to exported global variables.
+class NVPTXCtorDtorLoweringPass
+    : public PassInfoMixin<NVPTXCtorDtorLoweringPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
@@ -0,0 +1,106 @@
+//===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This pass lowers llvm.global_ctors and llvm.global_dtors into individually
+/// named global variables so that a runtime can build the init / fini lists
+/// manually.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXCtorDtorLowering.h"
+#include "NVPTX.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-lower-ctor-dtor"
+
+namespace {
+
+/// Replace the structor list named \p GlobalName with one exported constant
+/// global per entry, named __init_array_<fn>_<priority> (ctors) or
+/// __fini_array_<fn>_<priority> (dtors). Returns true if \p M was modified.
+static bool createInitOrFiniGlobls(Module &M, StringRef GlobalName,
+                                   bool IsCtor) {
+  GlobalVariable *GV = M.getGlobalVariable(GlobalName);
+  if (!GV || !GV->hasInitializer())
+    return false;
+  ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (!GA || GA->getNumOperands() == 0)
+    return false;
+
+  // NVPTX has no way to emit variables at specific sections or support for the
+  // traditional constructor sections. Instead, we emit mangled global names so
+  // the runtime can build the list manually.
+  for (Value *V : GA->operands()) {
+    auto *CS = cast<ConstantStruct>(V);
+    auto *F = cast<Constant>(CS->getOperand(1));
+    uint64_t Priority = cast<ConstantInt>(CS->getOperand(0))->getSExtValue();
+    std::string PriorityStr = "." + std::to_string(Priority);
+    std::string NameStr = ((IsCtor ? "__init_array_" : "__fini_array_") +
+                           F->getName() + "_" + std::to_string(Priority))
+                              .str();
+    // PTX does not support exported names with '.' in them.
+    llvm::transform(NameStr, NameStr.begin(),
+                    [](char c) { return c == '.' ? '_' : c; });
+
+    auto *Entry = new GlobalVariable(M, F->getType(), /*IsConstant=*/true,
+                                     GlobalValue::ExternalLinkage, F, NameStr,
+                                     nullptr, GlobalValue::NotThreadLocal,
+                                     /*AddressSpace=*/4);
+    // This isn't respected by Nvidia, simply put here for clarity.
+    Entry->setSection(IsCtor ? ".init_array" + PriorityStr
+                             : ".fini_array" + PriorityStr);
+    Entry->setVisibility(GlobalVariable::ProtectedVisibility);
+    appendToUsed(M, {Entry});
+  }
+
+  GV->eraseFromParent();
+  return true;
+}
+
+/// Lower both llvm.global_ctors and llvm.global_dtors in \p M.
+static bool lowerCtorsAndDtors(Module &M) {
+  bool Modified = false;
+  Modified |= createInitOrFiniGlobls(M, "llvm.global_ctors", /*IsCtor =*/true);
+  Modified |= createInitOrFiniGlobls(M, "llvm.global_dtors", /*IsCtor =*/false);
+  return Modified;
+}
+
+/// Legacy pass manager wrapper around lowerCtorsAndDtors.
+class NVPTXCtorDtorLoweringLegacy final : public ModulePass {
+public:
+  static char ID;
+  NVPTXCtorDtorLoweringLegacy() : ModulePass(ID) {}
+  bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); }
+};
+
+} // End anonymous namespace
+
+PreservedAnalyses NVPTXCtorDtorLoweringPass::run(Module &M,
+                                                 ModuleAnalysisManager &AM) {
+  return lowerCtorsAndDtors(M) ? PreservedAnalyses::none()
+                               : PreservedAnalyses::all();
+}
+
+char NVPTXCtorDtorLoweringLegacy::ID = 0;
+char &llvm::NVPTXCtorDtorLoweringLegacyPassID = NVPTXCtorDtorLoweringLegacy::ID;
+INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy, DEBUG_TYPE,
+                "Lower ctors and dtors for NVPTX", false, false)
+
+ModulePass *llvm::createNVPTXCtorDtorLoweringLegacyPass() {
+  return new NVPTXCtorDtorLoweringLegacy();
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -15,6 +15,7 @@
 #include "NVPTXAliasAnalysis.h"
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXAtomicLower.h"
+#include "NVPTXCtorDtorLowering.h"
 #include "NVPTXLowerAggrCopies.h"
 #include "NVPTXMachineFunctionInfo.h"
 #include "NVPTXTargetObjectFile.h"
@@ -68,8 +69,9 @@
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXAtomicLowerPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
 void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
 void initializeNVVMIntrRangePass(PassRegistry &);
@@ -95,6 +97,7 @@
   initializeNVPTXAtomicLowerPass(PR);
   initializeNVPTXLowerArgsPass(PR);
   initializeNVPTXLowerAllocaPass(PR);
+  initializeNVPTXCtorDtorLoweringLegacyPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
   initializeNVPTXProxyRegErasurePass(PR);
   initializeNVPTXDAGToDAGISelPass(PR);
@@ -249,6 +252,10 @@
   PB.registerPipelineParsingCallback(
       [](StringRef PassName, ModulePassManager &PM,
          ArrayRef<PassBuilder::PipelineElement>) {
+        if (PassName == "nvptx-lower-ctor-dtor") {
+          PM.addPass(NVPTXCtorDtorLoweringPass());
+          return true;
+        }
         if (PassName == "generic-to-nvvm") {
           PM.addPass(GenericToNVVMPass());
           return true;
@@ -369,6 +376,7 @@
   }
 
   addPass(createAtomicExpandPass());
+  addPass(createNVPTXCtorDtorLoweringLegacyPass());
 
   // === LSR and other generic IR passes ===
   TargetPassConfig::addIRPasses();
diff --git a/llvm/test/CodeGen/NVPTX/global-ctor.ll b/llvm/test/CodeGen/NVPTX/global-ctor.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/NVPTX/global-ctor.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
-
-; Check that llc dies when given a nonempty global ctor.
-@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
-
-; CHECK: ERROR: Module has a nontrivial global ctor
-define internal void @foo() {
-  ret void
-}
diff --git a/llvm/test/CodeGen/NVPTX/global-dtor.ll b/llvm/test/CodeGen/NVPTX/global-dtor.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/NVPTX/global-dtor.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: not --crash llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
-
-; Check that llc dies when given a nonempty global dtor.
-@llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
-
-; CHECK: ERROR: Module has a nontrivial global dtor
-define internal void @foo() {
-  ret void
-}
diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s
+
+; Make sure we get the same result if we run multiple times
+; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
+; RUN: llc -mtriple=nvptx64-- -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
+
+@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
+@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
+
+; CHECK-NOT: @llvm.global_ctors
+; CHECK-NOT: @llvm.global_dtors
+
+; CHECK: @__init_array_foo_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
+; CHECK: @__fini_array_bar_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
+; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_foo_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_bar_1 to ptr)], section "llvm.metadata"
+
+; VISIBILITY: .visible .const .align 8 .u64 __init_array_foo_1 = foo;
+; VISIBILITY: .visible .const .align 8 .u64 __fini_array_bar_1 = bar;
+
+define internal void @foo() {
+  ret void
+}
+
+define internal void @bar() {
+  ret void
+}