Diff 519448

clang/lib/Driver/ToolChains/Cuda.h

Show First 20 Lines • Show All 126 Lines • ▼ Show 20 Lines
} // end namespace NVPTX		} // end namespace NVPTX
} // end namespace tools		} // end namespace tools

namespace toolchains {		namespace toolchains {

class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {		class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
public:		public:
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,		NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::Triple &HostTriple,		const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args,
const llvm::opt::ArgList &Args);		bool Freestanding);

NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,		NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);		const llvm::opt::ArgList &Args);

llvm::opt::DerivedArgList *		llvm::opt::DerivedArgList *
TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,		TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const override;		Action::OffloadKind DeviceOffloadKind) const override;

		void
		addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
		llvm::opt::ArgStringList &CC1Args,
		Action::OffloadKind DeviceOffloadKind) const override;

// Never try to use the integrated assembler with CUDA; always fork out to		// Never try to use the integrated assembler with CUDA; always fork out to
// ptxas.		// ptxas.
bool useIntegratedAs() const override { return false; }		bool useIntegratedAs() const override { return false; }
bool isCrossCompiling() const override { return true; }		bool isCrossCompiling() const override { return true; }
bool isPICDefault() const override { return false; }		bool isPICDefault() const override { return false; }
bool isPIEDefault(const llvm::opt::ArgList &Args) const override {		bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
return false;		return false;
}		}
Show All 10 Lines	public:
unsigned GetDefaultDwarfVersion() const override { return 2; }		unsigned GetDefaultDwarfVersion() const override { return 2; }
unsigned getMaxDwarfVersion() const override { return 2; }		unsigned getMaxDwarfVersion() const override { return 2; }

CudaInstallationDetector CudaInstallation;		CudaInstallationDetector CudaInstallation;

protected:		protected:
Tool *buildAssembler() const override; // ptxas.		Tool *buildAssembler() const override; // ptxas.
Tool *buildLinker() const override; // nvlink.		Tool *buildLinker() const override; // nvlink.

		private:
		bool Freestanding = false;
};		};

class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {		class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
public:		public:
CudaToolChain(const Driver &D, const llvm::Triple &Triple,		CudaToolChain(const Driver &D, const llvm::Triple &Triple,
const ToolChain &HostTC, const llvm::opt::ArgList &Args);		const ToolChain &HostTC, const llvm::opt::ArgList &Args);

const llvm::Triple *getAuxTriple() const override {		const llvm::Triple *getAuxTriple() const override {
▲ Show 20 Lines • Show All 54 Lines • Show Last 20 Lines

clang/lib/Driver/ToolChains/Cuda.cpp

Show First 20 Lines • Show All 689 Lines • ▼ Show 20 Lines	#undef CASE_CUDA_VERSION
Features.push_back(PtxFeature);		Features.push_back(PtxFeature);
}		}

/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This		/// NVPTX toolchain. Our assembler is ptxas, and our linker is nvlink. This
/// operates as a stand-alone version of the NVPTX tools without the host		/// operates as a stand-alone version of the NVPTX tools without the host
/// toolchain.		/// toolchain.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,		NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::Triple &HostTriple,		const llvm::Triple &HostTriple,
const ArgList &Args)		const ArgList &Args, bool Freestanding = false)
: ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {		: ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
		Freestanding(Freestanding) {
if (CudaInstallation.isValid()) {		if (CudaInstallation.isValid()) {
CudaInstallation.WarnIfUnsupportedVersion();		CudaInstallation.WarnIfUnsupportedVersion();
getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));		getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
}		}
// Lookup binaries into the driver directory, this is used to		// Lookup binaries into the driver directory, this is used to
// discover the 'nvptx-arch' executable.		// discover the 'nvptx-arch' executable.
getProgramPaths().push_back(getDriver().Dir);		getProgramPaths().push_back(getDriver().Dir);
}		}

/// We only need the host triple to locate the CUDA binary utilities, use the		/// We only need the host triple to locate the CUDA binary utilities, use the
/// system's default triple if not provided.		/// system's default triple if not provided.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,		NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)		const ArgList &Args)
: NVPTXToolChain(D, Triple,		: NVPTXToolChain(D, Triple,
llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args) {}		llvm::Triple(llvm::sys::getDefaultTargetTriple()), Args,
		/*Freestanding=*/true) {}

llvm::opt::DerivedArgList *		llvm::opt::DerivedArgList *
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,		NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch,		StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind) const {		Action::OffloadKind DeviceOffloadKind) const {
DerivedArgList *DAL =		DerivedArgList *DAL =
ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind);		ToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
if (!DAL)		if (!DAL)
DAL = new DerivedArgList(Args.getBaseArgs());		DAL = new DerivedArgList(Args.getBaseArgs());

const OptTable &Opts = getDriver().getOpts();		const OptTable &Opts = getDriver().getOpts();

for (Arg *A : Args)		for (Arg *A : Args)
if (!llvm::is_contained(*DAL, A))		if (!llvm::is_contained(*DAL, A))
DAL->append(A);		DAL->append(A);

if (!DAL->hasArg(options::OPT_march_EQ))		if (!DAL->hasArg(options::OPT_march_EQ))
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),		DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
CudaArchToString(CudaArch::CudaDefault));		CudaArchToString(CudaArch::CudaDefault));

return DAL;		return DAL;
}		}

		void NVPTXToolChain::addClangTargetOptions(
		const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
		Action::OffloadKind DeviceOffloadingKind) const {
		// If we are compiling with a standalone NVPTX toolchain we want to try to
		// mimic a standard environment as much as possible. So we enable lowering
		// ctor / dtor functions to global symbols that can be registered.
		if (Freestanding)
		CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
		}

bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {		bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
const Option &O = A->getOption();		const Option &O = A->getOption();
return (O.matches(options::OPT_gN_Group) &&		return (O.matches(options::OPT_gN_Group) &&
!O.matches(options::OPT_gmodules)) \|\|		!O.matches(options::OPT_gmodules)) \|\|
O.matches(options::OPT_g_Flag) \|\|		O.matches(options::OPT_g_Flag) \|\|
O.matches(options::OPT_ggdbN_Group) \|\| O.matches(options::OPT_ggdb) \|\|		O.matches(options::OPT_ggdbN_Group) \|\| O.matches(options::OPT_ggdb) \|\|
O.matches(options::OPT_gdwarf) \|\| O.matches(options::OPT_gdwarf_2) \|\|		O.matches(options::OPT_gdwarf) \|\| O.matches(options::OPT_gdwarf_2) \|\|
O.matches(options::OPT_gdwarf_3) \|\| O.matches(options::OPT_gdwarf_4) \|\|		O.matches(options::OPT_gdwarf_3) \|\| O.matches(options::OPT_gdwarf_4) \|\|
▲ Show 20 Lines • Show All 269 Lines • Show Last 20 Lines

clang/test/Driver/cuda-cross-compiling.c

	Show First 20 Lines • Show All 62 Lines • ▼ Show 20 Lines
	// Test the generated arguments default to a value with no architecture.			// Test the generated arguments default to a value with no architecture.
	//			//
	// RUN: %clang -target nvptx64-nvidia-cuda -### %s 2>&1 \			// RUN: %clang -target nvptx64-nvidia-cuda -### %s 2>&1 \
	// RUN: \| FileCheck -check-prefix=DEFAULT %s			// RUN: \| FileCheck -check-prefix=DEFAULT %s

	// DEFAULT: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.}} "-target-cpu" "sm_35" "-target-feature" "+ptx{{[0-9]+}}" {{.}} "-o" "[[PTX:.+]].s"			// DEFAULT: -cc1" "-triple" "nvptx64-nvidia-cuda" "-S" {{.}} "-target-cpu" "sm_35" "-target-feature" "+ptx{{[0-9]+}}" {{.}} "-o" "[[PTX:.+]].s"
	// DEFAULT-NEXT: ptxas{{.*}}"-m64" "-O0" "--gpu-name" "sm_35" "--output-file" "[[CUBIN:.+]].cubin" "[[PTX]].s" "-c"			// DEFAULT-NEXT: ptxas{{.*}}"-m64" "-O0" "--gpu-name" "sm_35" "--output-file" "[[CUBIN:.+]].cubin" "[[PTX]].s" "-c"
	// DEFAULT-NEXT: nvlink{{.}}"-o" "a.out" "-arch" "sm_35" {{.}} "[[CUBIN]].cubin"			// DEFAULT-NEXT: nvlink{{.}}"-o" "a.out" "-arch" "sm_35" {{.}} "[[CUBIN]].cubin"

				//
				// Test to ensure that we enable handling global constructors in a freestanding
				// Nvidia compilation.
				//
				// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 %s -### 2>&1 \
				// RUN: \| FileCheck -check-prefix=LOWERING %s

				// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"

llvm/lib/Target/NVPTX/CMakeLists.txt

Show All 31 Lines	set(NVPTXCodeGen_sources
NVPTXReplaceImageHandles.cpp		NVPTXReplaceImageHandles.cpp
NVPTXSubtarget.cpp		NVPTXSubtarget.cpp
NVPTXTargetMachine.cpp		NVPTXTargetMachine.cpp
NVPTXTargetTransformInfo.cpp		NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp		NVPTXUtilities.cpp
NVVMIntrRange.cpp		NVVMIntrRange.cpp
NVVMReflect.cpp		NVVMReflect.cpp
NVPTXProxyRegErasure.cpp		NVPTXProxyRegErasure.cpp
		NVPTXCtorDtorLowering.cpp
)		)

add_llvm_target(NVPTXCodeGen		add_llvm_target(NVPTXCodeGen
${NVPTXCodeGen_sources}		${NVPTXCodeGen_sources}

LINK_COMPONENTS		LINK_COMPONENTS
Analysis		Analysis
AsmPrinter		AsmPrinter
Show All 21 Lines

llvm/lib/Target/NVPTX/NVPTX.h

Show All 33 Lines	enum CondCodes {
GE		GE
};		};
}		}

FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,		FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);		llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();		ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMLegacyPass();		ModulePass *createGenericToNVVMLegacyPass();
		ModulePass *createNVPTXCtorDtorLoweringLegacyPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);		FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
FunctionPass *createNVVMReflectPass(unsigned int SmVersion);		FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
MachineFunctionPass *createNVPTXPrologEpilogPass();		MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();		MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();		FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerArgsPass();		FunctionPass *createNVPTXLowerArgsPass();
FunctionPass *createNVPTXLowerAllocaPass();		FunctionPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();		MachineFunctionPass *createNVPTXPeephole();
▲ Show 20 Lines • Show All 146 Lines • Show Last 20 Lines

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Show First 20 Lines • Show All 86 Lines • ▼ Show 20 Lines
#include <cstring>		#include <cstring>
#include <new>		#include <new>
#include <string>		#include <string>
#include <utility>		#include <utility>
#include <vector>		#include <vector>

using namespace llvm;		using namespace llvm;

		static cl::opt<bool>
		LowerCtorDtor("nvptx-lower-global-ctor-dtor",
		cl::desc("Lower GPU ctor / dtors to globals on the device."),
		cl::init(false), cl::Hidden);

#define DEPOTNAME "__local_depot"		#define DEPOTNAME "__local_depot"

/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V		/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
/// depends.		/// depends.
static void		static void
DiscoverDependentGlobals(const Value *V,		DiscoverDependentGlobals(const Value *V,
DenseSet<const GlobalVariable *> &Globals) {		DenseSet<const GlobalVariable *> &Globals) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))		if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
▲ Show 20 Lines • Show All 680 Lines • ▼ Show 20 Lines	void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
OutStreamer->emitRawText(OS1.str());		OutStreamer->emitRawText(OS1.str());
}		}

bool NVPTXAsmPrinter::doInitialization(Module &M) {		bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (M.alias_size()) {		if (M.alias_size()) {
report_fatal_error("Module has aliases, which NVPTX does not support.");		report_fatal_error("Module has aliases, which NVPTX does not support.");
return true; // error		return true; // error
}		}
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {		if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
		!LowerCtorDtor) {
report_fatal_error(		report_fatal_error(
"Module has a nontrivial global ctor, which NVPTX does not support.");		"Module has a nontrivial global ctor, which NVPTX does not support.");
return true; // error		return true; // error
}		}
if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {		if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
		!LowerCtorDtor) {
report_fatal_error(		report_fatal_error(
"Module has a nontrivial global dtor, which NVPTX does not support.");		"Module has a nontrivial global dtor, which NVPTX does not support.");
return true; // error		return true; // error
}		}

// We need to call the parent's one explicitly.		// We need to call the parent's one explicitly.
bool Result = AsmPrinter::doInitialization(M);		bool Result = AsmPrinter::doInitialization(M);

▲ Show 20 Lines • Show All 1,408 Lines • Show Last 20 Lines

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h

This file was added.

				//===-- NVPTXCtorDtorLowering.h --------------------------------*- C++ -*-===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
				#define LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H

				#include "llvm/IR/PassManager.h"

				namespace llvm {
				class Module;
				class PassRegistry;

				extern char &NVPTXCtorDtorLoweringLegacyPassID;
				extern void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);

				/// New pass manager pass that lowers llvm.global_ctors and llvm.global_dtors.
				/// The implementation in NVPTXCtorDtorLowering.cpp replaces each entry of
				/// those arrays with an individually named constant global variable and then
				/// erases the original array.
				class NVPTXCtorDtorLoweringPass
				: public PassInfoMixin<NVPTXCtorDtorLoweringPass> {
				public:
				/// Runs the lowering on \p M. Reports that no analyses are preserved when
				/// the module was modified and that all are preserved otherwise.
				PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
				};

				} // namespace llvm

				#endif // LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp

This file was added.

				//===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				///
				/// \file
				/// This pass lowers the entries of llvm.global_ctors and llvm.global_dtors to
				/// uniquely named global variables so the runtime can build the list manually.
				//===----------------------------------------------------------------------===//

				#include "NVPTXCtorDtorLowering.h"
				#include "NVPTX.h"
				#include "llvm/IR/Constants.h"
				#include "llvm/IR/Function.h"
				#include "llvm/IR/GlobalVariable.h"
				#include "llvm/IR/IRBuilder.h"
				#include "llvm/IR/Module.h"
				#include "llvm/IR/Value.h"
				#include "llvm/Pass.h"
				#include "llvm/Support/CommandLine.h"
				#include "llvm/Transforms/Utils/ModuleUtils.h"

				using namespace llvm;

				#define DEBUG_TYPE "nvptx-lower-ctor-dtor"

				// Hidden escape hatch: when non-empty, this string is used in place of the
				// hash of the module's source file name as the unique component of the
				// generated ctor/dtor global names.
				static cl::opt<std::string>
				GlobalStr("nvptx-lower-global-ctor-dtor-id",
				cl::desc("Override unique ID of ctor/dtor globals."),
				cl::init(""), cl::Hidden);
				tra (inline comment, not done): We're not overriding the name, but rather the unique suffix for the names we generate. Perhaps rephrase along the lines of "Override unique ID for ctor/dtor globals"?

				namespace {

				/// Hashes \p Str with MD5 and returns the value of MD5Result::low() for the
				/// digest, formatted as a lowercase hexadecimal string.
				static std::string getHash(StringRef Str) {
				  llvm::MD5 Hasher;
				  llvm::MD5::MD5Result Hash;
				  Hasher.update(Str);
				  Hasher.final(Hash);
				  return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
				}

				/// Replaces the structor array named \p GlobalName with one constant global
				/// variable per array entry, each initialized with the entry's function.
				///
				/// \param M          Module that holds the array and receives the new globals.
				/// \param GlobalName Name of the array to lower ("llvm.global_ctors" or
				///                   "llvm.global_dtors" at the call sites in this file).
				/// \param IsCtor     Selects the "__init_array_object_" / ".init_array" naming
				///                   when true and the "__fini_array_object_" / ".fini_array"
				///                   naming when false.
				/// \returns true if the array existed, was non-empty and has been lowered;
				///          false if the module was left untouched.
				static bool createInitOrFiniGlobls(Module &M, StringRef GlobalName,
				                                   bool IsCtor) {
				  GlobalVariable *GV = M.getGlobalVariable(GlobalName);
				  if (!GV || !GV->hasInitializer())
				    return false;
				  ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());
				  if (!GA || GA->getNumOperands() == 0)
				    return false;

				  // NVPTX has no way to emit variables at specific sections or support for
				  // the traditional constructor sections. Instead, we emit mangled global
				  // names so the runtime can build the list manually.
				  for (Value *V : GA->operands()) {
				    // Operand 0 of each entry is the priority, operand 1 the function.
				    auto *CS = cast<ConstantStruct>(V);
				    auto *F = cast<Constant>(CS->getOperand(1));
				    uint64_t Priority = cast<ConstantInt>(CS->getOperand(0))->getSExtValue();
				traUnsubmitted Not Done Reply Inline Actions Source file name may be a little bit better, though it's still easy to clash if someone does `cd A; clang ./foo.c; cd ../B; clang ./foo.c` and the file name uses relative paths. I think we'll need a way to override this unique suffix explicitly as an escape hatch for cases where someone runs into a clash. tra: Source file name may be a little bit better, though it's still easy to clash if someone does…
				jhuber6AuthorUnsubmitted Done Reply Inline Actions I figured it'd be good enough since this is admittedly very niche. So someone would need to have a file called `foo.c` that also had a constructor called `foo` in it. For it to clash. Isn't it too late to grab the source filename while we're in the backend lowering stage? jhuber6: I figured it'd be good enough since this is admittedly very niche. So someone would need to…
				traUnsubmitted Not Done Reply Inline Actions someone would need to have a file called foo.c that also had a constructor called foo in it Unlikely != impossible. It's a trade-off between the hassle of implementing the plan B and the hassle of debugging and working around the clash for someone who runs into this. On one hand that's indeed unlikely to happen, but, given enough exposure, someone/somewhere will run into it and they will likely be ill-equipped to even tell what's going on. In general, compiler options are logistically much easier to deal with compared to having to change the source code. Isn't it too late to grab the source filename while we're in the backend lowering stage? The module already has the file name recorded and available via `llvm::Module::getSourceFileName()`, so it's as easy to get as the module name. tra: > someone would need to have a file called foo.c that also had a constructor called foo in it…
				traUnsubmitted Not Done Reply Inline Actions On the second thought, do you think we'll ever end up running this pass with a module created purely in memory w/o having a source file name. Or, perhaps even without the module name either? Even the hash of the IR itself will not be sufficient. Users are allowed to compile and link completely identical TUs as long as they don't have conflicting names. I can imagine some sort of "plugin" module with only private symbols, but which has initializers to register stuff on startup. Two identical instances of such a module should be able to work, but they would end up with identical hash in this scheme. I do not see any way to automatically disambiguate them, short of using random numbers, but that would make compilation results unstable. I still think we need to be able to provide the uniquifier manually via an option. tra: On the second thought, do you think we'll ever end up running this pass with a module created…
				jhuber6AuthorUnsubmitted Done Reply Inline Actions Yeah, I'm assuming they would just get a name conflict in that case. We can definitely add a special option that just adds some noise. jhuber6: Yeah, I'm assuming they would just get a name conflict in that case. We can definitely add a…
				    std::string PriorityStr = "." + std::to_string(Priority);
				    // We append a semi-unique hash and the priority to the global name.
				    std::string GlobalID =
				        !GlobalStr.empty() ? GlobalStr : getHash(M.getSourceFileName());
				    std::string NameStr =
				        ((IsCtor ? "__init_array_object_" : "__fini_array_object_") +
				         F->getName() + "_" + GlobalID + "_" + std::to_string(Priority))
				            .str();
				    // PTX does not support exported names with '.' in them.
				    llvm::transform(NameStr, NameStr.begin(),
				                    [](char c) { return c == '.' ? '_' : c; });

				    // Named differently from the outer GV so that the array erased after
				    // the loop is unambiguous.
				    auto *EntryGV = new GlobalVariable(M, F->getType(), /*IsConstant=*/true,
				                                       GlobalValue::ExternalLinkage, F, NameStr,
				                                       nullptr, GlobalValue::NotThreadLocal,
				                                       /*AddressSpace=*/4);
				    // This isn't respected by Nvidia, simply put here for clarity.
				    EntryGV->setSection(IsCtor ? ".init_array" + PriorityStr
				                               : ".fini_array" + PriorityStr);
				    EntryGV->setVisibility(GlobalVariable::ProtectedVisibility);
				    // Added to llvm.used so the otherwise unreferenced global is kept.
				    appendToUsed(M, {EntryGV});
				  }

				  // Every entry now has its own global, so the original array is removed.
				  GV->eraseFromParent();
				  return true;
				}

				/// Lowers both llvm.global_ctors and llvm.global_dtors in \p M.
				/// \returns true if either array was lowered, i.e. the module changed.
				static bool lowerCtorsAndDtors(Module &M) {
				  bool Modified = false;
				  // Both calls always run; |= does not short-circuit.
				  Modified |= createInitOrFiniGlobls(M, "llvm.global_ctors", /*IsCtor=*/true);
				  Modified |= createInitOrFiniGlobls(M, "llvm.global_dtors", /*IsCtor=*/false);
				  return Modified;
				}

				/// Legacy pass manager wrapper that forwards to lowerCtorsAndDtors().
				class NVPTXCtorDtorLoweringLegacy final : public ModulePass {
				public:
				  static char ID;

				  NVPTXCtorDtorLoweringLegacy() : ModulePass(ID) {}

				  /// Returns true when the module was modified by the lowering.
				  bool runOnModule(Module &M) override {
				    const bool Changed = lowerCtorsAndDtors(M);
				    return Changed;
				  }
				};

				} // End anonymous namespace

				/// New pass manager entry point: runs the ctor/dtor lowering on \p M and
				/// reports which analyses remain valid afterwards.
				PreservedAnalyses NVPTXCtorDtorLoweringPass::run(Module &M,
				                                                 ModuleAnalysisManager &AM) {
				  // Nothing was lowered, so every analysis is still valid.
				  if (!lowerCtorsAndDtors(M))
				    return PreservedAnalyses::all();
				  return PreservedAnalyses::none();
				}

				// Definition of the static pass ID declared in NVPTXCtorDtorLoweringLegacy.
				char NVPTXCtorDtorLoweringLegacy::ID = 0;
				// Binds the reference declared extern in NVPTXCtorDtorLowering.h to that ID.
				char &llvm::NVPTXCtorDtorLoweringLegacyPassID = NVPTXCtorDtorLoweringLegacy::ID;
				// Registers the legacy pass under the DEBUG_TYPE name
				// ("nvptx-lower-ctor-dtor").
				INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy, DEBUG_TYPE,
				"Lower ctors and dtors for NVPTX", false, false)

				/// Factory for the legacy pass; returns a newly allocated
				/// NVPTXCtorDtorLoweringLegacy instance.
				ModulePass *llvm::createNVPTXCtorDtorLoweringLegacyPass() {
				return new NVPTXCtorDtorLoweringLegacy();
				}

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Show All 9 Lines
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "NVPTXTargetMachine.h"		#include "NVPTXTargetMachine.h"
#include "NVPTX.h"		#include "NVPTX.h"
#include "NVPTXAliasAnalysis.h"		#include "NVPTXAliasAnalysis.h"
#include "NVPTXAllocaHoisting.h"		#include "NVPTXAllocaHoisting.h"
#include "NVPTXAtomicLower.h"		#include "NVPTXAtomicLower.h"
		#include "NVPTXCtorDtorLowering.h"
#include "NVPTXLowerAggrCopies.h"		#include "NVPTXLowerAggrCopies.h"
#include "NVPTXMachineFunctionInfo.h"		#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXTargetObjectFile.h"		#include "NVPTXTargetObjectFile.h"
#include "NVPTXTargetTransformInfo.h"		#include "NVPTXTargetTransformInfo.h"
#include "TargetInfo/NVPTXTargetInfo.h"		#include "TargetInfo/NVPTXTargetInfo.h"
#include "llvm/ADT/STLExtras.h"		#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"		#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"		#include "llvm/CodeGen/Passes.h"
Show All 37 Lines	static cl::opt<bool> UseShortPointersOpt(
cl::init(false), cl::Hidden);		cl::init(false), cl::Hidden);

namespace llvm {		namespace llvm {

void initializeGenericToNVVMLegacyPassPass(PassRegistry &);		void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);		void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);		void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXAtomicLowerPass(PassRegistry &);		void initializeNVPTXAtomicLowerPass(PassRegistry &);
		void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);		void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);		void initializeNVPTXLowerAllocaPass(PassRegistry &);
		void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);		void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXProxyRegErasurePass(PassRegistry &);		void initializeNVPTXProxyRegErasurePass(PassRegistry &);
void initializeNVVMIntrRangePass(PassRegistry &);		void initializeNVVMIntrRangePass(PassRegistry &);
void initializeNVVMReflectPass(PassRegistry &);		void initializeNVVMReflectPass(PassRegistry &);
void initializeNVPTXAAWrapperPassPass(PassRegistry &);		void initializeNVPTXAAWrapperPassPass(PassRegistry &);
void initializeNVPTXExternalAAWrapperPass(PassRegistry &);		void initializeNVPTXExternalAAWrapperPass(PassRegistry &);

} // end namespace llvm		} // end namespace llvm
Show All 9 Lines	extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
initializeNVVMReflectPass(PR);		initializeNVVMReflectPass(PR);
initializeNVVMIntrRangePass(PR);		initializeNVVMIntrRangePass(PR);
initializeGenericToNVVMLegacyPassPass(PR);		initializeGenericToNVVMLegacyPassPass(PR);
initializeNVPTXAllocaHoistingPass(PR);		initializeNVPTXAllocaHoistingPass(PR);
initializeNVPTXAssignValidGlobalNamesPass(PR);		initializeNVPTXAssignValidGlobalNamesPass(PR);
initializeNVPTXAtomicLowerPass(PR);		initializeNVPTXAtomicLowerPass(PR);
initializeNVPTXLowerArgsPass(PR);		initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);		initializeNVPTXLowerAllocaPass(PR);
		initializeNVPTXCtorDtorLoweringLegacyPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);		initializeNVPTXLowerAggrCopiesPass(PR);
initializeNVPTXProxyRegErasurePass(PR);		initializeNVPTXProxyRegErasurePass(PR);
initializeNVPTXDAGToDAGISelPass(PR);		initializeNVPTXDAGToDAGISelPass(PR);
initializeNVPTXAAWrapperPassPass(PR);		initializeNVPTXAAWrapperPassPass(PR);
initializeNVPTXExternalAAWrapperPass(PR);		initializeNVPTXExternalAAWrapperPass(PR);
}		}

static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {		static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
▲ Show 20 Lines • Show All 138 Lines • ▼ Show 20 Lines	if (AAName == "nvptx-aa") {
return true;		return true;
}		}
return false;		return false;
});		});

PB.registerPipelineParsingCallback(		PB.registerPipelineParsingCallback(
[](StringRef PassName, ModulePassManager &PM,		[](StringRef PassName, ModulePassManager &PM,
ArrayRef<PassBuilder::PipelineElement>) {		ArrayRef<PassBuilder::PipelineElement>) {
		if (PassName == "nvptx-lower-ctor-dtor") {
		PM.addPass(NVPTXCtorDtorLoweringPass());
		return true;
		}
if (PassName == "generic-to-nvvm") {		if (PassName == "generic-to-nvvm") {
PM.addPass(GenericToNVVMPass());		PM.addPass(GenericToNVVMPass());
return true;		return true;
}		}
return false;		return false;
});		});

PB.registerPipelineStartEPCallback(		PB.registerPipelineStartEPCallback(
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines	void NVPTXPassConfig::addIRPasses() {
// before the address space inference passes.		// before the address space inference passes.
addPass(createNVPTXLowerArgsPass());		addPass(createNVPTXLowerArgsPass());
if (getOptLevel() != CodeGenOpt::None) {		if (getOptLevel() != CodeGenOpt::None) {
addAddressSpaceInferencePasses();		addAddressSpaceInferencePasses();
addStraightLineScalarOptimizationPasses();		addStraightLineScalarOptimizationPasses();
}		}

addPass(createAtomicExpandPass());		addPass(createAtomicExpandPass());
		addPass(createNVPTXCtorDtorLoweringLegacyPass());

// === LSR and other generic IR passes ===		// === LSR and other generic IR passes ===
TargetPassConfig::addIRPasses();		TargetPassConfig::addIRPasses();
// EarlyCSE is not always strong enough to clean up what LSR produces. For		// EarlyCSE is not always strong enough to clean up what LSR produces. For
// example, GVN can combine		// example, GVN can combine
//		//
// %0 = add %a, %b		// %0 = add %a, %b
// %1 = add %b, %a		// %1 = add %b, %a
▲ Show 20 Lines • Show All 113 Lines • Show Last 20 Lines

llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

This file was added.

				; RUN: opt -S -mtriple=nvptx64-- -nvptx-lower-ctor-dtor < %s | FileCheck %s
				; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor < %s | FileCheck %s
				; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor \
				; RUN: -nvptx-lower-global-ctor-dtor-id=unique_id < %s | FileCheck %s --check-prefix=GLOBAL

				; Make sure we get the same result if we run multiple times
				; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
				; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-- -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY

				@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
				@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]

				; CHECK-NOT: @llvm.global_ctors
				; CHECK-NOT: @llvm.global_dtors

				; CHECK: @__init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
				; CHECK: @__fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
				; CHECK: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_[[HASH]]_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_[[HASH]]_1 to ptr)], section "llvm.metadata"
				; GLOBAL: @__init_array_object_foo_unique_id_1 = protected addrspace(4) constant ptr @foo, section ".init_array.1"
				; GLOBAL: @__fini_array_object_bar_unique_id_1 = protected addrspace(4) constant ptr @bar, section ".fini_array.1"
				; GLOBAL: @llvm.used = appending global [2 x ptr] [ptr addrspacecast (ptr addrspace(4) @__init_array_object_foo_unique_id_1 to ptr), ptr addrspacecast (ptr addrspace(4) @__fini_array_object_bar_unique_id_1 to ptr)], section "llvm.metadata"

				; VISIBILITY: .visible .const .align 8 .u64 __init_array_object_foo_[[HASH:[0-9a-f]+]]_1 = foo;
				; VISIBILITY: .visible .const .align 8 .u64 __fini_array_object_bar_[[HASH:[0-9a-f]+]]_1 = bar;

				define internal void @foo() {
				ret void
				}

				define internal void @bar() {
				ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

[NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 519448

clang/lib/Driver/ToolChains/Cuda.h

clang/lib/Driver/ToolChains/Cuda.cpp

clang/test/Driver/cuda-cross-compiling.c

llvm/lib/Target/NVPTX/CMakeLists.txt

llvm/lib/Target/NVPTX/NVPTX.h

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

This is an archive of the discontinued LLVM Phabricator instance.

[NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtorsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 519448

clang/lib/Driver/ToolChains/Cuda.h

clang/lib/Driver/ToolChains/Cuda.cpp

clang/test/Driver/cuda-cross-compiling.c

llvm/lib/Target/NVPTX/CMakeLists.txt

llvm/lib/Target/NVPTX/NVPTX.h

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h

llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll

[NVPTX] Add NVPTXCtorDtorLoweringPass to handle global ctors / dtors
ClosedPublic