Diff 167980

include/clang/Basic/LangOptions.def

	Show First 20 Lines • Show All 205 Lines • ▼ Show 20 Lines
	LANGOPT(OpenMPCUDAForceFullRuntime , 1, 0, "Force to use full runtime in all constructs when offloading to CUDA devices")			LANGOPT(OpenMPCUDAForceFullRuntime , 1, 0, "Force to use full runtime in all constructs when offloading to CUDA devices")
	LANGOPT(OpenMPHostCXXExceptions , 1, 0, "C++ exceptions handling in the host code.")			LANGOPT(OpenMPHostCXXExceptions , 1, 0, "C++ exceptions handling in the host code.")
	LANGOPT(RenderScript , 1, 0, "RenderScript")			LANGOPT(RenderScript , 1, 0, "RenderScript")

	LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")			LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
	LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code")			LANGOPT(CUDAAllowVariadicFunctions, 1, 0, "allowing variadic functions in CUDA device code")
	LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")			LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__")
	LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")			LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions")
	LANGOPT(CUDARelocatableDeviceCode, 1, 0, "generate relocatable device code")			LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code")

	LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")			LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
	LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")			LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
	LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")			LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")
	LANGOPT(NewAlignOverride , 32, 0, "maximum alignment guaranteed by '::operator new(size_t)'")			LANGOPT(NewAlignOverride , 32, 0, "maximum alignment guaranteed by '::operator new(size_t)'")
	LANGOPT(ConceptsTS , 1, 0, "enable C++ Extensions for Concepts")			LANGOPT(ConceptsTS , 1, 0, "enable C++ Extensions for Concepts")
	BENIGN_LANGOPT(ModulesCodegen , 1, 0, "Modules code generation")			BENIGN_LANGOPT(ModulesCodegen , 1, 0, "Modules code generation")
	BENIGN_LANGOPT(ModulesDebugInfo , 1, 0, "Modules debug info")			BENIGN_LANGOPT(ModulesDebugInfo , 1, 0, "Modules debug info")
	▲ Show 20 Lines • Show All 100 Lines • Show Last 20 Lines

include/clang/Driver/Options.td

	Show First 20 Lines • Show All 578 Lines • ▼ Show 20 Lines
	def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>,			def ptxas_path_EQ : Joined<["--"], "ptxas-path=">, Group<i_Group>,
	HelpText<"Path to ptxas (used for compiling CUDA code)">;			HelpText<"Path to ptxas (used for compiling CUDA code)">;
	def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">,			def fcuda_flush_denormals_to_zero : Flag<["-"], "fcuda-flush-denormals-to-zero">,
	Flags<[CC1Option]>, HelpText<"Flush denormal floating point values to zero in CUDA device mode.">;			Flags<[CC1Option]>, HelpText<"Flush denormal floating point values to zero in CUDA device mode.">;
	def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">;			def fno_cuda_flush_denormals_to_zero : Flag<["-"], "fno-cuda-flush-denormals-to-zero">;
	def fcuda_approx_transcendentals : Flag<["-"], "fcuda-approx-transcendentals">,			def fcuda_approx_transcendentals : Flag<["-"], "fcuda-approx-transcendentals">,
	Flags<[CC1Option]>, HelpText<"Use approximate transcendental functions">;			Flags<[CC1Option]>, HelpText<"Use approximate transcendental functions">;
	def fno_cuda_approx_transcendentals : Flag<["-"], "fno-cuda-approx-transcendentals">;			def fno_cuda_approx_transcendentals : Flag<["-"], "fno-cuda-approx-transcendentals">;
	def fcuda_rdc : Flag<["-"], "fcuda-rdc">, Flags<[CC1Option]>,			def fgpu_rdc : Flag<["-"], "fgpu-rdc">, Flags<[CC1Option]>,
	HelpText<"Generate relocatable device code, also known as separate compilation mode.">;			HelpText<"Generate relocatable device code, also known as separate compilation mode.">;
	def fno_cuda_rdc : Flag<["-"], "fno-cuda-rdc">;			def fno_gpu_rdc : Flag<["-"], "fno-gpu-rdc">;
				def : Flag<["-"], "fcuda-rdc">, Alias<fgpu_rdc>;
				def : Flag<["-"], "fno-cuda-rdc">, Alias<fno_gpu_rdc>;
				tra (Unsubmitted, Not Done): Considering that -f[no-]cuda-rdc has been around for a while, we should still keep it around as an alias to -f[no-]gpu-rdc.
	def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,			def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
	HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">;			HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">;
	def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;			def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
	def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group<Link_Group>,			def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group<Link_Group>,
	HelpText<"HIP device library path">;			HelpText<"HIP device library path">;
	def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group<Link_Group>,			def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group<Link_Group>,
	HelpText<"HIP device library">;			HelpText<"HIP device library">;
	def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">,			def fhip_dump_offload_linker_script : Flag<["-"], "fhip-dump-offload-linker-script">,
	▲ Show 20 Lines • Show All 2,452 Lines • Show Last 20 Lines

include/clang/Driver/Types.def

	Show First 20 Lines • Show All 95 Lines • ▼ Show 20 Lines
	TYPE("remap", Remap, INVALID, "remap", "")			TYPE("remap", Remap, INVALID, "remap", "")
	TYPE("precompiled-header", PCH, INVALID, "gch", "A")			TYPE("precompiled-header", PCH, INVALID, "gch", "A")
	TYPE("object", Object, INVALID, "o", "")			TYPE("object", Object, INVALID, "o", "")
	TYPE("treelang", Treelang, INVALID, nullptr, "u")			TYPE("treelang", Treelang, INVALID, nullptr, "u")
	TYPE("image", Image, INVALID, "out", "")			TYPE("image", Image, INVALID, "out", "")
	TYPE("dSYM", dSYM, INVALID, "dSYM", "A")			TYPE("dSYM", dSYM, INVALID, "dSYM", "A")
	TYPE("dependencies", Dependencies, INVALID, "d", "")			TYPE("dependencies", Dependencies, INVALID, "d", "")
	TYPE("cuda-fatbin", CUDA_FATBIN, INVALID, "fatbin","A")			TYPE("cuda-fatbin", CUDA_FATBIN, INVALID, "fatbin","A")
				TYPE("hip-fatbin", HIP_FATBIN, INVALID, "hipfb", "A")
	TYPE("none", Nothing, INVALID, nullptr, "u")			TYPE("none", Nothing, INVALID, nullptr, "u")

lib/AST/Decl.cpp

Show First 20 Lines • Show All 2,453 Lines • ▼ Show 20 Lines	bool VarDecl::isKnownToBeDefined() const {
const auto &LangOpts = getASTContext().getLangOpts();		const auto &LangOpts = getASTContext().getLangOpts();
// In CUDA mode without relocatable device code, variables of form 'extern		// In CUDA mode without relocatable device code, variables of form 'extern
// __shared__ Foo foo[]' are pointers to the base of the GPU core's shared		// __shared__ Foo foo[]' are pointers to the base of the GPU core's shared
// memory pool. These are never undefined variables, even if they appear		// memory pool. These are never undefined variables, even if they appear
// inside of an anon namespace or static function.		// inside of an anon namespace or static function.
//		//
// With CUDA relocatable device code enabled, these variables don't get		// With CUDA relocatable device code enabled, these variables don't get
// special handling; they're treated like regular extern variables.		// special handling; they're treated like regular extern variables.
if (LangOpts.CUDA && !LangOpts.CUDARelocatableDeviceCode &&		if (LangOpts.CUDA && !LangOpts.GPURelocatableDeviceCode &&
hasExternalStorage() && hasAttr<CUDASharedAttr>() &&		hasExternalStorage() && hasAttr<CUDASharedAttr>() &&
isa<IncompleteArrayType>(getType()))		isa<IncompleteArrayType>(getType()))
return true;		return true;

return hasDefinition();		return hasDefinition();
}		}

bool VarDecl::isNoDestroy(const ASTContext &Ctx) const {		bool VarDecl::isNoDestroy(const ASTContext &Ctx) const {
▲ Show 20 Lines • Show All 2,224 Lines • Show Last 20 Lines

lib/CodeGen/CGCUDANV.cpp

Show First 20 Lines • Show All 131 Lines • ▼ Show 20 Lines	CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
if (CGM.getLangOpts().HIP)		if (CGM.getLangOpts().HIP)
return ((Twine("__hip") + Twine(FuncName)).str());		return ((Twine("__hip") + Twine(FuncName)).str());
return ((Twine("__cuda") + Twine(FuncName)).str());		return ((Twine("__cuda") + Twine(FuncName)).str());
}		}

CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)		CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),		: CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
TheModule(CGM.getModule()),		TheModule(CGM.getModule()),
RelocatableDeviceCode(CGM.getLangOpts().CUDARelocatableDeviceCode) {		RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode) {
CodeGen::CodeGenTypes &Types = CGM.getTypes();		CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();		ASTContext &Ctx = CGM.getContext();

IntTy = CGM.IntTy;		IntTy = CGM.IntTy;
SizeTy = CGM.SizeTy;		SizeTy = CGM.SizeTy;
VoidTy = CGM.VoidTy;		VoidTy = CGM.VoidTy;

CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));		CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
▲ Show 20 Lines • Show All 199 Lines • ▼ Show 20 Lines	llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// struct { int magic, int version, void * gpu_binary, void * dont_care };		// struct { int magic, int version, void * gpu_binary, void * dont_care };
llvm::StructType *FatbinWrapperTy =		llvm::StructType *FatbinWrapperTy =
llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);		llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);

// Register GPU binary with the CUDA runtime, store returned handle in a		// Register GPU binary with the CUDA runtime, store returned handle in a
// global variable and save a reference in GpuBinaryHandle to be cleaned up		// global variable and save a reference in GpuBinaryHandle to be cleaned up
// in destructor on exit. Then associate all known kernels with the GPU binary		// in destructor on exit. Then associate all known kernels with the GPU binary
// handle so CUDA runtime can figure out what to call on the GPU side.		// handle so CUDA runtime can figure out what to call on the GPU side.
std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary;		std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
if (!IsHIP) {		if (!CudaGpuBinaryFileName.empty()) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =		llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);		llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {		if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
CGM.getDiags().Report(diag::err_cannot_open_file)		CGM.getDiags().Report(diag::err_cannot_open_file)
<< CudaGpuBinaryFileName << EC.message();		<< CudaGpuBinaryFileName << EC.message();
return nullptr;		return nullptr;
}		}
CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());		CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
Show All 17 Lines	llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
unsigned FatMagic;		unsigned FatMagic;
if (IsHIP) {		if (IsHIP) {
FatbinConstantName = ".hip_fatbin";		FatbinConstantName = ".hip_fatbin";
FatbinSectionName = ".hipFatBinSegment";		FatbinSectionName = ".hipFatBinSegment";

ModuleIDSectionName = "__hip_module_id";		ModuleIDSectionName = "__hip_module_id";
ModuleIDPrefix = "__hip_";		ModuleIDPrefix = "__hip_";

// For HIP, create an external symbol __hip_fatbin in section .hip_fatbin.		if (CudaGpuBinary) {
// The external symbol is supposed to contain the fat binary but will be		// If fatbin is available from early finalization, create a string
// populated somewhere else, e.g. by lld through link script.		// literal containing the fat binary loaded from the given file.
		FatBinStr = makeConstantString(CudaGpuBinary->getBuffer(), "",
		FatbinConstantName, 8);
		} else {
		// If fatbin is not available, create an external symbol
		// __hip_fatbin in section .hip_fatbin. The external symbol is supposed
		// to contain the fat binary but will be populated somewhere else,
		// e.g. by lld through link script.
FatBinStr = new llvm::GlobalVariable(		FatBinStr = new llvm::GlobalVariable(
CGM.getModule(), CGM.Int8Ty,		CGM.getModule(), CGM.Int8Ty,
/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,		/*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
"__hip_fatbin", nullptr,		"__hip_fatbin", nullptr,
llvm::GlobalVariable::NotThreadLocal);		llvm::GlobalVariable::NotThreadLocal);
cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);		cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
		}

FatMagic = HIPFatMagic;		FatMagic = HIPFatMagic;
} else {		} else {
if (RelocatableDeviceCode)		if (RelocatableDeviceCode)
FatbinConstantName = CGM.getTriple().isMacOSX()		FatbinConstantName = CGM.getTriple().isMacOSX()
? "__NV_CUDA,__nv_relfatbin"		? "__NV_CUDA,__nv_relfatbin"
: "__nv_relfatbin";		: "__nv_relfatbin";
else		else
Show All 34 Lines	llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
// There is only one HIP fat binary per linked module, however there are		// There is only one HIP fat binary per linked module, however there are
// multiple constructor functions. Make sure the fat binary is registered		// multiple constructor functions. Make sure the fat binary is registered
// only once. The constructor functions are executed by the dynamic loader		// only once. The constructor functions are executed by the dynamic loader
// before the program gains control. The dynamic loader cannot execute the		// before the program gains control. The dynamic loader cannot execute the
// constructor functions concurrently since doing that would not guarantee		// constructor functions concurrently since doing that would not guarantee
// thread safety of the loaded program. Therefore we can assume sequential		// thread safety of the loaded program. Therefore we can assume sequential
// execution of constructor functions here.		// execution of constructor functions here.
if (IsHIP) {		if (IsHIP) {
		auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage :
		llvm::GlobalValue::LinkOnceAnyLinkage;
llvm::BasicBlock *IfBlock =		llvm::BasicBlock *IfBlock =
llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);		llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
llvm::BasicBlock *ExitBlock =		llvm::BasicBlock *ExitBlock =
llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);		llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
// The name, size, and initialization pattern of this variable is part		// The name, size, and initialization pattern of this variable is part
// of HIP ABI.		// of HIP ABI.
GpuBinaryHandle = new llvm::GlobalVariable(		GpuBinaryHandle = new llvm::GlobalVariable(
TheModule, VoidPtrPtrTy, /*isConstant=*/false,		TheModule, VoidPtrPtrTy, /*isConstant=*/false,
llvm::GlobalValue::LinkOnceAnyLinkage,		Linkage,
/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),		/*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
"__hip_gpubin_handle");		"__hip_gpubin_handle");
GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());		GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getQuantity());
// Prevent the weak symbol in different shared libraries being merged.		// Prevent the weak symbol in different shared libraries being merged.
		if (Linkage != llvm::GlobalValue::InternalLinkage)
GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);		GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
Address GpuBinaryAddr(		Address GpuBinaryAddr(
GpuBinaryHandle,		GpuBinaryHandle,
CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));		CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
{		{
auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);		auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
llvm::Constant *Zero =		llvm::Constant *Zero =
llvm::Constant::getNullValue(HandleValue->getType());		llvm::Constant::getNullValue(HandleValue->getType());
llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);		llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
▲ Show 20 Lines • Show All 150 Lines • Show Last 20 Lines

lib/Driver/Driver.cpp

Show First 20 Lines • Show All 2,480 Lines • ▼ Show 20 Lines	getDeviceDependences(OffloadAction::DeviceDependences &DA,
return ABRT_Success;		return ABRT_Success;
}		}
};		};
/// \brief HIP action builder. It injects device code in the host backend		/// \brief HIP action builder. It injects device code in the host backend
/// action.		/// action.
class HIPActionBuilder final : public CudaActionBuilderBase {		class HIPActionBuilder final : public CudaActionBuilderBase {
/// The linker inputs obtained for each device arch.		/// The linker inputs obtained for each device arch.
SmallVector<ActionList, 8> DeviceLinkerInputs;		SmallVector<ActionList, 8> DeviceLinkerInputs;
		bool Relocatable;

public:		public:
HIPActionBuilder(Compilation &C, DerivedArgList &Args,		HIPActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)		const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}		: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP),
		Relocatable(false) {}

bool canUseBundlerUnbundler() const override { return true; }		bool canUseBundlerUnbundler() const override { return true; }

ActionBuilderReturnCode		ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,		getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,		phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {		PhasesTy &Phases) override {
// amdgcn does not support linking of object files, therefore we skip		// amdgcn does not support linking of object files, therefore we skip
// backend and assemble phases to output LLVM IR.		// backend and assemble phases to output LLVM IR. Except for generating
if (CudaDeviceActions.empty() \|\| CurPhase == phases::Backend \|\|		// non-relocatable device code, where we generate fat binary for device
		// code and pass to host in Backend phase.
		if (CudaDeviceActions.empty() \|\|
		(CurPhase == phases::Backend && Relocatable) \|\|
CurPhase == phases::Assemble)		CurPhase == phases::Assemble)
return ABRT_Success;		return ABRT_Success;

assert((CurPhase == phases::Link \|\|		assert(((CurPhase == phases::Link && Relocatable) \|\|
CudaDeviceActions.size() == GpuArchList.size()) &&		CudaDeviceActions.size() == GpuArchList.size()) &&
"Expecting one action per GPU architecture.");		"Expecting one action per GPU architecture.");
assert(!CompileHostOnly &&		assert(!CompileHostOnly &&
"Not expecting CUDA actions in host-only compilation.");		"Not expecting CUDA actions in host-only compilation.");

		if (!Relocatable && CurPhase == phases::Backend) {
		// If we are in backend phase, we attempt to generate the fat binary.
		// We compile each arch to IR and use a link action to generate code
		// object containing ISA. Then we use a special "link" action to create
		// a fat binary containing all the code objects for different GPU's.
		// The fat binary is then an input to the host action.
		for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
		// Create a link action to link device IR with device library
		// and generate ISA.
		ActionList AL;
		AL.push_back(CudaDeviceActions[I]);
		CudaDeviceActions[I] =
		C.MakeAction<LinkJobAction>(AL, types::TY_Image);

		// OffloadingActionBuilder propagates device arch until an offload
		// action. Since the next action for creating fatbin does
		// not have device arch, whereas the above link action and its input
		// have device arch, an offload action is needed to stop the null
		// device arch of the next action being propagated to the above link
		// action.
		OffloadAction::DeviceDependences DDep;
		DDep.add(CudaDeviceActions[I], ToolChains.front(),
		CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
		CudaDeviceActions[I] = C.MakeAction<OffloadAction>(
		DDep, CudaDeviceActions[I]->getType());
		}
		// Create HIP fat binary with a special "link" action.
		CudaFatBinary =
		C.MakeAction<LinkJobAction>(CudaDeviceActions,
		types::TY_HIP_FATBIN);

		DA.add(CudaFatBinary, ToolChains.front(), /*BoundArch=*/nullptr,
		AssociatedOffloadKind);
		// Clear the fat binary, it is already a dependence to an host
		// action.
		CudaFatBinary = nullptr;

		// Remove the CUDA actions as they are already connected to an host
		// action or fat binary.
		CudaDeviceActions.clear();

		return ABRT_Success;
		} else if (CurPhase == phases::Link) {
// Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.		// Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
// This happens to each device action originated from each input file.		// This happens to each device action originated from each input file.
// Later on, device actions in DeviceLinkerInputs are used to create		// Later on, device actions in DeviceLinkerInputs are used to create
// device link actions in appendLinkDependences and the created device		// device link actions in appendLinkDependences and the created device
// link actions are passed to the offload action as device dependence.		// link actions are passed to the offload action as device dependence.
if (CurPhase == phases::Link) {
DeviceLinkerInputs.resize(CudaDeviceActions.size());		DeviceLinkerInputs.resize(CudaDeviceActions.size());
auto LI = DeviceLinkerInputs.begin();		auto LI = DeviceLinkerInputs.begin();
for (auto *A : CudaDeviceActions) {		for (auto *A : CudaDeviceActions) {
LI->push_back(A);		LI->push_back(A);
++LI;		++LI;
}		}

// We will pass the device action as a host dependence, so we don't		// We will pass the device action as a host dependence, so we don't
Show All 16 Lines	void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
for (auto &LI : DeviceLinkerInputs) {		for (auto &LI : DeviceLinkerInputs) {
auto *DeviceLinkAction =		auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);		C.MakeAction<LinkJobAction>(LI, types::TY_Image);
DA.add(DeviceLinkAction, ToolChains[0],		DA.add(DeviceLinkAction, ToolChains[0],
CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);		CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
++I;		++I;
}		}
}		}

		bool initialize() override {
		Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
		options::OPT_fno_gpu_rdc, /*Default=*/false);

		return CudaActionBuilderBase::initialize();
		}
};		};

/// OpenMP action builder. The host bitcode is passed to the device frontend		/// OpenMP action builder. The host bitcode is passed to the device frontend
/// and all the device linked images are passed to the host link phase.		/// and all the device linked images are passed to the host link phase.
class OpenMPActionBuilder final : public DeviceActionBuilder {		class OpenMPActionBuilder final : public DeviceActionBuilder {
/// The OpenMP actions for the current input.		/// The OpenMP actions for the current input.
ActionList OpenMPDeviceActions;		ActionList OpenMPDeviceActions;

▲ Show 20 Lines • Show All 2,043 Lines • Show Last 20 Lines

lib/Driver/ToolChains/Clang.cpp

Show First 20 Lines • Show All 4,914 Lines • ▼ Show 20 Lines	for (const char *OriginalArg : OriginalArgs) {
EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);		EscapeSpacesAndBackslashes(OriginalArg, EscapedArg);
Flags += " ";		Flags += " ";
Flags += EscapedArg;		Flags += EscapedArg;
}		}
CmdArgs.push_back("-dwarf-debug-flags");		CmdArgs.push_back("-dwarf-debug-flags");
CmdArgs.push_back(Args.MakeArgString(Flags));		CmdArgs.push_back(Args.MakeArgString(Flags));
}		}

if (IsCuda) {
// Host-side cuda compilation receives all device-side outputs in a single		// Host-side cuda compilation receives all device-side outputs in a single
// fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary.		// fatbin as Inputs[1]. Include the binary with -fcuda-include-gpubinary.
if (CudaDeviceInput) {		if ((IsCuda \|\| IsHIP) && CudaDeviceInput) {
CmdArgs.push_back("-fcuda-include-gpubinary");		CmdArgs.push_back("-fcuda-include-gpubinary");
CmdArgs.push_back(CudaDeviceInput->getFilename());		CmdArgs.push_back(CudaDeviceInput->getFilename());
		if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
		CmdArgs.push_back("-fgpu-rdc");
}		}

if (Args.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, false))		if (IsCuda) {
CmdArgs.push_back("-fcuda-rdc");
if (Args.hasFlag(options::OPT_fcuda_short_ptr,		if (Args.hasFlag(options::OPT_fcuda_short_ptr,
options::OPT_fno_cuda_short_ptr, false))		options::OPT_fno_cuda_short_ptr, false))
CmdArgs.push_back("-fcuda-short-ptr");		CmdArgs.push_back("-fcuda-short-ptr");
}		}

// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path		// OpenMP offloading device jobs take the argument -fopenmp-host-ir-file-path
// to specify the result of the compile phase on the host, so the meaningful		// to specify the result of the compile phase on the host, so the meaningful
// device declarations can be identified. Also, -fopenmp-is-device is passed		// device declarations can be identified. Also, -fopenmp-is-device is passed
▲ Show 20 Lines • Show All 977 Lines • Show Last 20 Lines

lib/Driver/ToolChains/CommonArgs.cpp

Show All 9 Lines
#include "CommonArgs.h"		#include "CommonArgs.h"
#include "Arch/AArch64.h"		#include "Arch/AArch64.h"
#include "Arch/ARM.h"		#include "Arch/ARM.h"
#include "Arch/Mips.h"		#include "Arch/Mips.h"
#include "Arch/PPC.h"		#include "Arch/PPC.h"
#include "Arch/SystemZ.h"		#include "Arch/SystemZ.h"
#include "Arch/X86.h"		#include "Arch/X86.h"
#include "Hexagon.h"		#include "Hexagon.h"
		#include "HIP.h"
#include "InputInfo.h"		#include "InputInfo.h"
#include "clang/Basic/CharInfo.h"		#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"		#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"		#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"		#include "clang/Basic/Version.h"
#include "clang/Basic/VirtualFileSystem.h"		#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Config/config.h"		#include "clang/Config/config.h"
#include "clang/Driver/Action.h"		#include "clang/Driver/Action.h"
▲ Show 20 Lines • Show All 1,306 Lines • ▼ Show 20 Lines	void tools::AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
const InputInfoList &Inputs, const ArgList &Args,		const InputInfoList &Inputs, const ArgList &Args,
ArgStringList &CmdArgs, const JobAction &JA,		ArgStringList &CmdArgs, const JobAction &JA,
const Tool &T) {		const Tool &T) {

// If this is not a HIP host toolchain, we don't need to do anything.		// If this is not a HIP host toolchain, we don't need to do anything.
if (!JA.isHostOffloading(Action::OFK_HIP))		if (!JA.isHostOffloading(Action::OFK_HIP))
return;		return;

		InputInfoList DeviceInputs;
		for (const auto &II : Inputs) {
		const Action *A = II.getAction();
		// Is this a device linking action?
		if (A && isa<LinkJobAction>(A) && A->isDeviceOffloading(Action::OFK_HIP)) {
		DeviceInputs.push_back(II);
		}
		}

		if (DeviceInputs.empty())
		return;

// Create temporary linker script. Keep it if save-temps is enabled.		// Create temporary linker script. Keep it if save-temps is enabled.
const char *LKS;		const char *LKS;
SmallString<256> Name = llvm::sys::path::filename(Output.getFilename());		SmallString<256> Name = llvm::sys::path::filename(Output.getFilename());
if (C.getDriver().isSaveTempsEnabled()) {		if (C.getDriver().isSaveTempsEnabled()) {
llvm::sys::path::replace_extension(Name, "lk");		llvm::sys::path::replace_extension(Name, "lk");
LKS = C.getArgs().MakeArgString(Name.c_str());		LKS = C.getArgs().MakeArgString(Name.c_str());
} else {		} else {
llvm::sys::path::replace_extension(Name, "");		llvm::sys::path::replace_extension(Name, "");
Show All 11 Lines	void tools::AddHIPLinkerScript(const ToolChain &TC, Compilation &C,

// Get the HIP offload tool chain.		// Get the HIP offload tool chain.
auto HIPTC = static_cast<const toolchains::CudaToolChain >(		auto HIPTC = static_cast<const toolchains::CudaToolChain >(
C.getSingleOffloadToolChain<Action::OFK_HIP>());		C.getSingleOffloadToolChain<Action::OFK_HIP>());
assert(HIPTC->getTriple().getArch() == llvm::Triple::amdgcn &&		assert(HIPTC->getTriple().getArch() == llvm::Triple::amdgcn &&
"Wrong platform");		"Wrong platform");
(void)HIPTC;		(void)HIPTC;

// Construct clang-offload-bundler command to bundle object files for		// The output file name needs to persist through the compilation, therefore
// for different GPU archs.		// it needs to be created through MakeArgString.
ArgStringList BundlerArgs;		std::string BundleFileName = C.getDriver().GetTemporaryPath("BUNDLE", "hipfb");
BundlerArgs.push_back(Args.MakeArgString("-type=o"));

// ToDo: Remove the dummy host binary entry which is required by
// clang-offload-bundler.
std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
std::string BundlerInputArg = "-inputs=/dev/null";

for (const auto &II : Inputs) {
const Action *A = II.getAction();
// Is this a device linking action?
if (A && isa<LinkJobAction>(A) && A->isDeviceOffloading(Action::OFK_HIP)) {
BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" +
StringRef(A->getOffloadingArch()).str();
BundlerInputArg = BundlerInputArg + "," + II.getFilename();
}
}
BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));

std::string BundleFileName = C.getDriver().GetTemporaryPath("BUNDLE", "o");
const char *BundleFile =		const char *BundleFile =
C.addTempFile(C.getArgs().MakeArgString(BundleFileName.c_str()));		C.addTempFile(C.getArgs().MakeArgString(BundleFileName.c_str()));
auto BundlerOutputArg =		AMDGCN::constructHIPFatbinCommand(C, JA, BundleFile, DeviceInputs, Args, T);
Args.MakeArgString(std::string("-outputs=").append(BundleFile));
BundlerArgs.push_back(BundlerOutputArg);

SmallString<128> BundlerPath(C.getDriver().Dir);
llvm::sys::path::append(BundlerPath, "clang-offload-bundler");
const char *Bundler = Args.MakeArgString(BundlerPath);
C.addCommand(llvm::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));

// Add commands to embed target binaries. We ensure that each section and		// Add commands to embed target binaries. We ensure that each section and
// image is 16-byte aligned. This is not mandatory, but increases the		// image is 16-byte aligned. This is not mandatory, but increases the
// likelihood of data to be aligned with a cache block in several main host		// likelihood of data to be aligned with a cache block in several main host
// machines.		// machines.
LksStream << "/*\n";		LksStream << "/*\n";
LksStream << " HIP Offload Linker Script\n";		LksStream << " HIP Offload Linker Script\n";
LksStream << " * Automatically generated by Clang *\n";		LksStream << " * Automatically generated by Clang *\n";
▲ Show 20 Lines • Show All 59 Lines • Show Last 20 Lines

lib/Driver/ToolChains/Cuda.cpp

Show First 20 Lines • Show All 392 Lines • ▼ Show 20 Lines	void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,

bool Relocatable = false;		bool Relocatable = false;
if (JA.isOffloading(Action::OFK_OpenMP))		if (JA.isOffloading(Action::OFK_OpenMP))
// In OpenMP we need to generate relocatable code.		// In OpenMP we need to generate relocatable code.
Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,		Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
options::OPT_fnoopenmp_relocatable_target,		options::OPT_fnoopenmp_relocatable_target,
/Default=/true);		/Default=/true);
else if (JA.isOffloading(Action::OFK_Cuda))		else if (JA.isOffloading(Action::OFK_Cuda))
Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,		Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
options::OPT_fno_cuda_rdc, /Default=/false);		options::OPT_fno_gpu_rdc, /Default=/false);

if (Relocatable)		if (Relocatable)
CmdArgs.push_back("-c");		CmdArgs.push_back("-c");

const char *Exec;		const char *Exec;
if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))		if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
Exec = A->getValue();		Exec = A->getValue();
else		else
▲ Show 20 Lines • Show All 193 Lines • ▼ Show 20 Lines	if (DeviceOffloadingKind == Action::OFK_Cuda) {
if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,		if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
options::OPT_fno_cuda_flush_denormals_to_zero, false))		options::OPT_fno_cuda_flush_denormals_to_zero, false))
CC1Args.push_back("-fcuda-flush-denormals-to-zero");		CC1Args.push_back("-fcuda-flush-denormals-to-zero");

if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,		if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
options::OPT_fno_cuda_approx_transcendentals, false))		options::OPT_fno_cuda_approx_transcendentals, false))
CC1Args.push_back("-fcuda-approx-transcendentals");		CC1Args.push_back("-fcuda-approx-transcendentals");

if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,		if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))		false))
CC1Args.push_back("-fcuda-rdc");		CC1Args.push_back("-fgpu-rdc");
}		}

if (DriverArgs.hasArg(options::OPT_nocudalib))		if (DriverArgs.hasArg(options::OPT_nocudalib))
return;		return;

std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);		std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);

if (LibDeviceFile.empty()) {		if (LibDeviceFile.empty()) {
▲ Show 20 Lines • Show All 223 Lines • Show Last 20 Lines

lib/Driver/ToolChains/HIP.h

	Show All 13 Lines
	#include "clang/Driver/Tool.h"			#include "clang/Driver/Tool.h"

	namespace clang {			namespace clang {
	namespace driver {			namespace driver {

	namespace tools {			namespace tools {

	namespace AMDGCN {			namespace AMDGCN {
				// Construct command for creating HIP fatbin.
				// Adds a clang-offload-bundler command to compilation C that bundles the
				// files in Inputs into OutputFileName. Each input contributes one
				// "hip-amdgcn-amd-amdhsa-<offloading arch>" bundler target. JA and T are
				// recorded as the job action and the tool that own the new command.
				void constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
				StringRef OutputFileName, const InputInfoList &Inputs,
				const llvm::opt::ArgList &TCArgs, const Tool& T);

	// Runs llvm-link/opt/llc/lld, which links multiple LLVM bitcode, together with			// Runs llvm-link/opt/llc/lld, which links multiple LLVM bitcode, together with
	// device library, then compiles it to ISA in a shared object.			// device library, then compiles it to ISA in a shared object.
	class LLVM_LIBRARY_VISIBILITY Linker : public Tool {			class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
	public:			public:
	Linker(const ToolChain &TC) : Tool("AMDGCN::Linker", "amdgcn-link", TC) {}			Linker(const ToolChain &TC) : Tool("AMDGCN::Linker", "amdgcn-link", TC) {}

	bool hasIntegratedCPP() const override { return false; }			bool hasIntegratedCPP() const override { return false; }

	▲ Show 20 Lines • Show All 94 Lines • Show Last 20 Lines

lib/Driver/ToolChains/HIP.cpp

Show First 20 Lines • Show All 178 Lines • ▼ Show 20 Lines	ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined",
"-shared", "-o", Output.getFilename(),		"-shared", "-o", Output.getFilename(),
InputFileName};		InputFileName};
SmallString<128> LldPath(C.getDriver().Dir);		SmallString<128> LldPath(C.getDriver().Dir);
llvm::sys::path::append(LldPath, "lld");		llvm::sys::path::append(LldPath, "lld");
const char *Lld = Args.MakeArgString(LldPath);		const char *Lld = Args.MakeArgString(LldPath);
C.addCommand(llvm::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs));		C.addCommand(llvm::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs));
}		}

		// Construct a clang-offload-bundler command to bundle code objects for
		// different GPUs into a HIP fat binary.
		//
		// C              - compilation the bundler command is appended to.
		// JA             - job action the command is created for.
		// OutputFileName - path handed to the bundler through "-outputs=".
		// Inputs         - device inputs; every entry adds one
		//                  "hip-amdgcn-amd-amdhsa-<arch>" target and its file name.
		// Args           - argument list used to allocate the argument strings.
		// T              - tool recorded as the creator of the command.
		void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
		    StringRef OutputFileName, const InputInfoList &Inputs,
		    const llvm::opt::ArgList &Args, const Tool& T) {
		  // Bundle object files for the different GPU archs.
		  ArgStringList BundlerArgs;
		  BundlerArgs.push_back(Args.MakeArgString("-type=o"));

		  // ToDo: Remove the dummy host binary entry which is required by
		  // clang-offload-bundler.
		  std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
		  std::string BundlerInputArg = "-inputs=/dev/null";

		  for (const auto &II : Inputs) {
		    const auto *A = II.getAction();
		    // The offloading arch is read from the action, so it must be present.
		    assert(A && "HIP fatbin input has no associated action");
		    // Append in place instead of rebuilding the whole string each iteration.
		    BundlerTargetArg += ",hip-amdgcn-amd-amdhsa-";
		    BundlerTargetArg += StringRef(A->getOffloadingArch()).str();
		    BundlerInputArg += ",";
		    BundlerInputArg += II.getFilename();
		  }
		  BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
		  BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));

		  auto BundlerOutputArg =
		      Args.MakeArgString(std::string("-outputs=").append(OutputFileName));
		  BundlerArgs.push_back(BundlerOutputArg);

		  // The bundler is looked up next to the driver executable.
		  SmallString<128> BundlerPath(C.getDriver().Dir);
		  llvm::sys::path::append(BundlerPath, "clang-offload-bundler");
		  const char *Bundler = Args.MakeArgString(BundlerPath);
		  C.addCommand(llvm::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
		}

// For amdgcn the inputs of the linker job are device bitcode and output is		// For amdgcn the inputs of the linker job are device bitcode and output is
// object file. It calls llvm-link, opt, llc, then lld steps.		// object file. It calls llvm-link, opt, llc, then lld steps.
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,		void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,		const InputInfo &Output,
const InputInfoList &Inputs,		const InputInfoList &Inputs,
const ArgList &Args,		const ArgList &Args,
const char *LinkingOutput) const {		const char *LinkingOutput) const {

		if (JA.getType() == types::TY_HIP_FATBIN)
		return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);

assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn &&		assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn &&
"Unsupported target");		"Unsupported target");

std::string SubArchName = JA.getOffloadingArch();		std::string SubArchName = JA.getOffloadingArch();
assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch");		assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch");

// Prefix for temporary file name.		// Prefix for temporary file name.
std::string Prefix =		std::string Prefix =
Show All 36 Lines	void HIPToolChain::addClangTargetOptions(
if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,		if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
options::OPT_fno_cuda_flush_denormals_to_zero, false))		options::OPT_fno_cuda_flush_denormals_to_zero, false))
CC1Args.push_back("-fcuda-flush-denormals-to-zero");		CC1Args.push_back("-fcuda-flush-denormals-to-zero");

if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,		if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
options::OPT_fno_cuda_approx_transcendentals, false))		options::OPT_fno_cuda_approx_transcendentals, false))
CC1Args.push_back("-fcuda-approx-transcendentals");		CC1Args.push_back("-fcuda-approx-transcendentals");

if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,		if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))		false))
CC1Args.push_back("-fcuda-rdc");		CC1Args.push_back("-fgpu-rdc");

// Default to "hidden" visibility, as object level linking will not be		// Default to "hidden" visibility, as object level linking will not be
// supported for the foreseeable future.		// supported for the foreseeable future.
if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,		if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
options::OPT_fvisibility_ms_compat))		options::OPT_fvisibility_ms_compat))
CC1Args.append({"-fvisibility", "hidden"});		CC1Args.append({"-fvisibility", "hidden"});
}		}

▲ Show 20 Lines • Show All 99 Lines • Show Last 20 Lines

lib/Frontend/CompilerInvocation.cpp

Show First 20 Lines • Show All 2,214 Lines • ▼ Show 20 Lines	if (Args.hasArg(OPT_fcuda_allow_variadic_functions))
Opts.CUDAAllowVariadicFunctions = 1;		Opts.CUDAAllowVariadicFunctions = 1;

if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))		if (Args.hasArg(OPT_fno_cuda_host_device_constexpr))
Opts.CUDAHostDeviceConstexpr = 0;		Opts.CUDAHostDeviceConstexpr = 0;

if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals))		if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals))
Opts.CUDADeviceApproxTranscendentals = 1;		Opts.CUDADeviceApproxTranscendentals = 1;

Opts.CUDARelocatableDeviceCode = Args.hasArg(OPT_fcuda_rdc);		Opts.GPURelocatableDeviceCode = Args.hasArg(OPT_fgpu_rdc);

if (Opts.ObjC1) {		if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {		if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
StringRef value = arg->getValue();		StringRef value = arg->getValue();
if (Opts.ObjCRuntime.tryParse(value))		if (Opts.ObjCRuntime.tryParse(value))
Diags.Report(diag::err_drv_unknown_objc_runtime) << value;		Diags.Report(diag::err_drv_unknown_objc_runtime) << value;
}		}

▲ Show 20 Lines • Show All 1,053 Lines • Show Last 20 Lines

lib/Sema/SemaDeclAttr.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 4,137 Lines • ▼ Show 20 Lines
	}			}

	static void handleSharedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {			static void handleSharedAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
	if (checkAttrMutualExclusion<CUDAConstantAttr>(S, D, AL))			if (checkAttrMutualExclusion<CUDAConstantAttr>(S, D, AL))
	return;			return;
	const auto *VD = cast<VarDecl>(D);			const auto *VD = cast<VarDecl>(D);
	// extern __shared__ is only allowed on arrays with no length (e.g.			// extern __shared__ is only allowed on arrays with no length (e.g.
	// "int x[]").			// "int x[]").
	if (!S.getLangOpts().CUDARelocatableDeviceCode && VD->hasExternalStorage() &&			if (!S.getLangOpts().GPURelocatableDeviceCode && VD->hasExternalStorage() &&
	!isa<IncompleteArrayType>(VD->getType())) {			!isa<IncompleteArrayType>(VD->getType())) {
	S.Diag(AL.getLoc(), diag::err_cuda_extern_shared) << VD;			S.Diag(AL.getLoc(), diag::err_cuda_extern_shared) << VD;
	return;			return;
	}			}
	if (S.getLangOpts().CUDA && VD->hasLocalStorage() &&			if (S.getLangOpts().CUDA && VD->hasLocalStorage() &&
	S.CUDADiagIfHostCode(AL.getLoc(), diag::err_cuda_host_shared)			S.CUDADiagIfHostCode(AL.getLoc(), diag::err_cuda_host_shared)
	<< S.CurrentCUDATarget())			<< S.CurrentCUDATarget())
	return;			return;
	▲ Show 20 Lines • Show All 3,813 Lines • Show Last 20 Lines

test/CodeGenCUDA/device-stub.cu

// RUN: echo "GPU binary would be here" > %t		// RUN: echo "GPU binary would be here" > %t
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \		// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - \		// RUN: -fcuda-include-gpubinary %t -o - \
// RUN: \| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,CUDA,CUDANORDC		// RUN: \| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,CUDA,CUDANORDC
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \		// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \		// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS \
// RUN: \| FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS		// RUN: \| FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,CUDANOGLOBALS
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \		// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - \		// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - \
// RUN: \| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,RDC,CUDA,CUDARDC		// RUN: \| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,RDC,CUDA,CUDARDC
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \		// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - \
// RUN: \| FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN		// RUN: \| FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN

// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \		// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -x hip\		// RUN: -fcuda-include-gpubinary %t -o - -x hip\
// RUN: \| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP		// RUN: \| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \		// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \		// RUN: -fcuda-include-gpubinary %t -o - -DNOGLOBALS -x hip \
// RUN: \| FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS		// RUN: \| FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=NOGLOBALS,HIPNOGLOBALS
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \		// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \
// RUN: -fcuda-rdc -fcuda-include-gpubinary %t -o - -x hip \		// RUN: -fgpu-rdc -fcuda-include-gpubinary %t -o - -x hip \
// RUN: \| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP		// RUN: \| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,NORDC,HIP,HIPEF
// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\		// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - -x hip\
// RUN: \| FileCheck -allow-deprecated-dag-overlap %s -check-prefix=NOGPUBIN		// RUN: \| FileCheck -allow-deprecated-dag-overlap %s -check-prefixes=ALL,NORDC,HIP,HIPNEF

#include "Inputs/cuda.h"		#include "Inputs/cuda.h"

#ifndef NOGLOBALS		#ifndef NOGLOBALS
// ALL-DAG: @device_var = internal global i32		// ALL-DAG: @device_var = internal global i32
__device__ int device_var;		__device__ int device_var;

// ALL-DAG: @constant_var = internal global i32		// ALL-DAG: @constant_var = internal global i32
Show All 26 Lines	void use_pointers() {
p = &ext_constant_var;		p = &ext_constant_var;
p = &ext_host_var;		p = &ext_host_var;
}		}

// Make sure that all parts of GPU code init/cleanup are there:		// Make sure that all parts of GPU code init/cleanup are there:
// * constant unnamed string with the kernel name		// * constant unnamed string with the kernel name
// ALL: private unnamed_addr constant{{.}}kernelfunc{{.}}\00"		// ALL: private unnamed_addr constant{{.}}kernelfunc{{.}}\00"
// * constant unnamed string with GPU binary		// * constant unnamed string with GPU binary
// HIP: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
// CUDA: @[[FATBIN:.]] = private constant{{.GPU binary would be here.*}}\00",		// CUDA: @[[FATBIN:.]] = private constant{{.GPU binary would be here.*}}\00",
		// HIPEF: @[[FATBIN:.]] = private constant{{.GPU binary would be here.*}}\00",
		// HIPNEF: @[[FATBIN:__hip_fatbin]] = external constant i8, section ".hip_fatbin"
// CUDANORDC-SAME: section ".nv_fatbin", align 8		// CUDANORDC-SAME: section ".nv_fatbin", align 8
// CUDARDC-SAME: section "__nv_relfatbin", align 8		// CUDARDC-SAME: section "__nv_relfatbin", align 8
// * constant struct that wraps GPU binary		// * constant struct that wraps GPU binary
// ALL: @__[[PREFIX:cuda\|hip]]_fatbin_wrapper = internal constant		// ALL: @__[[PREFIX:cuda\|hip]]_fatbin_wrapper = internal constant
// ALL-SAME: { i32, i32, i8, i8 }		// ALL-SAME: { i32, i32, i8, i8 }
// CUDA-SAME: { i32 1180844977, i32 1,		// CUDA-SAME: { i32 1180844977, i32 1,
// HIP-SAME: { i32 1212764230, i32 1,		// HIP-SAME: { i32 1212764230, i32 1,
// CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),		// CUDA-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
// HIP-SAME: i8* @[[FATBIN]],		// HIPEF-SAME: i8* getelementptr inbounds ({{.*}}@[[FATBIN]], i64 0, i64 0),
		// HIPNEF-SAME: i8* @[[FATBIN]],
// ALL-SAME: i8* null }		// ALL-SAME: i8* null }
// CUDA-SAME: section ".nvFatBinSegment"		// CUDA-SAME: section ".nvFatBinSegment"
// HIP-SAME: section ".hipFatBinSegment"		// HIP-SAME: section ".hipFatBinSegment"
// * variable to save GPU binary handle after initialization		// * variable to save GPU binary handle after initialization
// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null		// CUDANORDC: @__[[PREFIX]]_gpubin_handle = internal global i8** null
// HIP: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null		// HIPNEF: @__[[PREFIX]]_gpubin_handle = linkonce hidden global i8** null
// * constant unnamed string with NVModuleID		// * constant unnamed string with NVModuleID
// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant		// RDC: [[MODULE_ID_GLOBAL:@.*]] = private constant
// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32		// CUDARDC-SAME: c"[[MODULE_ID:.+]]\00", section "__nv_module_id", align 32
// * Make sure our constructor was added to global ctor list.		// * Make sure our constructor was added to global ctor list.
// ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor		// ALL: @llvm.global_ctors = appending global {{.*}}@__[[PREFIX]]_module_ctor
// * Alias to global symbol containing the NVModuleID.		// * Alias to global symbol containing the NVModuleID.
// RDC: @__fatbinwrap[[MODULE_ID]] = alias { i32, i32, i8, i8 }		// RDC: @__fatbinwrap[[MODULE_ID]] = alias { i32, i32, i8, i8 }
// RDC-SAME: { i32, i32, i8, i8 }* @__[[PREFIX]]_fatbin_wrapper		// RDC-SAME: { i32, i32, i8, i8 }* @__[[PREFIX]]_fatbin_wrapper
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines
// HIP-NEXT: store i8 null, i8* @__hip_gpubin_handle		// HIP-NEXT: store i8 null, i8* @__hip_gpubin_handle
// HIP-NEXT: br label %exit		// HIP-NEXT: br label %exit
// HIP: exit:		// HIP: exit:

// There should be no __[[PREFIX]]_register_globals if we have no		// There should be no __[[PREFIX]]_register_globals if we have no
// device-side globals, but we still need to register GPU binary.		// device-side globals, but we still need to register GPU binary.
// Skip GPU binary string first.		// Skip GPU binary string first.
// CUDANOGLOBALS: @{{.}} = private constant{{.}}		// CUDANOGLOBALS: @{{.}} = private constant{{.}}
// HIPNOGLOBALS: @{{.}} = external constant{{.}}		// HIPNOGLOBALS: @{{.}} = internal constant{{.}}
// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals		// NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
// NOGLOBALS: define internal void @__[[PREFIX:cuda\|hip]]_module_ctor		// NOGLOBALS: define internal void @__[[PREFIX:cuda\|hip]]_module_ctor
// NOGLOBALS: call{{.}}[[PREFIX]]RegisterFatBinary{{.}}__[[PREFIX]]_fatbin_wrapper		// NOGLOBALS: call{{.}}[[PREFIX]]RegisterFatBinary{{.}}__[[PREFIX]]_fatbin_wrapper
// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals		// NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor		// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary		// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary

// There should be no constructors/destructors if we have no GPU binary.		// There should be no constructors/destructors if we have no GPU binary.
// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals		// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals
// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_ctor		// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_ctor
// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_dtor		// NOGPUBIN-NOT: define internal void @__[[PREFIX]]_module_dtor

test/Driver/cuda-external-tools.cu

	Show All 13 Lines
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
	// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu -O3 -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
	// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu -O4 -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
	// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu -Ofast -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT3 %s
	// Generating relocatable device code			// Generating relocatable device code
	// RUN: %clang -### -target x86_64-linux-gnu -fcuda-rdc -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s

	// With debugging enabled, ptxas should be run with no ptxas optimizations.			// With debugging enabled, ptxas should be run with no ptxas optimizations.
	// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -g -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug -O2 -g -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,DBG %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,DBG %s

	// --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.			// --no-cuda-noopt-device-debug overrides --cuda-noopt-device-debug.
	// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \			// RUN: %clang -### -target x86_64-linux-gnu --cuda-noopt-device-debug \
	Show All 10 Lines
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s
	// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu -Oz -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT2 %s

	// Regular compile targeting sm_35.			// Regular compile targeting sm_35.
	// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
	// Separate compilation targeting sm_35.			// Separate compilation targeting sm_35.
	// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fcuda-rdc -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s

	// 32-bit compile.			// 32-bit compile.
	// RUN: %clang -### -target i386-linux-gnu -c %s 2>&1 \			// RUN: %clang -### -target i386-linux-gnu -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s
	// 32-bit compile when generating relocatable device code.			// 32-bit compile when generating relocatable device code.
	// RUN: %clang -### -target i386-linux-gnu -fcuda-rdc -c %s 2>&1 \			// RUN: %clang -### -target i386-linux-gnu -fgpu-rdc -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s

	// Compile with -fintegrated-as. This should still cause us to invoke ptxas.			// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
	// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
	// Check that we still pass -c when generating relocatable device code.			// Check that we still pass -c when generating relocatable device code.
	// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fcuda-rdc -c %s 2>&1 \			// RUN: %clang -### -target x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s

	// Check -Xcuda-ptxas and -Xcuda-fatbinary			// Check -Xcuda-ptxas and -Xcuda-fatbinary
	// RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \			// RUN: %clang -### -target x86_64-linux-gnu -c -Xcuda-ptxas -foo1 \
	// RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \			// RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,SM20,PTXAS-EXTRA,FATBINARY-EXTRA %s			// RUN: \| FileCheck -check-prefixes=CHECK,SM20,PTXAS-EXTRA,FATBINARY-EXTRA %s

	// MacOS spot-checks			// MacOS spot-checks
	// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \			// RUN: %clang -### -target x86_64-apple-macosx -O0 -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,OPT0 %s
	// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \			// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
	// RUN: %clang -### -target i386-apple-macosx -c %s 2>&1 \			// RUN: %clang -### -target i386-apple-macosx -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH32,SM20 %s

	// Check relocatable device code generation on MacOS.			// Check relocatable device code generation on MacOS.
	// RUN: %clang -### -target x86_64-apple-macosx -O0 -fcuda-rdc -c %s 2>&1 \			// RUN: %clang -### -target x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM20,RDC %s
	// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fcuda-rdc -c %s 2>&1 \			// RUN: %clang -### -target x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
	// RUN: %clang -### -target i386-apple-macosx -fcuda-rdc -c %s 2>&1 \			// RUN: %clang -### -target i386-apple-macosx -fgpu-rdc -c %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s			// RUN: \| FileCheck -check-prefixes=CHECK,ARCH32,SM20,RDC %s

	// Check that CLANG forwards the -v flag to PTXAS.			// Check that CLANG forwards the -v flag to PTXAS.
	// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \			// RUN: %clang -### -save-temps -no-canonical-prefixes -v %s 2>&1 \
	// RUN: \| FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s			// RUN: \| FileCheck -check-prefix=CHK-PTXAS-VERBOSE %s

	// Match clang job that produces PTX assembly.			// Match clang job that produces PTX assembly.
	// CHECK: "-cc1"			// CHECK: "-cc1"
	// ARCH64-SAME: "-triple" "nvptx64-nvidia-cuda"			// ARCH64-SAME: "-triple" "nvptx64-nvidia-cuda"
	// ARCH32-SAME: "-triple" "nvptx-nvidia-cuda"			// ARCH32-SAME: "-triple" "nvptx-nvidia-cuda"
				// RDC-SAME: "-fgpu-rdc"
				// CHECK-NOT: "-fgpu-rdc"
	// SM20-SAME: "-target-cpu" "sm_20"			// SM20-SAME: "-target-cpu" "sm_20"
	// SM35-SAME: "-target-cpu" "sm_35"			// SM35-SAME: "-target-cpu" "sm_35"
	// SM20-SAME: "-o" "[[PTXFILE:[^"]*]]"			// SM20-SAME: "-o" "[[PTXFILE:[^"]*]]"
	// SM35-SAME: "-o" "[[PTXFILE:[^"]*]]"			// SM35-SAME: "-o" "[[PTXFILE:[^"]*]]"
	// RDC-SAME: "-fcuda-rdc"
	// CHECK-NOT: "-fcuda-rdc"

	// Match the call to ptxas (which assembles PTX to SASS).			// Match the call to ptxas (which assembles PTX to SASS).
	// CHECK: ptxas			// CHECK: ptxas
	// ARCH64-SAME: "-m64"			// ARCH64-SAME: "-m64"
	// ARCH32-SAME: "-m32"			// ARCH32-SAME: "-m32"
	// OPT0-SAME: "-O0"			// OPT0-SAME: "-O0"
	// OPT0-NOT: "-g"			// OPT0-NOT: "-g"
	// OPT1-SAME: "-O1"			// OPT1-SAME: "-O1"
	Show All 27 Lines
	// FATBINARY-EXTRA-SAME: "-bar1"			// FATBINARY-EXTRA-SAME: "-bar1"
	// FATBINARY-EXTRA-SAME: "-bar2"			// FATBINARY-EXTRA-SAME: "-bar2"

	// Match the clang job for host compilation.			// Match the clang job for host compilation.
	// CHECK: "-cc1"			// CHECK: "-cc1"
	// ARCH64-SAME: "-triple" "x86_64-			// ARCH64-SAME: "-triple" "x86_64-
	// ARCH32-SAME: "-triple" "i386-			// ARCH32-SAME: "-triple" "i386-
	// CHECK-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"			// CHECK-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
	// RDC-SAME: "-fcuda-rdc"			// RDC-SAME: "-fgpu-rdc"
	// CHECK-NOT: "-fcuda-rdc"			// CHECK-NOT: "-fgpu-rdc"

	// CHK-PTXAS-VERBOSE: ptxas{{.*}}" "-v"			// CHK-PTXAS-VERBOSE: ptxas{{.*}}" "-v"

test/Driver/cuda-phases.cu

	// Tests the phases generated for a CUDA offloading target for different			// Tests the phases generated for a CUDA offloading target for different
	// combinations of:			// combinations of:
	// - Number of gpu architectures;			// - Number of gpu architectures;
	// - Host/device-only compilation;			// - Host/device-only compilation;
	// - User-requested final phase - binary or assembly.			// - User-requested final phase - binary or assembly.

	// REQUIRES: clang-driver			// REQUIRES: clang-driver
	// REQUIRES: powerpc-registered-target			// REQUIRES: powerpc-registered-target
	// REQUIRES: nvptx-registered-target			// REQUIRES: nvptx-registered-target
	// REQUIRES: amdgpu-registered-target			// REQUIRES: amdgpu-registered-target
	//			//
	// Test single gpu architecture with complete compilation.			// Test single gpu architecture with complete compilation.
	//			//
				// Test CUDA NVPTX phases.
	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \			// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
	// RUN: --cuda-gpu-arch=sm_30 %s 2>&1 \			// RUN: --cuda-gpu-arch=sm_30 %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=BIN,BIN_NV %s			// RUN: \| FileCheck -check-prefixes=BIN,BIN_NV %s
				//
				// Test HIP AMDGPU -fgpu-rdc phases.
				// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
				// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
				// RUN: \| FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_RDC %s
				//
				// Test HIP AMDGPU -fno-gpu-rdc phases (default).
	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \			// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
	// RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \			// RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=BIN,BIN_AMD %s			// RUN: \| FileCheck -check-prefixes=BIN,BIN_AMD,BIN_AMD_NRDC %s
				//
	// BIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])			// BIN_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
	// BIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])			// BIN_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
	// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])			// BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
	// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])			// BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
	// BIN_NV-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]])			// BIN_NV-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]])
	// BIN_AMD-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:gfx803]])			// BIN_AMD-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:gfx803]])
	// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])			// BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
	// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])			// BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
	// BIN_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])			// BIN_NV-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
	// BIN_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])			// BIN_NV-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
	// BIN_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object			// BIN_NV-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object
	// BIN_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler			// BIN_NV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler
	// BIN_NV-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]])			// BIN_NV-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]])
	// BIN_NV-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir			// BIN_NV-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir
	// BIN_NV-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])			// BIN_NV-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
	// BIN_AMD-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])			// BIN_AMD_RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
				// BIN_AMD_NRDC-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-hip, [[ARCH]])
				// BIN_AMD_NRDC-DAG: [[P7:[0-9]+]]: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, image
				// BIN_AMD_NRDC-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, hip-fatbin, (device-hip)
				// BIN_AMD_NRDC-DAG: [[P11:[0-9]+]]: offload, "host-hip (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-hip (amdgcn-amd-amdhsa)" {[[P8]]}, ir
				// BIN_AMD_NRDC-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
	// BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])			// BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
	// BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])			// BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
	// BIN_AMD-DAG: [[P15:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH]])			// BIN_AMD_RDC-DAG: [[P15:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH]])
	// BIN_AMD-DAG: [[P16:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P14]]},			// BIN_AMD_RDC-DAG: [[P16:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P14]]},
	// BIN_AMD-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P15]]}, object			// BIN_AMD_RDC-DAG-SAME: "device-[[T]] ([[TRIPLE:amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P15]]}, object

	//			//
	// Test single gpu architecture up to the assemble phase.			// Test single gpu architecture up to the assemble phase.
	//			//
	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \			// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
	// RUN: --cuda-gpu-arch=sm_30 %s -S 2>&1 \			// RUN: --cuda-gpu-arch=sm_30 %s -S 2>&1 \
	// RUN: \| FileCheck -check-prefixes=ASM,ASM_NV %s			// RUN: \| FileCheck -check-prefixes=ASM,ASM_NV %s
	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \			// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
	// RUN: --cuda-gpu-arch=gfx803 %s -S 2>&1 \			// RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s -S 2>&1 \
				// RUN: \| FileCheck -check-prefixes=ASM,ASM_AMD %s
				// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
				// RUN: --cuda-gpu-arch=gfx803 -fcuda-rdc %s -S 2>&1 \
	// RUN: \| FileCheck -check-prefixes=ASM,ASM_AMD %s			// RUN: \| FileCheck -check-prefixes=ASM,ASM_AMD %s
	// ASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])			// ASM_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
	// ASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])			// ASM_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
	// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])			// ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
	// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])			// ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
	// ASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])			// ASM_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
	// ASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda\|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler			// ASM_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda\|amdgcn-amd-amdhsa]]:[[ARCH]])" {[[P3]]}, assembler
	// ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]])			// ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]])
	// ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])			// ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])
	// ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])			// ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])
	// ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])			// ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])

	//			//
	// Test two gpu architectures with complete compilation.			// Test two gpu architectures with complete compilation.
	//			//
	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \			// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
	// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \			// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=BIN2,BIN2_NV %s			// RUN: \| FileCheck -check-prefixes=BIN2,BIN2_NV %s
	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \			// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
	// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \			// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s 2>&1 \
	// RUN: \| FileCheck -check-prefixes=BIN2,BIN2_AMD %s			// RUN: \| FileCheck -check-prefixes=BIN2,BIN2_AMD %s
	// BIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])			// BIN2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
	// BIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])			// BIN2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (host-[[T]])
	// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])			// BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
	// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])			// BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
	// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30\|gfx803]])			// BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30\|gfx803]])
	// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])			// BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
	// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])			// BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
	Show All 22 Lines

	//			//
	// Test two gpu architectures up to the assemble phase.			// Test two gpu architectures up to the assemble phase.
	//			//
	// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \			// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
	// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \			// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
	// RUN: \| FileCheck -check-prefixes=ASM2,ASM2_NV %s			// RUN: \| FileCheck -check-prefixes=ASM2,ASM2_NV %s
	// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \			// RUN: %clang -x hip -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
	// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -S 2>&1 \			// RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %s -S 2>&1 \
	// RUN: \| FileCheck -check-prefixes=ASM2,ASM2_AMD %s			// RUN: \| FileCheck -check-prefixes=ASM2,ASM2_AMD %s
	// ASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]])			// ASM2_NV-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]])
	// ASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])			// ASM2_AMD-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
	// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])			// ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
	// ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])			// ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])
	// ASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]])			// ASM2_NV-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]])
	// ASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda\|amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P3]]}, assembler			// ASM2_NV-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda\|amdgcn-amd-amdhsa]]:[[ARCH1]])" {[[P3]]}, assembler
	// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35\|gfx900]])			// ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35\|gfx900]])
	▲ Show 20 Lines • Show All 157 Lines • Show Last 20 Lines

test/Driver/hip-output-file-name.hip

	// REQUIRES: clang-driver			// REQUIRES: clang-driver
	// REQUIRES: x86-registered-target			// REQUIRES: x86-registered-target
	// REQUIRES: amdgpu-registered-target			// REQUIRES: amdgpu-registered-target

	// RUN: %clang -### -c -target x86_64-linux-gnu \			// RUN: %clang -### -c -target x86_64-linux-gnu -fgpu-rdc \
	// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \			// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
	// RUN: 2>&1 | FileCheck %s			// RUN: 2>&1 | FileCheck %s

	// CHECK: {{.*}}clang-offload-bundler{{.*}}"-outputs=hip-output-file-name.o"			// CHECK: {{.*}}clang-offload-bundler{{.*}}"-outputs=hip-output-file-name.o"

test/Driver/hip-toolchain-no-rdc.hip

				// REQUIRES: clang-driver
				// REQUIRES: x86-registered-target
				// REQUIRES: amdgpu-registered-target

				// RUN: %clang -### -target x86_64-linux-gnu -fno-gpu-rdc \
				// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
				// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \
				// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
				// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \
				// RUN: -fuse-ld=lld \
				// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
				// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
				// RUN: 2>&1 \| FileCheck -check-prefixes=CHECK %s

				//
				// Compile device code in a.cu to code object for gfx803.
				//

				// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
				// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
				// CHECK-SAME: {{.*}} "-main-file-name" "a.cu" {{.*}} "-target-cpu" "gfx803"
				// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden"
				// CHECK-SAME: {{.*}} "-o" [[A_BC_803:".*bc"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]]

				// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC_803]]
				// CHECK-SAME: "{{.*}}lib1.bc" "{{.*}}lib2.bc"
				// CHECK-SAME: "-o" [[LINKED_BC_DEV_A_803:".*-gfx803-linked-.*bc"]]

				// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-mcpu=gfx803"
				// CHECK-SAME: "-o" [[OPT_BC_DEV_A_803:".-gfx803-optimized.bc"]]

				// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_803]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" [[OBJ_DEV_A_803:".-gfx803-.o"]]

				// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
				// CHECK-SAME: "-o" "[[IMG_DEV_A_803:.*out]]" [[OBJ_DEV_A_803]]

				//
				// Compile device code in a.cu to code object for gfx900.
				//

				// CHECK: [[CLANG:".clang."]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
				// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
				// CHECK-SAME: {{.}} "-main-file-name" "a.cu" {{.}} "-target-cpu" "gfx900"
				// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden"
				// CHECK-SAME: {{.}} "-o" [[A_BC_900:".bc"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[A_SRC]]

				// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC_900]]
				// CHECK-SAME: "{{.}}lib1.bc" "{{.}}lib2.bc"
				// CHECK-SAME: "-o" [[LINKED_BC_DEV_A_900:".-gfx900-linked-.bc"]]

				// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-mcpu=gfx900"
				// CHECK-SAME: "-o" [[OPT_BC_DEV_A_900:".-gfx900-optimized.bc"]]

				// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_A_900]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-filetype=obj" "-mcpu=gfx900" "-o" [[OBJ_DEV_A_900:".-gfx900-.o"]]

				// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
				// CHECK-SAME: "-o" "[[IMG_DEV_A_900:.*out]]" [[OBJ_DEV_A_900]]

				//
				// Bundle and embed device code in host object for a.cu.
				//

				// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
				// CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
				// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV_A_803]],[[IMG_DEV_A_900]]" "-outputs=[[BUNDLE_A:.*hipfb]]"

				// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
				// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" "-emit-obj"
				// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
				// CHECK-SAME: {{.}} "-o" [[A_OBJ_HOST:".o"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[A_SRC]]
				// CHECK-SAME: {{.*}} "-fcuda-include-gpubinary" "[[BUNDLE_A]]"

				//
				// Compile device code in b.hip to code object for gfx803.
				//

				// CHECK: [[CLANG:".clang."]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
				// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
				// CHECK-SAME: {{.}} "-main-file-name" "b.hip" {{.}} "-target-cpu" "gfx803"
				// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden"
				// CHECK-SAME: {{.}} "-o" [[B_BC_803:".bc"]] "-x" "hip"
				// CHECK-SAME: {{.}} [[B_SRC:".b.hip"]]

				// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[B_BC_803]]
				// CHECK-SAME: "{{.}}lib1.bc" "{{.}}lib2.bc"
				// CHECK-SAME: "-o" [[LINKED_BC_DEV_B_803:".-gfx803-linked-.bc"]]

				// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-mcpu=gfx803"
				// CHECK-SAME: "-o" [[OPT_BC_DEV_B_803:".-gfx803-optimized.bc"]]

				// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_803]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" [[OBJ_DEV_B_803:".-gfx803-.o"]]

				// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
				// CHECK-SAME: "-o" "[[IMG_DEV_B_803:.*out]]" [[OBJ_DEV_B_803]]

				//
				// Compile device code in b.hip to code object for gfx900.
				//

				// CHECK: [[CLANG:".clang."]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
				// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
				// CHECK-SAME: {{.}} "-main-file-name" "b.hip" {{.}} "-target-cpu" "gfx900"
				// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden"
				// CHECK-SAME: {{.}} "-o" [[B_BC_900:".bc"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[B_SRC]]

				// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[B_BC_900]]
				// CHECK-SAME: "{{.}}lib1.bc" "{{.}}lib2.bc"
				// CHECK-SAME: "-o" [[LINKED_BC_DEV_B_900:".-gfx900-linked-.bc"]]

				// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-mcpu=gfx900"
				// CHECK-SAME: "-o" [[OPT_BC_DEV_B_900:".-gfx900-optimized.bc"]]

				// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV_B_900]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-filetype=obj" "-mcpu=gfx900" "-o" [[OBJ_DEV_B_900:".-gfx900-.o"]]

				// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
				// CHECK-SAME: "-o" "[[IMG_DEV_B_900:.*out]]" [[OBJ_DEV_B_900]]

				//
				// Bundle and embed device code in host object for b.hip.
				//

				// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
				// CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
				// CHECK-SAME: "-inputs={{.}},[[IMG_DEV_B_803]],[[IMG_DEV_B_900]]" "-outputs=[[BUNDLE_A:.hipfb]]"

				// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
				// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" "-emit-obj"
				// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
				// CHECK-SAME: {{.}} "-o" [[B_OBJ_HOST:".o"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[B_SRC]]
				// CHECK-SAME: {{.*}} "-fcuda-include-gpubinary" "[[BUNDLE_A]]"

				//
				// Link host objects.
				//

				// CHECK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]]
				// CHECK-NOT: "-T" "{{.*}}.lk"

test/Driver/hip-toolchain-rdc.hip

				// REQUIRES: clang-driver
				// REQUIRES: x86-registered-target
				// REQUIRES: amdgpu-registered-target

				// RUN: %clang -### -target x86_64-linux-gnu \
				// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
				// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \
				// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
				// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \
				// RUN: -fuse-ld=lld -fgpu-rdc \
				// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
				// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
				// RUN: 2>&1 \| FileCheck %s

				// CHECK: [[CLANG:".clang."]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
				// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
				// CHECK-SAME: {{.}} "-main-file-name" "a.cu" {{.}} "-target-cpu" "gfx803"
				// CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fvisibility" "hidden"
				// CHECK-SAME: {{.}} "-o" [[A_BC:".bc"]] "-x" "hip"
				// CHECK-SAME: {{.}} [[A_SRC:".a.cu"]]

				// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
				// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
				// CHECK-SAME: {{.}} "-main-file-name" "b.hip" {{.}} "-target-cpu" "gfx803"
				// CHECK-SAME: "-fcuda-is-device" "-fgpu-rdc" "-fvisibility" "hidden"
				// CHECK-SAME: {{.}} "-o" [[B_BC:".bc"]] "-x" "hip"
				// CHECK-SAME: {{.}} [[B_SRC:".b.hip"]]

				// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC]] [[B_BC]]
				// CHECK-SAME: "{{.}}lib1.bc" "{{.}}lib2.bc"
				// CHECK-SAME: "-o" [[LINKED_BC_DEV1:".-gfx803-linked-.bc"]]

				// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-mcpu=gfx803"
				// CHECK-SAME: "-o" [[OPT_BC_DEV1:".-gfx803-optimized.bc"]]

				// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" [[OBJ_DEV1:".-gfx803-.o"]]

				// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
				// CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[OBJ_DEV1]]

				// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
				// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
				// CHECK-SAME: {{.}} "-main-file-name" "a.cu" {{.}} "-target-cpu" "gfx900"
				// CHECK-SAME: "-fcuda-is-device" {{.}} "-o" [[A_BC:".bc"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[A_SRC]]

				// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
				// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
				// CHECK-SAME: {{.}} "-main-file-name" "b.hip" {{.}} "-target-cpu" "gfx900"
				// CHECK-SAME: "-fcuda-is-device" {{.}} "-o" [[B_BC:".bc"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[B_SRC]]

				// CHECK: [[LLVM_LINK]] [[A_BC]] [[B_BC]]
				// CHECK-SAME: "{{.}}lib1.bc" "{{.}}lib2.bc"
				// CHECK-SAME: "-o" [[LINKED_BC_DEV2:".-gfx900-linked-.bc"]]

				// CHECK: [[OPT]] [[LINKED_BC_DEV2]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-mcpu=gfx900"
				// CHECK-SAME: "-o" [[OPT_BC_DEV2:".-gfx900-optimized.bc"]]

				// CHECK: [[LLC]] [[OPT_BC_DEV2]] "-mtriple=amdgcn-amd-amdhsa"
				// CHECK-SAME: "-filetype=obj" "-mcpu=gfx900" "-o" [[OBJ_DEV2:".-gfx900-.o"]]

				// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared"
				// CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[OBJ_DEV2]]

				// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
				// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" "-emit-obj"
				// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
				// CHECK-SAME: {{.}} "-o" [[A_OBJ_HOST:".o"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[A_SRC]]

				// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
				// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" "-emit-obj"
				// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
				// CHECK-SAME: {{.}} "-o" [[B_OBJ_HOST:".o"]] "-x" "hip"
				// CHECK-SAME: {{.*}} [[B_SRC]]

				// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
				// CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
				// CHECK-SAME: "-inputs={{.}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.hipfb]]"

				// CHECK: [[LD:".*ld.*"]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]]
				// CHECK-SAME: {{.*}} "-T" "{{.*}}.lk"

test/Driver/hip-toolchain.hip

	// REQUIRES: clang-driver
	// REQUIRES: x86-registered-target
	// REQUIRES: amdgpu-registered-target

	// RUN: %clang -### -target x86_64-linux-gnu \
	// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
	// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \
	// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \
	// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \
	// RUN: -fuse-ld=lld \
	// RUN: %S/Inputs/hip_multiple_inputs/a.cu \
	// RUN: %S/Inputs/hip_multiple_inputs/b.hip \
	// RUN: 2>&1 \| FileCheck %s

	// CHECK: [[CLANG:".clang."]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
	// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
	// CHECK-SAME: {{.}} "-main-file-name" "a.cu" {{.}} "-target-cpu" "gfx803"
	// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden"
	// CHECK-SAME: {{.}} "-o" [[A_BC:".bc"]] "-x" "hip"
	// CHECK-SAME: {{.}} [[A_SRC:".a.cu"]]

	// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
	// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
	// CHECK-SAME: {{.}} "-main-file-name" "b.hip" {{.}} "-target-cpu" "gfx803"
	// CHECK-SAME: "-fcuda-is-device" "-fvisibility" "hidden"
	// CHECK-SAME: {{.}} "-o" [[B_BC:".bc"]] "-x" "hip"
	// CHECK-SAME: {{.}} [[B_SRC:".b.hip"]]

	// CHECK: [[LLVM_LINK:"*.llvm-link"]] [[A_BC]] [[B_BC]]
	// CHECK-SAME: "{{.}}lib1.bc" "{{.}}lib2.bc"
	// CHECK-SAME: "-o" [[LINKED_BC_DEV1:".-gfx803-linked-.bc"]]

	// CHECK: [[OPT:".*opt"]] [[LINKED_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa"
	// CHECK-SAME: "-mcpu=gfx803"
	// CHECK-SAME: "-o" [[OPT_BC_DEV1:".-gfx803-optimized.bc"]]

	// CHECK: [[LLC: ".*llc"]] [[OPT_BC_DEV1]] "-mtriple=amdgcn-amd-amdhsa"
	// CHECK-SAME: "-filetype=obj" "-mcpu=gfx803" "-o" [[OBJ_DEV1:".-gfx803-.o"]]

	// CHECK: [[LLD: ".*lld"]] "-flavor" "gnu" "--no-undefined" "-shared"
	// CHECK-SAME: "-o" "[[IMG_DEV1:.*out]]" [[OBJ_DEV1]]

	// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
	// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
	// CHECK-SAME: {{.}} "-main-file-name" "a.cu" {{.}} "-target-cpu" "gfx900"
	// CHECK-SAME: "-fcuda-is-device" {{.}} "-o" [[A_BC:".bc"]] "-x" "hip"
	// CHECK-SAME: {{.*}} [[A_SRC]]

	// CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa"
	// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" "-emit-llvm-bc"
	// CHECK-SAME: {{.}} "-main-file-name" "b.hip" {{.}} "-target-cpu" "gfx900"
	// CHECK-SAME: "-fcuda-is-device" {{.}} "-o" [[B_BC:".bc"]] "-x" "hip"
	// CHECK-SAME: {{.*}} [[B_SRC]]

	// CHECK: [[LLVM_LINK]] [[A_BC]] [[B_BC]]
	// CHECK-SAME: "{{.}}lib1.bc" "{{.}}lib2.bc"
	// CHECK-SAME: "-o" [[LINKED_BC_DEV2:".-gfx900-linked-.bc"]]

	// CHECK: [[OPT]] [[LINKED_BC_DEV2]] "-mtriple=amdgcn-amd-amdhsa"
	// CHECK-SAME: "-mcpu=gfx900"
	// CHECK-SAME: "-o" [[OPT_BC_DEV2:".-gfx900-optimized.bc"]]

	// CHECK: [[LLC]] [[OPT_BC_DEV2]] "-mtriple=amdgcn-amd-amdhsa"
	// CHECK-SAME: "-filetype=obj" "-mcpu=gfx900" "-o" [[OBJ_DEV2:".-gfx900-.o"]]

	// CHECK: [[LLD]] "-flavor" "gnu" "--no-undefined" "-shared"
	// CHECK-SAME: "-o" "[[IMG_DEV2:.*out]]" [[OBJ_DEV2]]

	// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
	// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" "-emit-obj"
	// CHECK-SAME: {{.*}} "-main-file-name" "a.cu"
	// CHECK-SAME: {{.}} "-o" [[A_OBJ_HOST:".o"]] "-x" "hip"
	// CHECK-SAME: {{.*}} [[A_SRC]]

	// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu"
	// CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" "-emit-obj"
	// CHECK-SAME: {{.*}} "-main-file-name" "b.hip"
	// CHECK-SAME: {{.}} "-o" [[B_OBJ_HOST:".o"]] "-x" "hip"
	// CHECK-SAME: {{.*}} [[B_SRC]]

	// CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o"
	// CHECK-SAME: "-targets={{.*}},hip-amdgcn-amd-amdhsa-gfx803,hip-amdgcn-amd-amdhsa-gfx900"
	// CHECK-SAME: "-inputs={{.}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.o]]"

	// CHECK: [[LD:".ld."]] {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]]
	// CHECK-SAME: {{.}} "-T" "{{.}}.lk"

test/SemaCUDA/extern-shared.cu

	// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -verify %s			// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -verify %s
	// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fcuda-is-device -verify %s			// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fcuda-is-device -verify %s

	// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fcuda-rdc -verify=rdc %s			// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fgpu-rdc -verify=rdc %s
	// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fcuda-is-device -fcuda-rdc -verify=rdc %s			// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fcuda-is-device -fgpu-rdc -verify=rdc %s

	// Most of these declarations are fine in separate compilation mode.			// Most of these declarations are fine in separate compilation mode.

	#include "Inputs/cuda.h"			#include "Inputs/cuda.h"

	__device__ void foo() {			__device__ void foo() {
	extern __shared__ int x; // expected-error {{__shared__ variable 'x' cannot be 'extern'}}			extern __shared__ int x; // expected-error {{__shared__ variable 'x' cannot be 'extern'}}
	extern __shared__ int arr[]; // ok			extern __shared__ int arr[]; // ok
	Show All 30 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[HIP] Support early finalization of device code for -fno-gpu-rdc
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 167980

include/clang/Basic/LangOptions.def

include/clang/Driver/Options.td

include/clang/Driver/Types.def

lib/AST/Decl.cpp

lib/CodeGen/CGCUDANV.cpp

lib/Driver/Driver.cpp

lib/Driver/ToolChains/Clang.cpp

lib/Driver/ToolChains/CommonArgs.cpp

lib/Driver/ToolChains/Cuda.cpp

lib/Driver/ToolChains/HIP.h

lib/Driver/ToolChains/HIP.cpp

lib/Frontend/CompilerInvocation.cpp

lib/Sema/SemaDeclAttr.cpp

test/CodeGenCUDA/device-stub.cu

test/Driver/cuda-external-tools.cu

test/Driver/cuda-phases.cu

test/Driver/hip-output-file-name.hip

test/Driver/hip-toolchain-no-rdc.hip

test/Driver/hip-toolchain-rdc.hip

test/Driver/hip-toolchain.hip

test/SemaCUDA/extern-shared.cu

This is an archive of the discontinued LLVM Phabricator instance.

[HIP] Support early finalization of device code for -fno-gpu-rdc
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 167980

include/clang/Basic/LangOptions.def

include/clang/Driver/Options.td

include/clang/Driver/Types.def

lib/AST/Decl.cpp

lib/CodeGen/CGCUDANV.cpp

lib/Driver/Driver.cpp

lib/Driver/ToolChains/Clang.cpp

lib/Driver/ToolChains/CommonArgs.cpp

lib/Driver/ToolChains/Cuda.cpp

lib/Driver/ToolChains/HIP.h

lib/Driver/ToolChains/HIP.cpp

lib/Frontend/CompilerInvocation.cpp

lib/Sema/SemaDeclAttr.cpp

test/CodeGenCUDA/device-stub.cu

test/Driver/cuda-external-tools.cu

test/Driver/cuda-phases.cu

test/Driver/hip-output-file-name.hip

test/Driver/hip-toolchain-no-rdc.hip

test/Driver/hip-toolchain-rdc.hip

test/Driver/hip-toolchain.hip

test/SemaCUDA/extern-shared.cu

[HIP] Support early finalization of device code for -fno-gpu-rdc
ClosedPublic