Diff 34230

include/clang/Basic/LangOptions.def

	Show First 20 Lines • Show All 160 Lines • ▼ Show 20 Lines
	LANGOPT(NativeHalfType , 1, 0, "Native half type support")			LANGOPT(NativeHalfType , 1, 0, "Native half type support")
	LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")			LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
	LANGOPT(CUDA , 1, 0, "CUDA")			LANGOPT(CUDA , 1, 0, "CUDA")
	LANGOPT(OpenMP , 1, 0, "OpenMP support")			LANGOPT(OpenMP , 1, 0, "OpenMP support")
	LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")			LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
	LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device")			LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device")
	LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions")			LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions")
	LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)")			LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)")
				LANGOPT(CUDATargetOverloads, 1, 0, "Enable function overloads based on CUDA target attributes")

	LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")			LANGOPT(AssumeSaneOperatorNew , 1, 1, "implicit __attribute__((malloc)) for C++'s new operators")
	LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")			LANGOPT(SizedDeallocation , 1, 0, "enable sized deallocation functions")
	LANGOPT(ConceptsTS , 1, 0, "enable C++ Extensions for Concepts")			LANGOPT(ConceptsTS , 1, 0, "enable C++ Extensions for Concepts")
	BENIGN_LANGOPT(ElideConstructors , 1, 1, "C++ copy constructor elision")			BENIGN_LANGOPT(ElideConstructors , 1, 1, "C++ copy constructor elision")
	BENIGN_LANGOPT(DumpRecordLayouts , 1, 0, "dumping the layout of IRgen'd records")			BENIGN_LANGOPT(DumpRecordLayouts , 1, 0, "dumping the layout of IRgen'd records")
	BENIGN_LANGOPT(DumpRecordLayoutsSimple , 1, 0, "dumping the layout of IRgen'd records in a simple form")			BENIGN_LANGOPT(DumpRecordLayoutsSimple , 1, 0, "dumping the layout of IRgen'd records in a simple form")
	BENIGN_LANGOPT(DumpVTableLayouts , 1, 0, "dumping the layouts of emitted vtables")			BENIGN_LANGOPT(DumpVTableLayouts , 1, 0, "dumping the layouts of emitted vtables")
	▲ Show 20 Lines • Show All 64 Lines • Show Last 20 Lines

include/clang/Driver/CC1Options.td

	Show First 20 Lines • Show All 653 Lines • ▼ Show 20 Lines
	def fcuda_allow_host_calls_from_host_device : Flag<["-"],			def fcuda_allow_host_calls_from_host_device : Flag<["-"],
	"fcuda-allow-host-calls-from-host-device">,			"fcuda-allow-host-calls-from-host-device">,
	HelpText<"Allow host device functions to call host functions">;			HelpText<"Allow host device functions to call host functions">;
	def fcuda_disable_target_call_checks : Flag<["-"],			def fcuda_disable_target_call_checks : Flag<["-"],
	"fcuda-disable-target-call-checks">,			"fcuda-disable-target-call-checks">,
	HelpText<"Disable all cross-target (host, device, etc.) call checks in CUDA">;			HelpText<"Disable all cross-target (host, device, etc.) call checks in CUDA">;
	def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,			def fcuda_include_gpubinary : Separate<["-"], "fcuda-include-gpubinary">,
	HelpText<"Incorporate CUDA device-side binary into host object file.">;			HelpText<"Incorporate CUDA device-side binary into host object file.">;
				def fcuda_target_overloads : Flag<["-"], "fcuda-target-overloads">,
				HelpText<"Enable function overloads based on CUDA target attributes.">;

	} // let Flags = [CC1Option]			} // let Flags = [CC1Option]


	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// cc1as-only Options			// cc1as-only Options
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	Show All 23 Lines

include/clang/Sema/Sema.h

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,594 Lines • ▼ Show 20 Lines	enum CUDAFunctionTarget {
CFT_Global,		CFT_Global,
CFT_Host,		CFT_Host,
CFT_HostDevice,		CFT_HostDevice,
CFT_InvalidTarget		CFT_InvalidTarget
};		};

CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);		CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D);

		// Relative preference of a caller/callee combination. Enumerators are
		// listed from least to most preferred; code ranks candidates by comparing
		// these values with '<' and '>', so the order must be preserved.
		enum CUDAFunctionPreference {
		CFP_Never, // Invalid caller/callee combination.
		CFP_LastResort, // Lowest priority. Only in effect if
		elibenUnsubmitted Done Reply Inline Actions The comment should explain exactly how it depends on the option eliben: The comment should explain exactly how it depends on the option
		traAuthorUnsubmitted Not Done Reply Inline Actions Rephrased. tra: Rephrased.
		// LangOpts.CUDADisableTargetCallChecks is true.
		CFP_Fallback, // Low priority caller/callee combination
		CFP_Best, // Preferred caller/callee combination
		};

		/// Identifies relative preference of a given Caller/Callee
		elibenUnsubmitted Done Reply Inline Actions Document this function [I realize the other CUDA functions are not documented here :-(, but hey this is new code so it should follow the rules] eliben: Document this function [I realize the other CUDA functions are not documented here :-(, but…
		/// combination, based on their host/device attributes.
		/// \param Caller function which needs address of \p Callee.
		/// nullptr in case of global context.
		/// \param Callee target function
		///
		/// \returns preference value for particular Caller/Callee combination.
		CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
		const FunctionDecl *Callee);

bool CheckCUDATarget(const FunctionDecl Caller, const FunctionDecl Callee);		bool CheckCUDATarget(const FunctionDecl Caller, const FunctionDecl Callee);

		/// Finds a function in \p Matches with highest calling priority
		/// from \p Caller context and erases all functions with lower
		/// calling priority. \p Matches is modified in place and is left
		/// untouched when it holds at most one entry.
		void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
		SmallVectorImpl<FunctionDecl *> &Matches);
		/// Same as above, for matches stored as DeclAccessPair entries; the
		/// declaration held by each entry is the one that is ranked.
		void EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
		SmallVectorImpl<DeclAccessPair> &Matches);
		/// Same as above, for matches stored as (DeclAccessPair, FunctionDecl *)
		/// pairs; the FunctionDecl in each pair is the one that is ranked.
		void EraseUnwantedCUDAMatches(
		const FunctionDecl *Caller,
		SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);

/// Given a implicit special member, infer its CUDA target from the		/// Given a implicit special member, infer its CUDA target from the
/// calls it needs to make to underlying base/field special members.		/// calls it needs to make to underlying base/field special members.
/// \param ClassDecl the class for which the member is being created.		/// \param ClassDecl the class for which the member is being created.
/// \param CSM the kind of special member.		/// \param CSM the kind of special member.
/// \param MemberDecl the special member itself.		/// \param MemberDecl the special member itself.
/// \param ConstRHS true if this is a copy operation with a const object on		/// \param ConstRHS true if this is a copy operation with a const object on
/// its RHS.		/// its RHS.
/// \param Diagnose true if this call should emit diagnostics.		/// \param Diagnose true if this call should emit diagnostics.
▲ Show 20 Lines • Show All 487 Lines • Show Last 20 Lines

lib/AST/ItaniumMangle.cpp

	Show First 20 Lines • Show All 473 Lines • ▼ Show 20 Lines
	void CXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) {			void CXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD) {
	// <encoding> ::= <function name> <bare-function-type>			// <encoding> ::= <function name> <bare-function-type>
	mangleName(FD);			mangleName(FD);

	// Don't mangle in the type if this isn't a decl we should typically mangle.			// Don't mangle in the type if this isn't a decl we should typically mangle.
	if (!Context.shouldMangleDeclName(FD))			if (!Context.shouldMangleDeclName(FD))
	return;			return;

				// __host__ __device__ functions co-exist with both __host__ and
				// __device__ functions, so they need a different mangled name.
				// Attributes are supposed to appear in alphabetic order, so we need
				// to put 'enable_if' in-between 'device' and 'host'.
				if (FD->hasAttr<CUDADeviceAttr>() && FD->hasAttr<CUDAHostAttr>())
				Out << "Ua6device";

	if (FD->hasAttr<EnableIfAttr>()) {			if (FD->hasAttr<EnableIfAttr>()) {
	FunctionTypeDepthState Saved = FunctionTypeDepth.push();			FunctionTypeDepthState Saved = FunctionTypeDepth.push();
	Out << "Ua9enable_ifI";			Out << "Ua9enable_ifI";
	// FIXME: specific_attr_iterator iterates in reverse order. Fix that and use			// FIXME: specific_attr_iterator iterates in reverse order. Fix that and use
	// it here.			// it here.
	for (AttrVec::const_reverse_iterator I = FD->getAttrs().rbegin(),			for (AttrVec::const_reverse_iterator I = FD->getAttrs().rbegin(),
	E = FD->getAttrs().rend();			E = FD->getAttrs().rend();
	I != E; ++I) {			I != E; ++I) {
	EnableIfAttr EIA = dyn_cast<EnableIfAttr>(I);			EnableIfAttr EIA = dyn_cast<EnableIfAttr>(I);
	if (!EIA)			if (!EIA)
	continue;			continue;
	Out << 'X';			Out << 'X';
	mangleExpression(EIA->getCond());			mangleExpression(EIA->getCond());
	Out << 'E';			Out << 'E';
	}			}
	Out << 'E';			Out << 'E';
	FunctionTypeDepth.pop(Saved);			FunctionTypeDepth.pop(Saved);
	}			}

				if (FD->hasAttr<CUDADeviceAttr>() && FD->hasAttr<CUDAHostAttr>())
				Out << "Ua4host";

	// Whether the mangling of a function type includes the return type depends on			// Whether the mangling of a function type includes the return type depends on
				elibenUnsubmitted Not Done Reply Inline Actions Why device before host? eliben: Why device before host?
				traAuthorUnsubmitted Not Done Reply Inline Actions Name mangling chapter in Itanium CXX ABI says: In cases where multiple order-insensitive qualifiers are present, they should be ordered [...] with the 'U' qualifiers in alphabetical order by the vendor name So I've ordered attributes alphabetically. tra: [[ https://mentorembedded.github.io/cxx-abi/abi.html#mangling-type \| Name mangling chapter ]]…
				rsmithUnsubmitted Done Reply Inline Actions If we have `enable_if` on a `__host__` `__device__` function, I think we should mangle as `Ua6deviceUa9enable_ifI...EUa4host`. (The relative order of `enable_if` attributes matters, but the order of them relative to `host` and `device` does not, so retaining alphabetical order seems best.) rsmith: If we have `enable_if` on a `__host__` `__device__` function, I think we should mangle as…
	// the context and the nature of the function. The rules for deciding whether			// the context and the nature of the function. The rules for deciding whether
	// the return type is included are:			// the return type is included are:
	//			//
	// 1. Template functions (names or types) have return types encoded, with			// 1. Template functions (names or types) have return types encoded, with
	// the exceptions listed below.			// the exceptions listed below.
	// 2. Function types not appearing as part of a function name mangling,			// 2. Function types not appearing as part of a function name mangling,
	// e.g. parameters, pointer types, etc., have return type encoded, with the			// e.g. parameters, pointer types, etc., have return type encoded, with the
	// exceptions listed below.			// exceptions listed below.
	▲ Show 20 Lines • Show All 3,606 Lines • ▼ Show 20 Lines
	void ItaniumMangleContextImpl::mangleStringLiteral(const StringLiteral *, raw_ostream &) {			void ItaniumMangleContextImpl::mangleStringLiteral(const StringLiteral *, raw_ostream &) {
	llvm_unreachable("Can't mangle string literals");			llvm_unreachable("Can't mangle string literals");
	}			}

	ItaniumMangleContext *			ItaniumMangleContext *
	ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) {			ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags) {
	return new ItaniumMangleContextImpl(Context, Diags);			return new ItaniumMangleContextImpl(Context, Diags);
	}			}

lib/Frontend/CompilerInvocation.cpp

Show First 20 Lines • Show All 1,406 Lines • ▼ Show 20 Lines	if (Args.hasArg(OPT_fcuda_is_device))
Opts.CUDAIsDevice = 1;		Opts.CUDAIsDevice = 1;

if (Args.hasArg(OPT_fcuda_allow_host_calls_from_host_device))		if (Args.hasArg(OPT_fcuda_allow_host_calls_from_host_device))
Opts.CUDAAllowHostCallsFromHostDevice = 1;		Opts.CUDAAllowHostCallsFromHostDevice = 1;

if (Args.hasArg(OPT_fcuda_disable_target_call_checks))		if (Args.hasArg(OPT_fcuda_disable_target_call_checks))
Opts.CUDADisableTargetCallChecks = 1;		Opts.CUDADisableTargetCallChecks = 1;

		if (Args.hasArg(OPT_fcuda_target_overloads))
		Opts.CUDATargetOverloads = 1;

if (Opts.ObjC1) {		if (Opts.ObjC1) {
if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {		if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) {
StringRef value = arg->getValue();		StringRef value = arg->getValue();
if (Opts.ObjCRuntime.tryParse(value))		if (Opts.ObjCRuntime.tryParse(value))
Diags.Report(diag::err_drv_unknown_objc_runtime) << value;		Diags.Report(diag::err_drv_unknown_objc_runtime) << value;
}		}

if (Args.hasArg(OPT_fobjc_gc_only))		if (Args.hasArg(OPT_fobjc_gc_only))
▲ Show 20 Lines • Show All 726 Lines • Show Last 20 Lines

lib/Sema/SemaCUDA.cpp

Show First 20 Lines • Show All 54 Lines • ▼ Show 20 Lines	if (D->hasAttr<CUDADeviceAttr>()) {
// Some implicit declarations (like intrinsic functions) are not marked.		// Some implicit declarations (like intrinsic functions) are not marked.
// Set the most lenient target on them for maximal flexibility.		// Set the most lenient target on them for maximal flexibility.
return CFT_HostDevice;		return CFT_HostDevice;
}		}

return CFT_Host;		return CFT_Host;
}		}

		// * CUDA Call preference table
		//
		// F - from,
		// T - to
		// Ph - preference in host mode
		// Pd - preference in device mode
		// H - handled in (x)
		// Preferences: b-best, f-fallback, l-last resort, n-never.
		//
		// | F  | T  | Ph | Pd |  H  |
		// |----+----+----+----+-----+
		// | d  | d  | b  | b  | (b) |
		// | d  | g  | n  | n  | (a) |
		// | d  | h  | l  | l  | (e) |
		// | d  | hd | f  | f  | (c) |
		// | g  | d  | b  | b  | (b) |
		// | g  | g  | n  | n  | (a) |
		// | g  | h  | l  | l  | (e) |
		// | g  | hd | f  | f  | (c) |
		// | h  | d  | l  | l  | (e) |
		// | h  | g  | b  | b  | (b) |
		// | h  | h  | b  | b  | (b) |
		// | h  | hd | f  | f  | (c) |
		// | hd | d  | l  | f  | (d) |
		// | hd | g  | f  | n  |(d/a)|
		// | hd | h  | f  | l  | (d) |
		// | hd | hd | b  | b  | (b) |

		/// Ranks how desirable it is for \p Caller to reference \p Callee, based on
		/// the CUDA target of each function and on whether this is a device-side
		/// compilation (LangOpts.CUDAIsDevice).
		///
		/// \param Caller the referencing function; a null pointer is treated as
		/// host code.
		/// \param Callee the referenced function; must not be null.
		///
		/// \returns CFP_Never for combinations that are rejected outright, otherwise
		/// one of CFP_LastResort / CFP_Fallback / CFP_Best. Cross-target
		/// combinations are CFP_LastResort only when
		/// LangOpts.CUDADisableTargetCallChecks is set, and CFP_Never otherwise.
		/// The letters (a)-(e) below match the "H" column of the preference table.
		Sema::CUDAFunctionPreference
		Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
		                             const FunctionDecl *Callee) {
		  assert(getLangOpts().CUDATargetOverloads &&
		         "Should not be called w/o enabled target overloads.");

		  assert(Callee && "Callee must be valid.");
		  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
		elibenUnsubmitted Not Done Reply Inline Actions Why allow Caller to be nullptr but not Callee? Also, != nullptr? eliben: Why allow Caller to be nullptr but not Callee? Also, != nullptr?
		traAuthorUnsubmitted Not Done Reply Inline Actions Caller may be null in global context (e.g. we're assigning function address to a global variable). Callee can't be. I'll add an assert(). Also, != nullptr? I'm not quite sure what you mean here. Are you suggesting replacing "Caller ?" with "Caller != nullptr ?" tra: Caller may be null in global context (e.g. we're assigning function address to a global…
		elibenUnsubmitted Done Reply Inline Actions Yes, I believe this is the accepted style when comparing pointers for null-ness. eliben: Yes, I believe this is the accepted style when comparing pointers for null-ness.
		  // A missing caller is ranked as if it were a host function.
		  CUDAFunctionTarget CallerTarget =
		      (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;

		  // If one of the targets is invalid, the check always fails, no matter what
		  // the other target is.
		  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
		    return CFP_Never;

		elibenUnsubmitted Done Reply Inline Actions Not just global from global. global from device too, right? As for global from HD, the CUDA guide forbids it eliben: Not just global from global. global from device too, right? As for global from HD, the CUDA…
		traAuthorUnsubmitted Not Done Reply Inline Actions I'll update the comment. As for HD->G, nvcc happily compiles following code: __global__ void kernel() {} __host__ __device__ void foo() { #if !defined(__CUDA_ARCH__) kernel<<<0,0>>>(); #endif } Nvcc does produce an error for HD->G call during device compilation (the error actually complains about D->G or G->G calling). This patch matches nvcc behavior. tra: I'll update the comment. As for HD->G, nvcc happily compiles following code: ``` __global__…
		  // (a) Can't call global from some contexts until we support CUDA's
		  // dynamic parallelism.
		  if (CalleeTarget == CFT_Global &&
		      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
		       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
		    return CFP_Never;

		  // (b) Best case scenarios
		  if (CalleeTarget == CallerTarget ||
		      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
		      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
		    return CFP_Best;

		  // (c) Calling HostDevice is OK as a fallback that works for everyone.
		  if (CalleeTarget == CFT_HostDevice)
		    return CFP_Fallback;

		  // Figure out what should be returned 'last resort' cases. Normally
		  // those would not be allowed, but we'll consider them if
		  // CUDADisableTargetCallChecks is true.
		  CUDAFunctionPreference QuestionableResult =
		      getLangOpts().CUDADisableTargetCallChecks ? CFP_LastResort : CFP_Never;

		  // (d) HostDevice behavior depends on compilation mode.
		  if (CallerTarget == CFT_HostDevice) {
		    // Calling a function that matches compilation mode is OK.
		    // Calling a function from the other side is frowned upon.
		    if (getLangOpts().CUDAIsDevice)
		      return CalleeTarget == CFT_Device ? CFP_Fallback : QuestionableResult;
		    return (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)
		               ? CFP_Fallback
		               : QuestionableResult;
		  }

		  // (e) Calling across device/host boundary is not something you should do.
		  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
		      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
		      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
		    return QuestionableResult;

		  llvm_unreachable("All cases should've been handled by now.");
		}

bool Sema::CheckCUDATarget(const FunctionDecl *Caller,		bool Sema::CheckCUDATarget(const FunctionDecl *Caller,
const FunctionDecl *Callee) {		const FunctionDecl *Callee) {
		// With target overloads enabled, we only disallow calling
		// combinations with CFP_Never.
		if (getLangOpts().CUDATargetOverloads)
		return IdentifyCUDAPreference(Caller,Callee) == CFP_Never;

// The CUDADisableTargetCallChecks short-circuits this check: we assume all		// The CUDADisableTargetCallChecks short-circuits this check: we assume all
// cross-target calls are valid.		// cross-target calls are valid.
if (getLangOpts().CUDADisableTargetCallChecks)		if (getLangOpts().CUDADisableTargetCallChecks)
return false;		return false;

CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),		CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller),
CalleeTarget = IdentifyCUDATarget(Callee);		CalleeTarget = IdentifyCUDATarget(Callee);

Show All 39 Lines	if (InDeviceMode && CalleeTarget != CFT_Device) {

return true;		return true;
}		}
}		}

return false;		return false;
}		}

		/// Shared implementation behind the Sema::EraseUnwantedCUDAMatches overloads.
		/// Ranks every entry of \p Matches with Sema::IdentifyCUDAPreference, as seen
		/// from \p Caller, and removes the entries that rank below the best one.
		/// Does nothing when \p Matches holds at most one entry. Surviving entries
		/// may end up in a different order than they started in.
		///
		/// \param S Sema instance used for language options and ranking.
		/// \param Caller passed through unchanged to IdentifyCUDAPreference.
		/// \param Matches candidate list, filtered in place.
		/// \param FetchDecl callable mapping an entry of \p Matches to the function
		/// declaration that should be ranked.
		template <typename T, typename FetchDeclFn>
		static void EraseUnwantedCUDAMatchesImpl(Sema &S, const FunctionDecl *Caller,
		llvm::SmallVectorImpl<T> &Matches,
		FetchDeclFn FetchDecl) {
		rsmithUnsubmitted Done Reply Inline Actions We don't need the overhead of `std::function` here. Use this instead: template<typename T, typename FetchDeclFn> static void Erase...( ..., FetchDeclFn FetchDecl) { rsmith: We don't need the overhead of `std::function` here. Use this instead: template<typename T…
		assert(S.getLangOpts().CUDATargetOverloads &&
		"Should not be called w/o enabled target overloads.");
		// Nothing to choose between with zero or one candidate.
		if (Matches.size() <= 1)
		return;

		// Find the best call preference among the functions in Matches.
		Sema::CUDAFunctionPreference P, BestCFP = Sema::CFP_Never;
		for (auto const &Match : Matches) {
		P = S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
		if (P > BestCFP)
		BestCFP = P;
		}

		// Erase all functions with lower priority.
		// N tracks the live size. A rejected entry is overwritten with the last
		// live entry and the vector shrinks by one; I is not advanced in that case
		// so the entry that was moved into slot I gets examined next.
		for (unsigned I = 0, N = Matches.size(); I != N;)
		if (S.IdentifyCUDAPreference(Caller, FetchDecl(Matches[I])) < BestCFP) {
		Matches[I] = Matches[--N];
		Matches.resize(N);
		} else {
		++I;
		}
		}

		/// Keeps only the entries of \p Matches that have the highest CUDA calling
		/// preference from \p Caller, filtering \p Matches in place.
		void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
		                                    SmallVectorImpl<FunctionDecl *> &Matches) {
		  // The helper takes Sema by reference, hence *this. Entries are already
		  // function declarations, so the fetch callback simply hands the pointer
		  // back.
		  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
		      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
		}

		/// DeclAccessPair flavour: filters \p Matches in place, keeping only the
		/// entries with the highest CUDA calling preference from \p Caller.
		void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
		                                    SmallVectorImpl<DeclAccessPair> &Matches) {
		  // Each entry is ranked by the declaration it holds, viewed as a function.
		  auto DeclOfEntry = [](const DeclAccessPair &Entry) {
		    return dyn_cast<FunctionDecl>(Entry.getDecl());
		  };
		  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(*this, Caller, Matches,
		                                               DeclOfEntry);
		}

		void Sema::EraseUnwantedCUDAMatches(
		const FunctionDecl *Caller,
		SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
		EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
		*this, Caller, Matches,
		[](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
		return dyn_cast<FunctionDecl>(item.second);
		});
		}

/// When an implicitly-declared special member has to invoke more than one		/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these		/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member __host__ and another's is		/// members. For example, if one base's member __host__ and another's is
/// __device__, it's a conflict.		/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and		/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in		/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.		/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.		/// \return true if there's a conflict, false otherwise.
▲ Show 20 Lines • Show All 153 Lines • Show Last 20 Lines

lib/Sema/SemaDecl.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 5,504 Lines • ▼ Show 20 Lines
	///			///
	/// FIXME: This is a hack.			/// FIXME: This is a hack.
	template<typename T>			template<typename T>
	static bool isIncompleteDeclExternC(Sema &S, const T *D) {			static bool isIncompleteDeclExternC(Sema &S, const T *D) {
	if (S.getLangOpts().CPlusPlus) {			if (S.getLangOpts().CPlusPlus) {
	// In C++, the overloadable attribute negates the effects of extern "C".			// In C++, the overloadable attribute negates the effects of extern "C".
	if (!D->isInExternCContext() \|\| D->template hasAttr<OverloadableAttr>())			if (!D->isInExternCContext() \|\| D->template hasAttr<OverloadableAttr>())
	return false;			return false;

				// So do CUDA's host/device attributes if overloading is enabled.
				if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
				(D->template hasAttr<CUDADeviceAttr>() \|\|
				D->template hasAttr<CUDAHostAttr>()))
				return false;
	}			}
	return D->isExternC();			return D->isExternC();
	}			}

	static bool shouldConsiderLinkage(const VarDecl *VD) {			static bool shouldConsiderLinkage(const VarDecl *VD) {
	const DeclContext *DC = VD->getDeclContext()->getRedeclContext();			const DeclContext *DC = VD->getDeclContext()->getRedeclContext();
	if (DC->isFunctionOrMethod())			if (DC->isFunctionOrMethod())
	return VD->hasExternalStorage();			return VD->hasExternalStorage();
	▲ Show 20 Lines • Show All 9,023 Lines • Show Last 20 Lines

lib/Sema/SemaExprCXX.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,250 Lines • ▼ Show 20 Lines	if (getLangOpts().SizedDeallocation && Matches.size() == 2) {
if (Matches[0]->getNumParams() != NumArgs)		if (Matches[0]->getNumParams() != NumArgs)
Matches.erase(Matches.begin());		Matches.erase(Matches.begin());
else		else
Matches.erase(Matches.begin() + 1);		Matches.erase(Matches.begin() + 1);
assert(Matches[0]->getNumParams() == NumArgs &&		assert(Matches[0]->getNumParams() == NumArgs &&
"found an unexpected usual deallocation function");		"found an unexpected usual deallocation function");
}		}

		if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
		rsmithUnsubmitted Done Reply Inline Actions I don't see any test coverage for this; please add some tests that declare usual deallocation functions with CUDA host/device attributes and check that they behave as expected. rsmith: I don't see any test coverage for this; please add some tests that declare usual deallocation…
		EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);

assert(Matches.size() == 1 &&		assert(Matches.size() == 1 &&
"unexpectedly have multiple usual deallocation functions");		"unexpectedly have multiple usual deallocation functions");
return Matches.front();		return Matches.front();
}		}

bool Sema::FindDeallocationFunction(SourceLocation StartLoc, CXXRecordDecl *RD,		bool Sema::FindDeallocationFunction(SourceLocation StartLoc, CXXRecordDecl *RD,
DeclarationName Name,		DeclarationName Name,
FunctionDecl* &Operator, bool Diagnose) {		FunctionDecl* &Operator, bool Diagnose) {
LookupResult Found(*this, Name, StartLoc, LookupOrdinaryName);		LookupResult Found(*this, Name, StartLoc, LookupOrdinaryName);
// Try to find operator delete/operator delete[] in class scope.		// Try to find operator delete/operator delete[] in class scope.
LookupQualifiedName(Found, RD);		LookupQualifiedName(Found, RD);

if (Found.isAmbiguous())		if (Found.isAmbiguous())
return true;		return true;

		rsmithUnsubmitted Done Reply Inline Actions Use `resize`, not `set_size`. rsmith: Use `resize`, not `set_size`.
Found.suppressDiagnostics();		Found.suppressDiagnostics();
		elibenUnsubmitted Done Reply Inline Actions Nit: wrap the ++I in braces too, otherwise this looks really weird eliben: Nit: wrap the ++I in braces too, otherwise this looks really weird

SmallVector<DeclAccessPair,4> Matches;		SmallVector<DeclAccessPair,4> Matches;
for (LookupResult::iterator F = Found.begin(), FEnd = Found.end();		for (LookupResult::iterator F = Found.begin(), FEnd = Found.end();
F != FEnd; ++F) {		F != FEnd; ++F) {
NamedDecl ND = (F)->getUnderlyingDecl();		NamedDecl ND = (F)->getUnderlyingDecl();

// Ignore template operator delete members from the check for a usual		// Ignore template operator delete members from the check for a usual
// deallocation function.		// deallocation function.
if (isa<FunctionTemplateDecl>(ND))		if (isa<FunctionTemplateDecl>(ND))
continue;		continue;

if (cast<CXXMethodDecl>(ND)->isUsualDeallocationFunction())		if (cast<CXXMethodDecl>(ND)->isUsualDeallocationFunction())
Matches.push_back(F.getPair());		Matches.push_back(F.getPair());
}		}

		if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads)
		EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(CurContext), Matches);

// There's exactly one suitable operator; pick it.		// There's exactly one suitable operator; pick it.
if (Matches.size() == 1) {		if (Matches.size() == 1) {
Operator = cast<CXXMethodDecl>(Matches[0]->getUnderlyingDecl());		Operator = cast<CXXMethodDecl>(Matches[0]->getUnderlyingDecl());

if (Operator->isDeleted()) {		if (Operator->isDeleted()) {
if (Diagnose) {		if (Diagnose) {
Diag(StartLoc, diag::err_deleted_function_use);		Diag(StartLoc, diag::err_deleted_function_use);
NoteDeletedFunction(Operator);		NoteDeletedFunction(Operator);
▲ Show 20 Lines • Show All 4,512 Lines • Show Last 20 Lines

lib/Sema/SemaOverload.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,061 Lines • ▼ Show 20 Lines	if (NewI == NewE \|\| OldI == OldE)
return true;		return true;
llvm::FoldingSetNodeID NewID, OldID;		llvm::FoldingSetNodeID NewID, OldID;
NewI->getCond()->Profile(NewID, Context, true);		NewI->getCond()->Profile(NewID, Context, true);
OldI->getCond()->Profile(OldID, Context, true);		OldI->getCond()->Profile(OldID, Context, true);
if (NewID != OldID)		if (NewID != OldID)
return true;		return true;
}		}

		if (getLangOpts().CUDA && getLangOpts().CUDATargetOverloads) {
		CUDAFunctionTarget NewTarget = IdentifyCUDATarget(New),
		OldTarget = IdentifyCUDATarget(Old);
		if (NewTarget == CFT_InvalidTarget \|\| NewTarget == CFT_Global)
		return false;

		assert((OldTarget != CFT_InvalidTarget) && "Unexpected invalid target.");

		// Codegen expects unique destructor, so we don't allow HD
		// destructors if we already have one with different target.
		if (isa<CXXDestructorDecl>(New) &&
		((NewTarget == CFT_HostDevice) != (OldTarget == CFT_HostDevice)))
		return false;

		// Allow overloading of functions with same signature, but
		// different CUDA target attributes.
		return NewTarget != OldTarget;
		}

// The signatures match; this is not an overload.		// The signatures match; this is not an overload.
return false;		return false;
}		}

/// \brief Checks availability of the function depending on the current		/// \brief Checks availability of the function depending on the current
/// function context. Inside an unavailable function, unavailability is ignored.		/// function context. Inside an unavailable function, unavailability is ignored.
///		///
/// \returns true if \arg FD is unavailable and current context is inside		/// \returns true if \arg FD is unavailable and current context is inside
▲ Show 20 Lines • Show All 7,420 Lines • ▼ Show 20 Lines	for (auto &Cand2A : Cand2Attrs) {
S.getASTContext(), true);		S.getASTContext(), true);
if (Cand1ID != Cand2ID)		if (Cand1ID != Cand2ID)
return false;		return false;
}		}

return true;		return true;
}		}

		if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
		Cand1.Function && Cand2.Function) {
		FunctionDecl *Caller = dyn_cast<FunctionDecl>(S.CurContext);
		return S.IdentifyCUDAPreference(Caller, Cand1.Function) >
		S.IdentifyCUDAPreference(Caller, Cand2.Function);
		}

return false;		return false;
}		}

/// \brief Computes the best viable function (C++ 13.3.3)		/// \brief Computes the best viable function (C++ 13.3.3)
/// within an overload candidate set.		/// within an overload candidate set.
///		///
/// \param Loc The location of the function name (or operator symbol) for		/// \param Loc The location of the function name (or operator symbol) for
/// which overload resolution occurs.		/// which overload resolution occurs.
▲ Show 20 Lines • Show All 1,401 Lines • ▼ Show 20 Lines	if (FindAllFunctionsThatMatchTargetTypeExactly()) {
// If more than one function is selected, [...]		// If more than one function is selected, [...]
if (Matches.size() > 1) {		if (Matches.size() > 1) {
if (FoundNonTemplateFunction)		if (FoundNonTemplateFunction)
EliminateAllTemplateMatches();		EliminateAllTemplateMatches();
else		else
EliminateAllExceptMostSpecializedTemplate();		EliminateAllExceptMostSpecializedTemplate();
}		}
}		}

		if (S.getLangOpts().CUDA && S.getLangOpts().CUDATargetOverloads &&
		Matches.size() > 1)
		EliminateSuboptimalCudaMatches();
}		}

private:		private:
bool isTargetTypeAFunction() const {		bool isTargetTypeAFunction() const {
return TargetFunctionType->isFunctionType();		return TargetFunctionType->isFunctionType();
}		}

// [ToType] [Return]		// [ToType] [Return]
▲ Show 20 Lines • Show All 159 Lines • ▼ Show 20 Lines	private:
void EliminateAllTemplateMatches() {		void EliminateAllTemplateMatches() {
// [...] any function template specializations in the set are		// [...] any function template specializations in the set are
// eliminated if the set also contains a non-template function, [...]		// eliminated if the set also contains a non-template function, [...]
for (unsigned I = 0, N = Matches.size(); I != N; ) {		for (unsigned I = 0, N = Matches.size(); I != N; ) {
if (Matches[I].second->getPrimaryTemplate() == nullptr)		if (Matches[I].second->getPrimaryTemplate() == nullptr)
++I;		++I;
else {		else {
Matches[I] = Matches[--N];		Matches[I] = Matches[--N];
Matches.set_size(N);		Matches.resize(N);
		}
}		}
}		}

		void EliminateSuboptimalCudaMatches() {
		S.EraseUnwantedCUDAMatches(dyn_cast<FunctionDecl>(S.CurContext), Matches);
}		}

public:		public:
void ComplainNoMatchesFound() const {		void ComplainNoMatchesFound() const {
assert(Matches.empty());		assert(Matches.empty());
S.Diag(OvlExpr->getLocStart(), diag::err_addr_ovl_no_viable)		S.Diag(OvlExpr->getLocStart(), diag::err_addr_ovl_no_viable)
<< OvlExpr->getName() << TargetFunctionType		<< OvlExpr->getName() << TargetFunctionType
<< OvlExpr->getSourceRange();		<< OvlExpr->getSourceRange();
if (FailedCandidates.empty())		if (FailedCandidates.empty())
S.NoteAllOverloadCandidates(OvlExpr, TargetFunctionType);		S.NoteAllOverloadCandidates(OvlExpr, TargetFunctionType);
else {		else {
// We have some deduction failure messages. Use them to diagnose		// We have some deduction failure messages. Use them to diagnose
// the function templates, and diagnose the non-template candidates		// the function templates, and diagnose the non-template candidates
// normally.		// normally.
for (UnresolvedSetIterator I = OvlExpr->decls_begin(),		for (UnresolvedSetIterator I = OvlExpr->decls_begin(),
IEnd = OvlExpr->decls_end();		IEnd = OvlExpr->decls_end();
I != IEnd; ++I)		I != IEnd; ++I)
if (FunctionDecl *Fun =		if (FunctionDecl *Fun =
dyn_cast<FunctionDecl>((*I)->getUnderlyingDecl()))		dyn_cast<FunctionDecl>((*I)->getUnderlyingDecl()))
S.NoteOverloadCandidate(Fun, TargetFunctionType);		S.NoteOverloadCandidate(Fun, TargetFunctionType);
FailedCandidates.NoteCandidates(S, OvlExpr->getLocStart());		FailedCandidates.NoteCandidates(S, OvlExpr->getLocStart());
}		}
}		}
		Inline comment from rsmith (Unsubmitted, Done): Please factor out the common code shared by this and `FindUsualDeallocationFunction`.

bool IsInvalidFormOfPointerToMemberFunction() const {		bool IsInvalidFormOfPointerToMemberFunction() const {
return TargetTypeIsNonStaticMemberFunction &&		return TargetTypeIsNonStaticMemberFunction &&
!OvlExprInfo.HasFormOfMemberPointer;		!OvlExprInfo.HasFormOfMemberPointer;
}		}

void ComplainIsInvalidFormOfPointerToMemberFunction() const {		void ComplainIsInvalidFormOfPointerToMemberFunction() const {
// TODO: Should we condition this on whether any functions might		// TODO: Should we condition this on whether any functions might
▲ Show 20 Lines • Show All 2,410 Lines • Show Last 20 Lines

test/CodeGenCUDA/function-overload.cu

This file was added.

				// REQUIRES: x86-registered-target
				// REQUIRES: nvptx-registered-target

				// Make sure we handle target overloads correctly.
				// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
				// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
				// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST %s
				// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device \
				// RUN: -fcuda-target-overloads -emit-llvm -o - %s \
				// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE %s

				// Check target overloads handling with disabled call target checks.
				// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -emit-llvm \
				// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -o - %s \
				// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-HOST \
				// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-HOST-NC %s
				// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -emit-llvm \
				// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \
				// RUN: -fcuda-is-device -o - %s \
				// RUN: | FileCheck -check-prefix=CHECK-BOTH -check-prefix=CHECK-DEVICE \
				// RUN: -check-prefix=CHECK-BOTH-NC -check-prefix=CHECK-DEVICE-NC %s

				#include "Inputs/cuda.h"

				typedef int (*fp_t)(void);
				typedef void (*gp_t)(void);

				// CHECK-HOST: @dhddp = global {{.*}} @_Z4dhhdv
				// CHECK-HOST: @hhdp = global {{.*}} @_Z3hhdv
				// CHECK-HOST: @dhdp = global {{.*}} @_Z3dhdUa6deviceUa4hostv
				// CHECK-HOST: @gp = global {{.*}} @_Z1gv
				// CHECK-HOST: @hp = global {{.*}} @_Z1hv

				// Check proper mangling if we mix with enable_if attribute
				__device__ __host__ __attribute__((enable_if(1, ""))) void hde() {}
				// CHECK-BOTH-LABEL: define void @_Z3hdeUa6deviceUa9enable_ifIXLi1EEEUa4hostv

				// CHECK-BOTH-LABEL: define i32 @_Z4dhhdv()
				__device__ int dhhd(void) { return 1; }
				// CHECK-DEVICE: ret i32 1

				__host__ int dhhd(void) { return 2; }
				// CHECK-HOST: ret i32 2

				// CHECK-BOTH-LABEL: define i32 @_Z4dhhdUa6deviceUa4hostv()
				__host__ __device__ int dhhd(void) { return 3; }
				// CHECK-BOTH: ret i32 3

				// CHECK-HOST-LABEL: define i32 @_Z3hhdv()
				__host__ int hhd(void) { return 4; }
				// CHECK-HOST: ret i32 4

				// CHECK-BOTH-LABEL: define i32 @_Z3dhdUa6deviceUa4hostv()
				__host__ __device__ int dhd(void) { return 5; }
				// CHECK-BOTH: ret i32 5

				// CHECK-DEVICE-LABEL: define i32 @_Z3dhdv()
				__device__ int dhd(void) { return 6; }
				// CHECK-DEVICE: ret i32 6

				// CHECK-BOTH-LABEL: define i32 @_Z3hhdUa6deviceUa4hostv()
				__host__ __device__ int hhd(void) { return 7; }
				// CHECK-BOTH: ret i32 7

				// CHECK-DEVICE-LABEL: define i32 @_Z1dv()
				__device__ int d(void) { return 8; }
				// CHECK-DEVICE: ret i32 8

				// CHECK-HOST-LABEL: define i32 @_Z1hv()
				__host__ int h(void) { return 9; }
				// CHECK-HOST: ret i32 9

				// CHECK-BOTH-LABEL: define void @_Z1gv()
				__global__ void g(void) {}
				// CHECK-BOTH: ret void

				// mangled names of extern "C" __host__ __device__ functions clash
				// with those of their __host__/__device__ counterparts, so
				// overloading of extern "C" functions can only happen for __host__
				// and __device__ functions -- we never codegen them in the same
				// compilation and therefore mangled name conflict is not a problem.

				// CHECK-DEVICE-LABEL: define i32 @chd()
				extern "C" __device__ int chd(void) {return 10;}
				// CHECK-DEVICE: ret i32 10

				// CHECK-HOST-LABEL: define i32 @chd()
				extern "C" __host__ int chd(void) {return 11;}
				// CHECK-HOST: ret i32 11


				// Overload selection inside a __host__ caller. Both address-of and direct
				// calls are expected to pick the plainly mangled dhhd, hhd and h overloads,
				// and the __host__ __device__ overload of dhd (dhd has no __host__-only
				// variant in this file). Taking the address of the device-only d is only
				// exercised when NOCHECKS is defined.
				// CHECK-HOST-LABEL: define void @_Z5hostfv()
				__host__ void hostf(void) {
				  fp_t dhddp = dhhd; // CHECK-HOST: store {{.*}} @_Z4dhhdv, {{.*}} %dhddp,
				  fp_t hhdp = hhd; // CHECK-HOST: store {{.*}} @_Z3hhdv, {{.*}} %hhdp,
				  fp_t dhdp = dhd; // CHECK-HOST: store {{.*}} @_Z3dhdUa6deviceUa4hostv, {{.*}} %dhdp,
				#if defined (NOCHECKS)
				  fp_t dp = d; // CHECK-HOST-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
				#endif
				  fp_t hp = h; // CHECK-HOST: store {{.*}} @_Z1hv, {{.*}} %hp,
				  fp_t chdp = chd; // CHECK-HOST: store {{.*}} @chd, {{.*}} %chdp,

				  dhhd(); // CHECK-HOST: call i32 @_Z4dhhdv()
				  hhd(); // CHECK-HOST: call i32 @_Z3hhdv()
				  dhd(); // CHECK-HOST: call i32 @_Z3dhdUa6deviceUa4hostv()
				  h(); // CHECK-HOST: call i32 @_Z1hv()
				  chd(); // CHECK-HOST: call i32 @chd()
				}

				// Overload selection inside a __device__ caller. Both address-of and direct
				// calls are expected to pick the plainly mangled dhhd, dhd and d overloads,
				// and the __host__ __device__ overload of hhd (hhd has no __device__-only
				// variant in this file). Taking the address of the host-only h is only
				// exercised when NOCHECKS is defined.
				// CHECK-DEVICE-LABEL: define void @_Z7devicefv()
				__device__ void devicef(void) {
				  fp_t dhddp = dhhd; // CHECK-DEVICE: store {{.*}} @_Z4dhhdv, {{.*}} %dhddp,
				  fp_t hhdp = hhd; // CHECK-DEVICE: store {{.*}} @_Z3hhdUa6deviceUa4hostv, {{.*}} %hhdp,
				  fp_t dhdp = dhd; // CHECK-DEVICE: store {{.*}} @_Z3dhdv, {{.*}} %dhdp,
				  fp_t dp = d; // CHECK-DEVICE: store {{.*}} @_Z1dv, {{.*}} %dp,
				#if defined (NOCHECKS)
				  fp_t hp = h; // CHECK-DEVICE-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
				#endif
				  fp_t chdp = chd; // CHECK-DEVICE: store {{.*}} @chd, {{.*}} %chdp,

				  dhhd(); // CHECK-DEVICE: call i32 @_Z4dhhdv()
				  hhd(); // CHECK-DEVICE: call i32 @_Z3hhdUa6deviceUa4hostv()
				  dhd(); // CHECK-DEVICE: call i32 @_Z3dhdv()
				  d(); // CHECK-DEVICE: call i32 @_Z1dv()
				  chd(); // CHECK-DEVICE: call i32 @chd()
				}

				// Overload selection inside a __host__ __device__ caller. For dhhd, hhd and
				// dhd the __host__ __device__ overload is expected in both compilations.
				// References to the single-target functions d and h are guarded by
				// NOCHECKS and/or __CUDA_ARCH__ so that each one is only compiled where
				// the checks below expect it.
				// CHECK-BOTH-LABEL: define void @_Z11hostdevicefUa6deviceUa4hostv()
				__host__ __device__ void hostdevicef(void) {
				  fp_t dhddp = dhhd; // CHECK-BOTH: store {{.*}} @_Z4dhhdUa6deviceUa4hostv, {{.*}} %dhddp,
				  fp_t hhdp = hhd; // CHECK-BOTH: store {{.*}} @_Z3hhdUa6deviceUa4hostv, {{.*}} %hhdp,
				  fp_t dhdp = dhd; // CHECK-BOTH: store {{.*}} @_Z3dhdUa6deviceUa4hostv, {{.*}} %dhdp,
				#if defined (NOCHECKS)
				  fp_t dp = d; // CHECK-BOTH-NC: store {{.*}} @_Z1dv, {{.*}} %dp,
				  fp_t hp = h; // CHECK-BOTH-NC: store {{.*}} @_Z1hv, {{.*}} %hp,
				#endif
				  // chd would be __host__ or __device__ depending on compilation mode.
				  fp_t chdp = chd; // CHECK-BOTH: store {{.*}} @chd, {{.*}} %chdp,

				  dhhd(); // CHECK-BOTH: call i32 @_Z4dhhdUa6deviceUa4hostv()
				  hhd(); // CHECK-BOTH: call i32 @_Z3hhdUa6deviceUa4hostv()
				  dhd(); // CHECK-BOTH: call i32 @_Z3dhdUa6deviceUa4hostv()

				#if defined(NOCHECKS) || defined(__CUDA_ARCH__)
				  d(); // CHECK-BOTH-NC: call i32 @_Z1dv()
				#endif
				#if defined(NOCHECKS) || !defined(__CUDA_ARCH__)
				  h(); // CHECK-BOTH-NC: call i32 @_Z1hv()
				#endif
				  chd(); // CHECK-BOTH: call i32 @chd()
				}

				// Test for address of overloaded function resolution in the global context.
				fp_t dhddp = dhhd;
				fp_t hhdp = hhd;
				fp_t dhdp = dhd;
				gp_t gp = g;
				fp_t hp = h;

				int x;
				// Check constructors/destructors for D/H functions
				struct s_cd_dh {
				__host__ s_cd_dh() { x = 11; }
				__device__ s_cd_dh() { x = 12; }
				__host__ __device__ s_cd_dh() { x = 13; }
				__host__ ~s_cd_dh() { x = 21; }
				__device__ ~s_cd_dh() { x = 22; }
				// no HD as it is not allowed to overload H/D for destructors.
				};

				struct s_cd_hd {
				__host__ __device__ s_cd_hd() { x = 31; }
				__host__ __device__ ~s_cd_hd() { x = 32; }
				};

				// Check overloading of (de)allocators for D/H/HD functions
				struct s_da_dhhd {
				int placeholder;

				__host__ void *operator new(size_t count) { x = 41; return &x; }
				__device__ void *operator new(size_t count) { x = 42; return &x; }
				__host__ __device__ void *operator new(size_t count) { x=43; return &x; }

				__host__ void operator delete(void *ptr) { x = 44; }
				__device__ void operator delete(void *ptr) { x = 45; }
				__host__ __device__ void operator delete(void *ptr) { x = 46; }
				};

				// Check overloading of (de)allocators when only H and HD variants exist:
				// each operator below has a __host__ overload and a __host__ __device__
				// overload, and no __device__-only one.
				struct s_da_dhd {
				int placeholder;

				__host__ void *operator new(size_t count) { x = 51; return &x; }
				__host__ __device__ void *operator new(size_t count) { x=52; return &x; }

				__host__ void operator delete(void *ptr) { x = 53; }
				__host__ __device__ void operator delete(void *ptr) { x = 54; }
				};

				// wrapper() is declared __device__ when __CUDA_ARCH__ is defined and
				// __host__ otherwise. It allocates and frees objects of the structs above
				// and constructs/destroys s_cd_dh and s_cd_hd locals; the directives inside
				// the body pin which operator new/delete, constructor and destructor
				// overload each compilation is expected to call.
				// CHECK-BOTH: define void @_Z7wrapperv
				#if defined(__CUDA_ARCH__)
				__device__
				#else
				__host__
				#endif
				void wrapper() {
				s_da_dhhd *sdap = new s_da_dhhd;
				// CHECK-BOTH: call i8* @_ZN9s_da_dhhdnwEm(
				delete sdap;
				// CHECK-BOTH: call void @_ZN9s_da_dhhddlEPv(

				s_da_dhd *sdadhdp = new s_da_dhd;
				// CHECK-HOST: call i8* @_ZN8s_da_dhdnwEm(
				// CHECK-DEVICE: call i8* @_ZN8s_da_dhdnwEUa6deviceUa4hostm(
				delete sdadhdp;
				// CHECK-HOST: call void @_ZN8s_da_dhddlEPv(
				// CHECK-DEVICE: call void @_ZN8s_da_dhddlEUa6deviceUa4hostPv(

				s_cd_dh scddh;
				// CHECK-BOTH: call void @_ZN7s_cd_dhC1Ev(
				s_cd_hd scdhd;
				// CHECK-BOTH: call void @_ZN7s_cd_hdC1EUa6deviceUa4hostv

				// Destructor calls are expected in reverse order of construction.
				// CHECK-BOTH: call void @_ZN7s_cd_hdD1EUa6deviceUa4hostv(
				// CHECK-BOTH: call void @_ZN7s_cd_dhD1Ev(
				}
				// CHECK-BOTH: ret void

				// Now it's time to check what's been generated for the methods we used.

				// void *operator new(size_t count);
				// CHECK-BOTH: define linkonce_odr i8* @_ZN9s_da_dhhdnwEm(
				// CHECK-HOST: store i32 41,
				// CHECK-DEVICE: store i32 42,
				// CHECK-BOTH: }

				// void operator delete(void *ptr);
				// CHECK-BOTH: define linkonce_odr void @_ZN9s_da_dhhddlEPv(
				// CHECK-HOST: store i32 44,
				// CHECK-DEVICE: store i32 45,
				// CHECK-BOTH: }

				// void *operator new(size_t count);
				// CHECK-HOST: define linkonce_odr i8* @_ZN8s_da_dhdnwEm(
				// CHECK-DEVICE: define linkonce_odr i8* @_ZN8s_da_dhdnwEUa6deviceUa4hostm(
				// CHECK-HOST: store i32 51,
				// CHECK-DEVICE: store i32 52,
				// CHECK-BOTH: }

				// void operator delete(void *ptr);
				// CHECK-HOST: define linkonce_odr void @_ZN8s_da_dhddlEPv(
				// CHECK-DEVICE: define linkonce_odr void @_ZN8s_da_dhddlEUa6deviceUa4hostPv(
				// CHECK-HOST: store i32 53,
				// CHECK-DEVICE: store i32 54,
				// CHECK-BOTH: }

				// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhC2Ev(
				// CHECK-HOST: store i32 11,
				// CHECK-DEVICE: store i32 12,
				// CHECK-BOTH: ret void

				// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdC2EUa6deviceUa4hostv(
				// CHECK-BOTH: store i32 31,
				// CHECK-BOTH: ret void

				// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_hdD2EUa6deviceUa4hostv(
				// CHECK-BOTH: store i32 32,
				// CHECK-BOTH: ret void

				// CHECK-BOTH: define linkonce_odr void @_ZN7s_cd_dhD2Ev(
				// CHECK-HOST: store i32 21,
				// CHECK-DEVICE: store i32 22,
				// CHECK-BOTH: ret void

test/SemaCUDA/function-overload.cu

This file was added.

				// REQUIRES: x86-registered-target
				// REQUIRES: nvptx-registered-target

				// Make sure we handle target overloads correctly.
				// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \
				// RUN: -fsyntax-only -fcuda-target-overloads -verify %s
				// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda \
				// RUN: -fsyntax-only -fcuda-target-overloads -fcuda-is-device -verify %s

				// Check target overloads handling with disabled call target checks.
				// RUN: %clang_cc1 -DNOCHECKS -triple x86_64-unknown-linux-gnu -fsyntax-only \
				// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads -verify %s
				// RUN: %clang_cc1 -DNOCHECKS -triple nvptx64-nvidia-cuda -fsyntax-only \
				// RUN: -fcuda-disable-target-call-checks -fcuda-target-overloads \
				// RUN: -fcuda-is-device -verify %s

				#include "Inputs/cuda.h"

				typedef int (*fp_t)(void);
				typedef void (*gp_t)(void);

				__device__ int dhhd(void) { return 2; }
				__host__ int dhhd(void) { return 1; } // expected-note {{previous definition is here}}
				int dhhd(void) { return 1; } // expected-error {{redefinition of 'dhhd'}}
				__host__ __device__ int dhhd(void) { return 3; }

				__host__ int hhd(void) { return 4; }
				__host__ __device__ int dhd(void) { return 5; }

				__device__ int dhd(void) { return 6; }
				__host__ __device__ int hhd(void) { return 7; }

				__device__ int d(void) { return 8; }
				__host__ int h(void) { return 9; }
				__global__ void g(void) {}

				extern "C" __device__ int chd(void) {return 10;}
				extern "C" __host__ int chd(void) {return 11;} // expected-note {{previous definition is here}}
				extern "C" int chd(void) {return 11;} // expected-error {{redefinition of 'chd'}}
				extern "C" __host__ __device__ int chd(void) {return 12;} // expected-note {{previous definition is here}}
				extern "C" __host__ __device__ int chd(void) {return 13;} // expected-error {{redefinition of 'chd'}}

				__host__ void hostf(void) {
				fp_t dhddp = dhhd;
				fp_t hhdp = hhd;
				fp_t dhdp = dhd;
				gp_t gp = g;
				fp_t dp = d;
				#if !defined(NOCHECKS)
				// expected-error@-2 {{reference to __device__ function 'd' in __host__ function}}
				// expected-note@33 {{'d' declared here}}
				#endif
				fp_t hp = h;

				dhhd();
				hhd();
				dhd();
				chd();
				d();
				#if !defined(NOCHECKS)
				// expected-error@-2 {{no matching function for call to 'd'}}
				// expected-note@33 {{candidate function not viable: call to __device__ function from __host__ function}}
				#endif
				h();

				g(); // expected-error {{call to global function g not configured}}
				g<<<0,0>>>();
				}

				__device__ void devicef(void) {
				fp_t dhddp = dhhd;
				fp_t hhdp = hhd;
				fp_t dhdp = dhd;
				gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __device__ function}}
				// expected-note@35 {{'g' declared here}}
				fp_t dp = d;
				fp_t hp = h;
				#if !defined(NOCHECKS)
				// expected-error@-2 {{reference to __host__ function 'h' in __device__ function}}
				// expected-note@34 {{'h' declared here}}
				#endif

				dhhd();
				hhd();
				dhd();
				chd();
				d();
				h();
				g();
				#if !defined(NOCHECKS)
				// expected-error@-3 {{no matching function for call to 'h'}}
				// expected-note@34 {{candidate function not viable: call to __host__ function from __device__ function}}
				#endif
				// expected-error@-5 {{no matching function for call to 'g'}}
				// expected-note@35 {{candidate function not viable: call to __global__ function from __device__ function}}
				g<<<0,0>>>();
				// expected-error@-1 {{reference to __global__ function 'g' in __device__ function}}
				// expected-note@35 {{'g' declared here}}
				}

				__global__ void globalf(void) {
				fp_t dhddp = dhhd;
				fp_t hhdp = hhd;
				fp_t dhdp = dhd;
				gp_t gp = g; // expected-error {{reference to __global__ function 'g' in __global__ function}}
				// expected-note@35 {{'g' declared here}}
				fp_t dp = d;
				fp_t hp = h;
				#if !defined(NOCHECKS)
				// expected-error@-2 {{reference to __host__ function 'h' in __global__ function}}
				// expected-note@34 {{'h' declared here}}
				#endif

				dhhd();
				hhd();
				dhd();
				chd();
				d();
				h();
				#if !defined(NOCHECKS)
				// expected-error@-2 {{no matching function for call to 'h'}}
				// expected-note@34 {{candidate function not viable: call to __host__ function from __global__ function}}
				#endif
				g();
				// expected-error@-1 {{no matching function for call to 'g'}}
				// expected-note@35 {{candidate function not viable: call to __global__ function from __global__ function}}
				g<<<0,0>>>();
				// expected-error@-1 {{reference to __global__ function 'g' in __global__ function}}
				// expected-note@35 {{'g' declared here}}

				}

				__host__ __device__ void hostdevicef(void) {
				fp_t dhddp = dhhd;
				fp_t hhdp = hhd;
				fp_t dhdp = dhd;
				gp_t gp = g;
				#if defined(__CUDA_ARCH__)
				// expected-error@-2 {{reference to __global__ function 'g' in __host__ __device__ function}}
				// expected-note@35 {{'g' declared here}}
				#endif
				fp_t dp = d;
				fp_t hp = h;
				#if !defined(NOCHECKS)
				#if !defined(__CUDA_ARCH__)
				// expected-error@-4 {{reference to __device__ function 'd' in __host__ __device__ function}}
				// expected-note@33 {{'d' declared here}}
				#else
				// expected-error@-6 {{reference to __host__ function 'h' in __host__ __device__ function}}
				// expected-note@34 {{'h' declared here}}
				#endif
				#endif

				dhhd();
				hhd();
				dhd();
				chd();

				d();
				h();
				g();
				g<<<0,0>>>();
				#if !defined(__CUDA_ARCH__)
				#if !defined(NOCHECKS)
				// expected-error@-6 {{no matching function for call to 'd'}}
				// expected-note@33 {{candidate function not viable: call to __device__ function from __host__ __device__ function}}
				#endif
				// expected-error@-7 {{call to global function g not configured}}
				#else
				#if !defined(NOCHECKS)
				// expected-error@-11 {{no matching function for call to 'h'}}
				// expected-note@34 {{candidate function not viable: call to __host__ function from __host__ __device__ function}}
				#endif
				// expected-error@-13 {{no matching function for call to 'g'}}
				// expected-note@35 {{candidate function not viable: call to __global__ function from __host__ __device__ function}}
				// expected-error@-14 {{reference to __global__ function 'g' in __host__ __device__ function}}
				// expected-note@35 {{'g' declared here}}
				#endif // __CUDA_ARCH__
				}

				// Test for address of overloaded function resolution in the global context.
				fp_t dhddp = dhhd;
				fp_t hhdp = hhd;
				fp_t dhdp = dhd;
				gp_t gp = g;
				fp_t hp = h;

				// Test overloading of destructors. We need to ensure there's only one
				// destructor present for codegen.

				// We codegen only one of these destructors, so there's no ambiguity
				struct s0 {
				__host__ ~s0() {}
				__device__ ~s0() {}
				};

				// __host__ __device__ destructor by itself is fine, too.
				struct shd {
				__host__ __device__ ~shd() {}
				};

				// __host__ __device__ destructor can't overload __host__ or
				// __device__ destructors because we'll codegen both of them.
				struct s1 {
				__host__ ~s1() {}
				__device__ ~s1() {} // expected-note {{previous declaration is here}}
				__host__ __device__ ~s1() {} // expected-error {{destructor cannot be redeclared}}
				};

				struct s2h {
				__host__ ~s2h() {} // expected-note {{previous declaration is here}}
				__host__ __device__ ~s2h() {} // expected-error {{destructor cannot be redeclared}}
				};

				struct s3h {
				__host__ __device__ ~s3h() {} // expected-note {{previous declaration is here}}
				__host__ ~s3h() {} // expected-error {{destructor cannot be redeclared}}
				};

				struct s2d {
				__device__ ~s2d() {} // expected-note {{previous declaration is here}}
				__host__ __device__ ~s2d() {} // expected-error {{destructor cannot be redeclared}}
				};

				struct s3d {
				__host__ __device__ ~s3d() {} // expected-note {{previous declaration is here}}
				__device__ ~s3d() {} // expected-error {{destructor cannot be redeclared}}
				};

				// Test (de)allocation functions

				struct a0 {
				void operator delete(void *ptr); // expected-note {{previous declaration is here}}
				__host__ void operator delete(void *ptr); // expected-error {{class member cannot be redeclared}}
				void operator delete[](void *ptr); // expected-note {{previous declaration is here}}
				__host__ void operator delete[](void *ptr); // expected-error {{class member cannot be redeclared}}
				__device__ void operator delete(void *ptr);
				__device__ void operator delete[](void *ptr);
				__host__ __device__ void operator delete(void *ptr);
				__host__ __device__ void operator delete[](void *ptr);
				};

This is an archive of the discontinued LLVM Phabricator instance.

[CUDA] Allow function overloads based on host/device attributes.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 34230

include/clang/Basic/LangOptions.def

include/clang/Driver/CC1Options.td

include/clang/Sema/Sema.h

lib/AST/ItaniumMangle.cpp

lib/Frontend/CompilerInvocation.cpp

lib/Sema/SemaCUDA.cpp

lib/Sema/SemaDecl.cpp

lib/Sema/SemaExprCXX.cpp

lib/Sema/SemaOverload.cpp

test/CodeGenCUDA/function-overload.cu

test/SemaCUDA/function-overload.cu

This is an archive of the discontinued LLVM Phabricator instance.

[CUDA] Allow function overloads based on host/device attributes.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 34230

include/clang/Basic/LangOptions.def

include/clang/Driver/CC1Options.td

include/clang/Sema/Sema.h

lib/AST/ItaniumMangle.cpp

lib/Frontend/CompilerInvocation.cpp

lib/Sema/SemaCUDA.cpp

lib/Sema/SemaDecl.cpp

lib/Sema/SemaExprCXX.cpp

lib/Sema/SemaOverload.cpp

test/CodeGenCUDA/function-overload.cu

test/SemaCUDA/function-overload.cu

[CUDA] Allow function overloads based on host/device attributes.
ClosedPublic