This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
clang/
-
lib/Sema/
-
Sema/
-
SemaDeclAttr.cpp
-
SemaExpr.cpp
-
SemaStmt.cpp
-
SemaTemplateDeduction.cpp
-
test/SemaCUDA/
-
SemaCUDA/
-
autoret-global.cu

Differential D68031

[CUDA][HIP] Enable kernel function return type deduction.
ClosedPublic

Authored by hliao on Sep 25 2019, 8:39 AM.

Download Raw Diff

Details

Reviewers

tra
jlebar

Commits

rG24337db61666: [CUDA][HIP] Enable kernel function return type deduction.
rL372898: [CUDA][HIP] Enable kernel function return type deduction.
rC372898: [CUDA][HIP] Enable kernel function return type deduction.

Summary

Even though only void is still accepted as the deduced return type, enabling deduction/instantiation on the return type allows more consistent coding.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

hliao created this revision. · Sep 25 2019, 8:39 AM

Herald added a project: Restricted Project. · View Herald Transcript · Sep 25 2019, 8:39 AM

Herald added a subscriber: cfe-commits. · View Herald Transcript

Harbormaster completed remote builds in B38548: Diff 221781. · Sep 25 2019, 8:40 AM

Nice. I'd mention in the commit message that NVCC does not support deduced return type for kernel functions.

This revision is now accepted and ready to land. · Sep 25 2019, 9:34 AM

In D68031#1682822, @tra wrote:

Nice. I'd mention in the commit message that NVCC does not support deduced return type for kernel functions.

Just tried with NVCC from CUDA 10: auto-based return type deduction is not supported, but return type deduction in a template is supported. The following test code passes compilation with NVCC:

#include <cuda.h>

template <typename T>
__global__ T foo() {
}

void f0() {
  foo<void><<<0, 0>>>();
#if 0
  foo<int><<<0, 0>>>();
#endif
}

template <bool Cond, typename T = void> struct enable_if { typedef T type; };
template <typename T> struct enable_if<false, T> {};

template <int N>
__global__
auto bar() -> typename enable_if<N == 1>::type {
}

template <int N>
__global__
auto bar() -> typename enable_if<N == 2>::type {
}

void f3() {
  bar<1><<<0, 0>>>();
  bar<2><<<0, 0>>>();
#if 0
  bar<3><<<0, 0>>>();
#endif
}

Changing `#if 0` to `#if 1` (s/#if 0/#if 1/) also shows that NVCC reports the error at the correct position, but its message is, IMHO, misleading compared to the one from clang.

Closed by commit rG24337db61666: [CUDA][HIP] Enable kernel function return type deduction. (authored by hliao). · Explain Why · Sep 25 2019, 9:52 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

clang/

lib/

Sema/

SemaDeclAttr.cpp

4 lines

SemaExpr.cpp

4 lines

SemaStmt.cpp

8 lines

SemaTemplateDeduction.cpp

7 lines

test/

SemaCUDA/

autoret-global.cu

44 lines

Diff 221794

clang/lib/Sema/SemaDeclAttr.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 4,217 Lines • ▼ Show 20 Lines
	}			}

	static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) {			static void handleGlobalAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
	if (checkAttrMutualExclusion<CUDADeviceAttr>(S, D, AL) ||			if (checkAttrMutualExclusion<CUDADeviceAttr>(S, D, AL) ||
	checkAttrMutualExclusion<CUDAHostAttr>(S, D, AL)) {			checkAttrMutualExclusion<CUDAHostAttr>(S, D, AL)) {
	return;			return;
	}			}
	const auto *FD = cast<FunctionDecl>(D);			const auto *FD = cast<FunctionDecl>(D);
	if (!FD->getReturnType()->isVoidType()) {			if (!FD->getReturnType()->isVoidType() &&
				!FD->getReturnType()->getAs<AutoType>() &&
				!FD->getReturnType()->isInstantiationDependentType()) {
	SourceRange RTRange = FD->getReturnTypeSourceRange();			SourceRange RTRange = FD->getReturnTypeSourceRange();
	S.Diag(FD->getTypeSpecStartLoc(), diag::err_kern_type_not_void_return)			S.Diag(FD->getTypeSpecStartLoc(), diag::err_kern_type_not_void_return)
	<< FD->getType()			<< FD->getType()
	<< (RTRange.isValid() ? FixItHint::CreateReplacement(RTRange, "void")			<< (RTRange.isValid() ? FixItHint::CreateReplacement(RTRange, "void")
	: FixItHint());			: FixItHint());
	return;			return;
	}			}
	if (const auto *Method = dyn_cast<CXXMethodDecl>(FD)) {			if (const auto *Method = dyn_cast<CXXMethodDecl>(FD)) {
	▲ Show 20 Lines • Show All 4,241 Lines • Show Last 20 Lines

clang/lib/Sema/SemaExpr.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,885 Lines • ▼ Show 20 Lines	ExprResult Sema::BuildResolvedCallExpr(Expr Fn, NamedDecl NDecl,
if (getLangOpts().CUDA) {		if (getLangOpts().CUDA) {
if (Config) {		if (Config) {
// CUDA: Kernel calls must be to global functions		// CUDA: Kernel calls must be to global functions
if (FDecl && !FDecl->hasAttr<CUDAGlobalAttr>())		if (FDecl && !FDecl->hasAttr<CUDAGlobalAttr>())
return ExprError(Diag(LParenLoc,diag::err_kern_call_not_global_function)		return ExprError(Diag(LParenLoc,diag::err_kern_call_not_global_function)
<< FDecl << Fn->getSourceRange());		<< FDecl << Fn->getSourceRange());

// CUDA: Kernel function must have 'void' return type		// CUDA: Kernel function must have 'void' return type
if (!FuncT->getReturnType()->isVoidType())		if (!FuncT->getReturnType()->isVoidType() &&
		!FuncT->getReturnType()->getAs<AutoType>() &&
		!FuncT->getReturnType()->isInstantiationDependentType())
return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return)		return ExprError(Diag(LParenLoc, diag::err_kern_type_not_void_return)
<< Fn->getType() << Fn->getSourceRange());		<< Fn->getType() << Fn->getSourceRange());
} else {		} else {
// CUDA: Calls to global functions must be configured		// CUDA: Calls to global functions must be configured
if (FDecl && FDecl->hasAttr<CUDAGlobalAttr>())		if (FDecl && FDecl->hasAttr<CUDAGlobalAttr>())
return ExprError(Diag(LParenLoc, diag::err_global_call_not_config)		return ExprError(Diag(LParenLoc, diag::err_global_call_not_config)
<< FDecl << Fn->getSourceRange());		<< FDecl << Fn->getSourceRange());
}		}
▲ Show 20 Lines • Show All 11,999 Lines • Show Last 20 Lines

clang/lib/Sema/SemaStmt.cpp

Show First 20 Lines • Show All 3,494 Lines • ▼ Show 20 Lines	if (!OrigResultType.getType()->getAs<AutoType>()) {
return true;		return true;
}		}
// We always deduce U = void in this case.		// We always deduce U = void in this case.
Deduced = SubstAutoType(OrigResultType.getType(), Context.VoidTy);		Deduced = SubstAutoType(OrigResultType.getType(), Context.VoidTy);
if (Deduced.isNull())		if (Deduced.isNull())
return true;		return true;
}		}

		// CUDA: Kernel function must have 'void' return type.
		if (getLangOpts().CUDA)
		if (FD->hasAttr<CUDAGlobalAttr>() && !Deduced->isVoidType()) {
		Diag(FD->getLocation(), diag::err_kern_type_not_void_return)
		<< FD->getType() << FD->getSourceRange();
		return true;
		}

// If a function with a declared return type that contains a placeholder type		// If a function with a declared return type that contains a placeholder type
// has multiple return statements, the return type is deduced for each return		// has multiple return statements, the return type is deduced for each return
// statement. [...] if the type deduced is not the same in each deduction,		// statement. [...] if the type deduced is not the same in each deduction,
// the program is ill-formed.		// the program is ill-formed.
QualType DeducedT = AT->getDeducedType();		QualType DeducedT = AT->getDeducedType();
if (!DeducedT.isNull() && !FD->isInvalidDecl()) {		if (!DeducedT.isNull() && !FD->isInvalidDecl()) {
AutoType *NewAT = Deduced->getContainedAutoType();		AutoType *NewAT = Deduced->getContainedAutoType();
// It is possible that NewAT->getDeducedType() is null. When that happens,		// It is possible that NewAT->getDeducedType() is null. When that happens,
▲ Show 20 Lines • Show All 923 Lines • Show Last 20 Lines

clang/lib/Sema/SemaTemplateDeduction.cpp

Show First 20 Lines • Show All 3,087 Lines • ▼ Show 20 Lines	CXXThisScopeRAII ThisScope(*this, ThisContext, ThisTypeQuals,
getLangOpts().CPlusPlus11);		getLangOpts().CPlusPlus11);

ResultType =		ResultType =
SubstType(Proto->getReturnType(),		SubstType(Proto->getReturnType(),
MultiLevelTemplateArgumentList(*ExplicitArgumentList),		MultiLevelTemplateArgumentList(*ExplicitArgumentList),
Function->getTypeSpecStartLoc(), Function->getDeclName());		Function->getTypeSpecStartLoc(), Function->getDeclName());
if (ResultType.isNull() || Trap.hasErrorOccurred())		if (ResultType.isNull() || Trap.hasErrorOccurred())
return TDK_SubstitutionFailure;		return TDK_SubstitutionFailure;
		// CUDA: Kernel function must have 'void' return type.
		if (getLangOpts().CUDA)
		if (Function->hasAttr<CUDAGlobalAttr>() && !ResultType->isVoidType()) {
		Diag(Function->getLocation(), diag::err_kern_type_not_void_return)
		<< Function->getType() << Function->getSourceRange();
		return TDK_SubstitutionFailure;
		}
}		}

// Instantiate the types of each of the function parameters given the		// Instantiate the types of each of the function parameters given the
// explicitly-specified template arguments if we didn't do so earlier.		// explicitly-specified template arguments if we didn't do so earlier.
if (!Proto->hasTrailingReturn() &&		if (!Proto->hasTrailingReturn() &&
SubstParmTypes(Function->getLocation(), Function->parameters(),		SubstParmTypes(Function->getLocation(), Function->parameters(),
Proto->getExtParameterInfosOrNull(),		Proto->getExtParameterInfosOrNull(),
MultiLevelTemplateArgumentList(*ExplicitArgumentList),		MultiLevelTemplateArgumentList(*ExplicitArgumentList),
▲ Show 20 Lines • Show All 2,611 Lines • Show Last 20 Lines

clang/test/SemaCUDA/autoret-global.cu

This file was added.

				// RUN: %clang_cc1 -std=c++14 -fsyntax-only -verify %s

				#include "Inputs/cuda.h"

				template <typename T>
				__global__ T foo() {
				// expected-note@-1 {{kernel function type 'T ()' must have void return type}}
				}

				void f0() {
				foo<void><<<0, 0>>>();
				foo<int><<<0, 0>>>();
				// expected-error@-1 {{no matching function for call to 'foo'}}
				}

				__global__ auto f1() {
				}

				__global__ auto f2(int x) {
				return x + 1;
				// expected-error@-2 {{kernel function type 'auto (int)' must have void return type}}
				}

				template <bool Cond, typename T = void> struct enable_if { typedef T type; };
				template <typename T> struct enable_if<false, T> {};

				template <int N>
				__global__
				auto bar() -> typename enable_if<N == 1>::type {
				// expected-note@-1 {{requirement '3 == 1' was not satisfied [with N = 3]}}
				}

				template <int N>
				__global__
				auto bar() -> typename enable_if<N == 2>::type {
				// expected-note@-1 {{requirement '3 == 2' was not satisfied [with N = 3]}}
				}

				void f3() {
				bar<1><<<0, 0>>>();
				bar<2><<<0, 0>>>();
				bar<3><<<0, 0>>>();
				// expected-error@-1 {{no matching function for call to 'bar'}}
				}