This is an archive of the discontinued LLVM Phabricator instance.

[CUDA] Do not merge CUDA target attributes.
ClosedPublic

Authored by tra on Sep 13 2016, 2:36 PM.

Download Raw Diff

Details

Reviewers

Commits

rGbed18e9cc4ce: [CUDA] Do not merge CUDA target attributes.
rC281406: [CUDA] Do not merge CUDA target attributes.
rL281406: [CUDA] Do not merge CUDA target attributes.

Summary

CUDA target attributes are used for function overloading and must not be merged.

This fixes a bug where attributes were inherited during function template
specialization in CUDA, which made it impossible for a specialized function
to provide its own target attributes.

Diff Detail

Event Timeline

tra updated this revision to Diff 71238. · Sep 13 2016, 2:36 PM

tra retitled this revision to "[CUDA] Do not merge CUDA target attributes."

tra updated this object.

tra added a reviewer: jlebar.

tra added a subscriber: cfe-commits.

Herald added a subscriber: jlebar. · View Herald Transcript · Sep 13 2016, 2:36 PM

Yay, this is great.

test/SemaCUDA/target_attr_inheritance.cu
Line 5: Other reviewers have pointed out to me that we don't usually (ever?) need this. I think these have to do with LLVM's ability to generate code for our targets, but it's not relevant to Clang here.

This revision is now accepted and ready to land. · Sep 13 2016, 2:57 PM

Removed REQUIRES lines.

Closed by commit rL281406: [CUDA] Do not merge CUDA target attributes. (authored by tra). · Explain Why · Sep 13 2016, 3:25 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

lib/

Sema/

SemaDecl.cpp

8 lines

test/

SemaCUDA/

function-overload.cu

11 lines

target_attr_inheritance.cu

29 lines

Diff 71244

lib/Sema/SemaDecl.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 2,286 Lines • ▼ Show 20 Lines	static bool mergeDeclAttribute(Sema &S, NamedDecl *D,
else if (const auto *IA = dyn_cast<MSInheritanceAttr>(Attr))		else if (const auto *IA = dyn_cast<MSInheritanceAttr>(Attr))
NewAttr = S.mergeMSInheritanceAttr(D, IA->getRange(), IA->getBestCase(),		NewAttr = S.mergeMSInheritanceAttr(D, IA->getRange(), IA->getBestCase(),
AttrSpellingListIndex,		AttrSpellingListIndex,
IA->getSemanticSpelling());		IA->getSemanticSpelling());
else if (const auto *AA = dyn_cast<AlwaysInlineAttr>(Attr))		else if (const auto *AA = dyn_cast<AlwaysInlineAttr>(Attr))
NewAttr = S.mergeAlwaysInlineAttr(D, AA->getRange(),		NewAttr = S.mergeAlwaysInlineAttr(D, AA->getRange(),
&S.Context.Idents.get(AA->getSpelling()),		&S.Context.Idents.get(AA->getSpelling()),
AttrSpellingListIndex);		AttrSpellingListIndex);
else if (const auto *MA = dyn_cast<MinSizeAttr>(Attr))		else if (S.getLangOpts().CUDA && isa<FunctionDecl>(D) &&
		(isa<CUDAHostAttr>(Attr) || isa<CUDADeviceAttr>(Attr) ||
		isa<CUDAGlobalAttr>(Attr))) {
		// CUDA target attributes are part of function signature for
		// overloading purposes and must not be merged.
		return false;
		} else if (const auto *MA = dyn_cast<MinSizeAttr>(Attr))
NewAttr = S.mergeMinSizeAttr(D, MA->getRange(), AttrSpellingListIndex);		NewAttr = S.mergeMinSizeAttr(D, MA->getRange(), AttrSpellingListIndex);
else if (const auto *OA = dyn_cast<OptimizeNoneAttr>(Attr))		else if (const auto *OA = dyn_cast<OptimizeNoneAttr>(Attr))
NewAttr = S.mergeOptimizeNoneAttr(D, OA->getRange(), AttrSpellingListIndex);		NewAttr = S.mergeOptimizeNoneAttr(D, OA->getRange(), AttrSpellingListIndex);
else if (const auto *InternalLinkageA = dyn_cast<InternalLinkageAttr>(Attr))		else if (const auto *InternalLinkageA = dyn_cast<InternalLinkageAttr>(Attr))
NewAttr = S.mergeInternalLinkageAttr(		NewAttr = S.mergeInternalLinkageAttr(
D, InternalLinkageA->getRange(),		D, InternalLinkageA->getRange(),
&S.Context.Idents.get(InternalLinkageA->getSpelling()),		&S.Context.Idents.get(InternalLinkageA->getSpelling()),
AttrSpellingListIndex);		AttrSpellingListIndex);
▲ Show 20 Lines • Show All 13,265 Lines • Show Last 20 Lines

test/SemaCUDA/function-overload.cu

	Show First 20 Lines • Show All 373 Lines • ▼ Show 20 Lines
	__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }			__host__ HostReturnTy2 host_only_function(float arg) { return HostReturnTy2(); }

	__host__ __device__ void test_host_device_single_side_overloading() {			__host__ __device__ void test_host_device_single_side_overloading() {
	DeviceReturnTy ret1 = device_only_function(1);			DeviceReturnTy ret1 = device_only_function(1);
	DeviceReturnTy2 ret2 = device_only_function(1.0f);			DeviceReturnTy2 ret2 = device_only_function(1.0f);
	HostReturnTy ret3 = host_only_function(1);			HostReturnTy ret3 = host_only_function(1);
	HostReturnTy2 ret4 = host_only_function(1.0f);			HostReturnTy2 ret4 = host_only_function(1.0f);
	}			}

				// Verify that we allow overloading function templates.
				template <typename T> __host__ T template_overload(const T &a) { return a; };
				template <typename T> __device__ T template_overload(const T &a) { return a; };

				__host__ void test_host_template_overload() {
				template_overload(1); // OK. Attribute-based overloading picks __host__ variant.
				}
				__device__ void test_device_template_overload() {
				template_overload(1); // OK. Attribute-based overloading picks __device__ variant.
				}

test/SemaCUDA/target_attr_inheritance.cu

This file was added.

				// Verifies correct inheritance of target attributes during template
				// instantiation and specialization.

				// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
				// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify %s
				[Inline comment, jlebar (done)]: Other reviewers have pointed out to me that we don't usually (ever?) need this. I think these have to do with llvm's ability to generate code for our targets, but it's not relevant to clang here.

				#include "Inputs/cuda.h"

				// Function must inherit target attributes during instantiation, but not during
				// specialization.
				template <typename T> __host__ __device__ T function_template(const T &a);

				// Specialized functions have their own attributes.
				// expected-note@+1 {{candidate function not viable: call to __host__ function from __device__ function}}
				template <> __host__ float function_template<float>(const float &from);

				// expected-note@+1 {{candidate function not viable: call to __device__ function from __host__ function}}
				template <> __device__ double function_template<double>(const double &from);

				__host__ void hf() {
				function_template<float>(1.0f); // OK. Specialization is __host__.
				function_template<double>(2.0); // expected-error {{no matching function for call to 'function_template'}}
				function_template(1); // OK. Instantiated function template is HD.
				}
				__device__ void df() {
				function_template<float>(3.0f); // expected-error {{no matching function for call to 'function_template'}}
				function_template<double>(4.0); // OK. Specialization is __device__.
				function_template(1); // OK. Instantiated function template is HD.
				}