This is an archive of the discontinued LLVM Phabricator instance.

[CUDA] Make sure we emit all templated __global__ functions on device side.
ClosedPublic

Authored by tra on Jul 30 2015, 3:48 PM.

Download Raw Diff

Details

Reviewers

eliben
echristo

Commits

rGc3fa25def761: [CUDA] Add implicit __attribute__((used)) to all __global__ functions.
rGb7e4aab40cd4: [CUDA] Add implicit __attribute__((used)) to all __global__ functions.
rC248293: [CUDA] Add implicit __attribute__((used)) to all __global__ functions.
rC244501: [CUDA] Add implicit __attribute__((used)) to all __global__ functions.
rL248293: [CUDA] Add implicit __attribute__((used)) to all __global__ functions.
rL244501: [CUDA] Add implicit __attribute__((used)) to all __global__ functions.

Summary

Templated kernels that were instantiated from the host code would normally be eliminated because they were never referenced on device side.
Add an implicit 'used' attribute to __global__ functions, which prevents their elimination.

Diff Detail

Repository: rL LLVM

Event Timeline

tra updated this revision to Diff 31082. Jul 30 2015, 3:48 PM

tra retitled this revision from to [CUDA] Make sure we emit all templated __global__ functions on device side..

tra updated this object.

tra added reviewers: echristo, eliben.

tra added a subscriber: cfe-commits.

Couldn't you just add an implicit UsedAttr when processing the CUDAGlobalAttr
and LangOpts.CUDAIsDevice was set to true?

Changed to use implicit 'used' attribute.

Ping.

LGTM

Closed by commit rL244501: [CUDA] Add implicit __attribute__((used)) to all __global__ functions. (authored by tra). · Explain Why · Aug 10 2015, 1:57 PM

This revision was automatically updated to reflect the committed changes.

tra mentioned this in D11993: [CUDA] Make sure we emit all templated __global__ functions on device side. Again.. Aug 12 2015, 2:30 PM

tra mentioned this in D13067: [CUDA] __global__ functions should always be visible externally.. Sep 22 2015, 1:42 PM

Revision Contents

Path

Size

cfe/

trunk/

lib/

Sema/

SemaDeclAttr.cpp

4 lines

test/

CodeGenCUDA/

ptx-kernels.cu

15 lines

Diff 31718

cfe/trunk/lib/Sema/SemaDeclAttr.cpp

Show First 20 Lines • Show All 3,344 Lines • ▼ Show 20 Lines	S.Diag(FD->getTypeSpecStartLoc(), diag::err_kern_type_not_void_return)
<< (RTRange.isValid() ? FixItHint::CreateReplacement(RTRange, "void")		<< (RTRange.isValid() ? FixItHint::CreateReplacement(RTRange, "void")
: FixItHint());		: FixItHint());
return;		return;
}		}

D->addAttr(::new (S.Context)		D->addAttr(::new (S.Context)
CUDAGlobalAttr(Attr.getRange(), S.Context,		CUDAGlobalAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));		Attr.getAttributeSpellingListIndex()));

		// Add implicit attribute((used)) so we don't eliminate kernels
		// because there is nothing referencing them on device side.
		D->addAttr(UsedAttr::CreateImplicit(S.Context));
}		}

static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {		static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {
FunctionDecl *Fn = cast<FunctionDecl>(D);		FunctionDecl *Fn = cast<FunctionDecl>(D);
if (!Fn->isInlineSpecified()) {		if (!Fn->isInlineSpecified()) {
S.Diag(Attr.getLoc(), diag::warn_gnu_inline_attribute_requires_inline);		S.Diag(Attr.getLoc(), diag::warn_gnu_inline_attribute_requires_inline);
return;		return;
}		}
▲ Show 20 Lines • Show All 2,165 Lines • Show Last 20 Lines

cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu

				// Make sure that __global__ functions are emitted along with correct
				// annotations and are added to @llvm.used to prevent their elimination.
				// REQUIRES: nvptx-registered-target
				//
	// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s			// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s

	#include "Inputs/cuda.h"			#include "Inputs/cuda.h"

				// Make sure that all __global__ functions are added to @llvm.used
				// CHECK: @llvm.used = appending global
				// CHECK-SAME: @global_function
				// CHECK-SAME: @_Z16templated_kernelIiEvT_

	// CHECK-LABEL: define void @device_function			// CHECK-LABEL: define void @device_function
	extern "C"			extern "C"
	__device__ void device_function() {}			__device__ void device_function() {}

	// CHECK-LABEL: define void @global_function			// CHECK-LABEL: define void @global_function
	extern "C"			extern "C"
	__global__ void global_function() {			__global__ void global_function() {
	// CHECK: call void @device_function			// CHECK: call void @device_function
	device_function();			device_function();
	}			}

				// Make sure host-instantiated kernels are preserved on device side.
				template <typename T> __global__ void templated_kernel(T param) {}
				// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
				void host_function() { templated_kernel<<<0,0>>>(0); }

	// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}			// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
				// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}