This is an archive of the discontinued LLVM Phabricator instance.

This is a revised version of the change from https://reviews.llvm.org/D76365, after fixing the Sema checks on template partial specialization. With this change, I can compile the following sample code, which uses a surface reference.

kernel.cu

#include <cuda.h>

surface<void, cudaSurfaceType2D> surf;

#if defined(__clang__)
// Device-side declaration whose symbol name is set by the asm label to
// "llvm.nvvm.suld.2d.i32.trap". It takes a 2D surface reference plus two int
// arguments and returns an int.
__device__ int
suld_2d_trap(surface<void, cudaSurfaceType2D>, int, int) asm("llvm.nvvm.suld.2d.i32.trap");

// Reads from the 2D surface reference `s` at coordinates (`x`, `y`) by
// forwarding to suld_2d_trap. The callee returns an int, which is converted
// to T by the return statement.
template <typename T>
static inline __device__ T
surf2Dread(surface<void, cudaSurfaceType2D> s, int x, int y) {
  // By default, `surf2Dread` uses trap mode.
  return suld_2d_trap(s, x, y);
}
#endif

// Device function that reads an int from the global surface reference `surf`
// at (x, y) via surf2Dread<int>.
__device__ int foo(int x, int y) { return surf2Dread<int>(surf, x, y); }

With NVCC, it generates

kernel.ptx after nvcc --ptx -rdc=true kernel.cu

//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-27506705
// Cuda compilation tools, release 10.2, V10.2.89
// Based on LLVM 3.4svn
//

.version 6.5
.target sm_30
.address_size 64

        // .globl       _Z3fooii
.visible .global .surfref surf;

.visible .func  (.param .b32 func_retval0) _Z3fooii(
        .param .b32 _Z3fooii_param_0,
        .param .b32 _Z3fooii_param_1
)
{
        .reg .b32       %r<4>;
        .reg .b64       %rd<2>;


        ld.param.u32    %r1, [_Z3fooii_param_0];
        ld.param.u32    %r2, [_Z3fooii_param_1];
        suld.b.2d.b32.trap {%r3}, [surf, {%r1, %r2}];
        st.param.b32    [func_retval0+0], %r3;
        ret;
}

With Clang, it generates

kernel-cuda-nvptx64-nvidia-cuda-sm_30.s after clang --cuda-device-only --cuda-gpu-arch=sm_30 -O2 -S kernel.cu

//
// Generated by LLVM NVPTX Back-End
//

.version 6.4
.target sm_30
.address_size 64

        // .globl       _Z3fooii
.visible .global .surfref surf;

.visible .func  (.param .b32 func_retval0) _Z3fooii(
        .param .b32 _Z3fooii_param_0,
        .param .b32 _Z3fooii_param_1
)
{
        .reg .b32       %r<4>;
        .reg .b64       %rd<2>;

        ld.param.u32    %r1, [_Z3fooii_param_0];
        ld.param.u32    %r2, [_Z3fooii_param_1];
        mov.u64         %rd1, surf;
        suld.b.2d.b32.trap {%r3}, [%rd1, {%r1, %r2}];
        st.param.b32    [func_retval0+0], %r3;
        ret;

}

Would it be possible to update the old review with the new diff? It would make it easier to see the incremental changes you've made. If the old review can be reopened that would be great as it would keep all relevant info in one place, but I'm fine doing the review here, too, if phabricator does not let you do it.

clang/test/SemaCUDA/bad-attributes.cu
74–75	Please add a few test cases replicating the use of these attributes in the CUDA headers.

In D76948#1946861, @tra wrote:

Would it be possible to update the old review with the new diff? It would make it easier to see the incremental changes you've made. If the old review can be reopened that would be great as it would keep all relevant info in one place, but I'm fine doing the review here, too, if phabricator does not let you do it.

I tried that before submitting this one. But, as it's in the closed state, I cannot submit that anymore. I will attach the difference against the previous change somewhere.

In D76948#1946861, @tra wrote:

Would it be possible to update the old review with the new diff? It would make it easier to see the incremental changes you've made. If the old review can be reopened that would be great as it would keep all relevant info in one place, but I'm fine doing the review here, too, if phabricator does not let you do it.

Check this for the new change.

https://gist.github.com/darkbuck/836dbb3112ca2e5fab769cf3cdaecd09

hliao marked an inline comment as done.Mar 27 2020, 1:45 PM

hliao added inline comments.

clang/test/SemaCUDA/bad-attributes.cu
74–75	The replication of the usage from the CUDA headers is added in the codegen tests. The tests here are illegal cases that the Sema checks should diagnose.

In D76948#1946878, @hliao wrote:

I tried that before submitting this one. But, as it's in the closed state, I cannot submit that anymore. I will attach the difference against the previous change somewhere.

I've reopened it. Let's move the patch and discussion there.

hliao abandoned this revision.Mar 27 2020, 1:55 PM

Harbormaster failed remote builds in B50725: Diff 253201!Mar 27 2020, 2:17 PM

Revision Contents

Path

Size

clang/

include/

clang/

AST/

Type.h

5 lines

Basic/

Attr.td

8 lines

AttrDocs.td

22 lines

DiagnosticSemaKinds.td

16 lines

lib/

AST/

Type.cpp

14 lines

CodeGen/

82 lines

20 lines

13 lines

72 lines

14 lines

26 lines

91 lines

Headers/

__clang_cuda_runtime_wrapper.h

4 lines

Sema/

SemaDeclAttr.cpp

10 lines

SemaDeclCXX.cpp

124 lines

test/

CodeGenCUDA/

surface.cu

42 lines

texture.cu

55 lines

Misc/

pragma-attribute-supported-attributes-list.test

2 lines

SemaCUDA/

attr-declspec.cu

15 lines

attributes-on-non-cuda.cu

15 lines

bad-attributes.cu

24 lines

llvm/

include/

llvm/

IR/

Operator.h

19 lines

Diff 253201

clang/include/clang/AST/Type.h

Show First 20 Lines • Show All 2,105 Lines • ▼ Show 20 Lines	#include "clang/Basic/OpenCLExtensionTypes.def"
bool isPipeType() const; // OpenCL pipe type		bool isPipeType() const; // OpenCL pipe type
bool isOpenCLSpecificType() const; // Any OpenCL specific type		bool isOpenCLSpecificType() const; // Any OpenCL specific type

/// Determines if this type, which must satisfy		/// Determines if this type, which must satisfy
/// isObjCLifetimeType(), is implicitly __unsafe_unretained rather		/// isObjCLifetimeType(), is implicitly __unsafe_unretained rather
/// than implicitly __strong.		/// than implicitly __strong.
bool isObjCARCImplicitlyUnretainedType() const;		bool isObjCARCImplicitlyUnretainedType() const;

		/// Check if the type is the CUDA device builtin surface type.
		bool isCUDADeviceBuiltinSurfaceType() const;
		/// Check if the type is the CUDA device builtin texture type.
		bool isCUDADeviceBuiltinTextureType() const;

/// Return the implicit lifetime for this type, which must not be dependent.		/// Return the implicit lifetime for this type, which must not be dependent.
Qualifiers::ObjCLifetime getObjCARCImplicitLifetime() const;		Qualifiers::ObjCLifetime getObjCARCImplicitLifetime() const;

enum ScalarTypeKind {		enum ScalarTypeKind {
STK_CPointer,		STK_CPointer,
STK_BlockPointer,		STK_BlockPointer,
STK_ObjCObjectPointer,		STK_ObjCObjectPointer,
STK_MemberPointer,		STK_MemberPointer,
▲ Show 20 Lines • Show All 4,906 Lines • Show Last 20 Lines

clang/include/clang/Basic/Attr.td

Show First 20 Lines • Show All 1,058 Lines • ▼ Show 20 Lines	def HIPPinnedShadow : InheritableAttr {
let Documentation = [HIPPinnedShadowDocs];		let Documentation = [HIPPinnedShadowDocs];
}		}

def CUDADeviceBuiltin : IgnoredAttr {		def CUDADeviceBuiltin : IgnoredAttr {
let Spellings = [GNU<"device_builtin">, Declspec<"__device_builtin__">];		let Spellings = [GNU<"device_builtin">, Declspec<"__device_builtin__">];
let LangOpts = [CUDA];		let LangOpts = [CUDA];
}		}

def CUDADeviceBuiltinSurfaceType : IgnoredAttr {		def CUDADeviceBuiltinSurfaceType : InheritableAttr {
let Spellings = [GNU<"device_builtin_surface_type">,		let Spellings = [GNU<"device_builtin_surface_type">,
Declspec<"__device_builtin_surface_type__">];		Declspec<"__device_builtin_surface_type__">];
let LangOpts = [CUDA];		let LangOpts = [CUDA];
		let Subjects = SubjectList<[CXXRecord]>;
		let Documentation = [CUDADeviceBuiltinSurfaceTypeDocs];
}		}

def CUDADeviceBuiltinTextureType : IgnoredAttr {		def CUDADeviceBuiltinTextureType : InheritableAttr {
let Spellings = [GNU<"device_builtin_texture_type">,		let Spellings = [GNU<"device_builtin_texture_type">,
Declspec<"__device_builtin_texture_type__">];		Declspec<"__device_builtin_texture_type__">];
let LangOpts = [CUDA];		let LangOpts = [CUDA];
		let Subjects = SubjectList<[CXXRecord]>;
		let Documentation = [CUDADeviceBuiltinTextureTypeDocs];
}		}

def CUDAGlobal : InheritableAttr {		def CUDAGlobal : InheritableAttr {
let Spellings = [GNU<"global">, Declspec<"__global__">];		let Spellings = [GNU<"global">, Declspec<"__global__">];
let Subjects = SubjectList<[Function]>;		let Subjects = SubjectList<[Function]>;
let LangOpts = [CUDA];		let LangOpts = [CUDA];
let Documentation = [Undocumented];		let Documentation = [Undocumented];
}		}
▲ Show 20 Lines • Show All 2,345 Lines • Show Last 20 Lines

clang/include/clang/Basic/AttrDocs.td

	Show First 20 Lines • Show All 4,618 Lines • ▼ Show 20 Lines
	__declspec(hip_pinned_shadow) can be added to the definition of a global variable			__declspec(hip_pinned_shadow) can be added to the definition of a global variable
	to indicate it is a HIP pinned shadow variable. A HIP pinned shadow variable can			to indicate it is a HIP pinned shadow variable. A HIP pinned shadow variable can
	be accessed on both device side and host side. It has external linkage and is			be accessed on both device side and host side. It has external linkage and is
	not initialized on device side. It has internal linkage and is initialized by			not initialized on device side. It has internal linkage and is initialized by
	the initializer on host side.			the initializer on host side.
	}];			}];
	}			}

				def CUDADeviceBuiltinSurfaceTypeDocs : Documentation {
				let Category = DocCatType;
				let Content = [{
				The ``device_builtin_surface_type`` attribute can be applied to a class
				template when declaring the surface reference. A surface reference variable
				could be accessed on the host side and, on the device side, might be translated
				into an internal surface object, which is established through surface bind and
				unbind runtime APIs.
				}];
				}

				def CUDADeviceBuiltinTextureTypeDocs : Documentation {
				let Category = DocCatType;
				let Content = [{
				The ``device_builtin_texture_type`` attribute can be applied to a class
				template when declaring the texture reference. A texture reference variable
				could be accessed on the host side and, on the device side, might be translated
				into an internal texture object, which is established through texture bind and
				unbind runtime APIs.
				}];
				}

	def LifetimeOwnerDocs : Documentation {			def LifetimeOwnerDocs : Documentation {
	let Category = DocCatDecl;			let Category = DocCatDecl;
	let Content = [{			let Content = [{
	.. Note:: This attribute is experimental and its effect on analysis is subject to change in			.. Note:: This attribute is experimental and its effect on analysis is subject to change in
	a future version of clang.			a future version of clang.

	The attribute ``[[gsl::Owner(T)]]`` applies to structs and classes that own an			The attribute ``[[gsl::Owner(T)]]`` applies to structs and classes that own an
	object of type ``T``:			object of type ``T``:
	▲ Show 20 Lines • Show All 246 Lines • Show Last 20 Lines

clang/include/clang/Basic/DiagnosticSemaKinds.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,961 Lines • ▼ Show 20 Lines	def err_cuda_host_shared : Error<
"%select{__device__\|__global__\|__host__\|__host__ __device__}0 functions">;		"%select{__device__\|__global__\|__host__\|__host__ __device__}0 functions">;
def err_cuda_nonglobal_constant : Error<"__constant__ variables must be global">;		def err_cuda_nonglobal_constant : Error<"__constant__ variables must be global">;
def err_cuda_ovl_target : Error<		def err_cuda_ovl_target : Error<
"%select{__device__\|__global__\|__host__\|__host__ __device__}0 function %1 "		"%select{__device__\|__global__\|__host__\|__host__ __device__}0 function %1 "
"cannot overload %select{__device__\|__global__\|__host__\|__host__ __device__}2 function %3">;		"cannot overload %select{__device__\|__global__\|__host__\|__host__ __device__}2 function %3">;
def note_cuda_ovl_candidate_target_mismatch : Note<		def note_cuda_ovl_candidate_target_mismatch : Note<
"candidate template ignored: target attributes do not match">;		"candidate template ignored: target attributes do not match">;

		def err_cuda_device_builtin_surftex_cls_template : Error<
		"illegal device builtin %select{surface\|texture}0 reference "
		"class template %1 declared here">;
		def note_cuda_device_builtin_surftex_cls_should_have_n_args : Note<
		"%0 needs to have exactly %1 template parameters">;
		def note_cuda_device_builtin_surftex_cls_should_have_match_arg : Note<
		"the %select{1st\|2nd\|3rd}1 template parameter of %0 needs to be "
		"%select{a type\|an integer or enum value}2">;

		def err_cuda_device_builtin_surftex_ref_decl : Error<
		"illegal device builtin %select{surface\|texture}0 reference "
		"type %1 declared here">;
		def note_cuda_device_builtin_surftex_should_be_template_class : Note<
		"%0 needs to be instantiated from a class template with proper "
		"template arguments">;

def warn_non_pod_vararg_with_format_string : Warning<		def warn_non_pod_vararg_with_format_string : Warning<
"cannot pass %select{non-POD\|non-trivial}0 object of type %1 to variadic "		"cannot pass %select{non-POD\|non-trivial}0 object of type %1 to variadic "
"%select{function\|block\|method\|constructor}2; expected type from format "		"%select{function\|block\|method\|constructor}2; expected type from format "
"string was %3">, InGroup<NonPODVarargs>, DefaultError;		"string was %3">, InGroup<NonPODVarargs>, DefaultError;
// The arguments to this diagnostic should match the warning above.		// The arguments to this diagnostic should match the warning above.
def err_cannot_pass_objc_interface_to_vararg_format : Error<		def err_cannot_pass_objc_interface_to_vararg_format : Error<
"cannot pass object with interface type %1 by value to variadic "		"cannot pass object with interface type %1 by value to variadic "
"%select{function\|block\|method\|constructor}2; expected type from format "		"%select{function\|block\|method\|constructor}2; expected type from format "
▲ Show 20 Lines • Show All 2,697 Lines • Show Last 20 Lines

clang/lib/AST/Type.cpp

Show First 20 Lines • Show All 4,087 Lines • ▼ Show 20 Lines	bool Type::isCARCBridgableType() const {
const auto *Pointer = getAs<PointerType>();		const auto *Pointer = getAs<PointerType>();
if (!Pointer)		if (!Pointer)
return false;		return false;

QualType Pointee = Pointer->getPointeeType();		QualType Pointee = Pointer->getPointeeType();
return Pointee->isVoidType() \|\| Pointee->isRecordType();		return Pointee->isVoidType() \|\| Pointee->isRecordType();
}		}

		/// Check if the specified type is the CUDA device builtin surface type.
		bool Type::isCUDADeviceBuiltinSurfaceType() const {
		if (const auto *RT = getAs<RecordType>())
		return RT->getDecl()->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>();
		return false;
		}

		/// Check if the specified type is the CUDA device builtin texture type.
		bool Type::isCUDADeviceBuiltinTextureType() const {
		if (const auto *RT = getAs<RecordType>())
		return RT->getDecl()->hasAttr<CUDADeviceBuiltinTextureTypeAttr>();
		return false;
		}

bool Type::hasSizedVLAType() const {		bool Type::hasSizedVLAType() const {
if (!isVariablyModifiedType()) return false;		if (!isVariablyModifiedType()) return false;

if (const auto *ptr = getAs<PointerType>())		if (const auto *ptr = getAs<PointerType>())
return ptr->getPointeeType()->hasSizedVLAType();		return ptr->getPointeeType()->hasSizedVLAType();
if (const auto *ref = getAs<ReferenceType>())		if (const auto *ref = getAs<ReferenceType>())
return ref->getPointeeType()->hasSizedVLAType();		return ref->getPointeeType()->hasSizedVLAType();
if (const ArrayType *arr = getAsArrayTypeUnsafe()) {		if (const ArrayType *arr = getAsArrayTypeUnsafe()) {
▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines

clang/lib/CodeGen/CGCUDANV.cpp

Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	private:
struct KernelInfo {		struct KernelInfo {
llvm::Function *Kernel;		llvm::Function *Kernel;
const Decl *D;		const Decl *D;
};		};
llvm::SmallVector<KernelInfo, 16> EmittedKernels;		llvm::SmallVector<KernelInfo, 16> EmittedKernels;
struct VarInfo {		struct VarInfo {
llvm::GlobalVariable *Var;		llvm::GlobalVariable *Var;
const VarDecl *D;		const VarDecl *D;
unsigned Flag;		DeviceVarFlags Flags;
};		};
llvm::SmallVector<VarInfo, 16> DeviceVars;		llvm::SmallVector<VarInfo, 16> DeviceVars;
/// Keeps track of variable containing handle of GPU binary. Populated by		/// Keeps track of variable containing handle of GPU binary. Populated by
/// ModuleCtorFunction() and used to create corresponding cleanup calls in		/// ModuleCtorFunction() and used to create corresponding cleanup calls in
/// ModuleDtorFunction()		/// ModuleDtorFunction()
llvm::GlobalVariable *GpuBinaryHandle = nullptr;		llvm::GlobalVariable *GpuBinaryHandle = nullptr;
/// Whether we generate relocatable device code.		/// Whether we generate relocatable device code.
bool RelocatableDeviceCode;		bool RelocatableDeviceCode;
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines	private:
void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);		void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
std::string getDeviceSideName(const NamedDecl *ND) override;		std::string getDeviceSideName(const NamedDecl *ND) override;

public:		public:
CGNVCUDARuntime(CodeGenModule &CGM);		CGNVCUDARuntime(CodeGenModule &CGM);

void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;		void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,		void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
unsigned Flags) override {		bool Extern, bool Constant) override {
DeviceVars.push_back({&Var, VD, Flags});		DeviceVars.push_back({&Var,
		VD,
		{DeviceVarFlags::Variable, Extern, Constant,
		/*Normalized*/ false, /*Type*/ 0}});
		}
		void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
		bool Extern, int Type) override {
		DeviceVars.push_back({&Var,
		VD,
		{DeviceVarFlags::Surface, Extern, /*Constant*/ false,
		/*Normalized*/ false, Type}});
		}
		void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
		bool Extern, int Type, bool Normalized) override {
		DeviceVars.push_back({&Var,
		VD,
		{DeviceVarFlags::Texture, Extern, /*Constant*/ false,
		Normalized, Type}});
}		}

/// Creates module constructor function		/// Creates module constructor function
llvm::Function *makeModuleCtorFunction() override;		llvm::Function *makeModuleCtorFunction() override;
/// Creates module destructor function		/// Creates module destructor function
llvm::Function *makeModuleDtorFunction() override;		llvm::Function *makeModuleDtorFunction() override;
};		};

▲ Show 20 Lines • Show All 289 Lines • ▼ Show 20 Lines	llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
// void __cudaRegisterVar(void **, char *, char *, const char *,		// void __cudaRegisterVar(void **, char *, char *, const char *,
// int, int, int, int)		// int, int, int, int)
llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,		llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
CharPtrTy, IntTy, IntTy,		CharPtrTy, IntTy, IntTy,
IntTy, IntTy};		IntTy, IntTy};
llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(		llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
llvm::FunctionType::get(IntTy, RegisterVarParams, false),		llvm::FunctionType::get(IntTy, RegisterVarParams, false),
addUnderscoredPrefixToName("RegisterVar"));		addUnderscoredPrefixToName("RegisterVar"));
		// void __cudaRegisterSurface(void **, const struct surfaceReference *,
		// const void **, const char *, int, int);
		llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
		llvm::FunctionType::get(
		VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
		false),
		addUnderscoredPrefixToName("RegisterSurface"));
		// void __cudaRegisterTexture(void **, const struct textureReference *,
		// const void **, const char *, int, int, int)
		llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
		llvm::FunctionType::get(
		VoidTy,
		{VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
		false),
		addUnderscoredPrefixToName("RegisterTexture"));
for (auto &&Info : DeviceVars) {		for (auto &&Info : DeviceVars) {
llvm::GlobalVariable *Var = Info.Var;		llvm::GlobalVariable *Var = Info.Var;
unsigned Flags = Info.Flag;
llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));		llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
		switch (Info.Flags.Kind) {
		case DeviceVarFlags::Variable: {
uint64_t VarSize =		uint64_t VarSize =
CGM.getDataLayout().getTypeAllocSize(Var->getValueType());		CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
llvm::Value *Args[] = {		llvm::Value *Args[] = {&GpuBinaryHandlePtr,
&GpuBinaryHandlePtr,
Builder.CreateBitCast(Var, VoidPtrTy),		Builder.CreateBitCast(Var, VoidPtrTy),
VarName,		VarName,
VarName,		VarName,
llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0),		llvm::ConstantInt::get(IntTy, Info.Flags.Extern),
llvm::ConstantInt::get(IntTy, VarSize),		llvm::ConstantInt::get(IntTy, VarSize),
llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0),		llvm::ConstantInt::get(IntTy, Info.Flags.Constant),
llvm::ConstantInt::get(IntTy, 0)};		llvm::ConstantInt::get(IntTy, 0)};
Builder.CreateCall(RegisterVar, Args);		Builder.CreateCall(RegisterVar, Args);
		break;
		}
		case DeviceVarFlags::Surface:
		Builder.CreateCall(
		RegisterSurf,
		{&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
		VarName, llvm::ConstantInt::get(IntTy, Info.Flags.SurfTexType),
		llvm::ConstantInt::get(IntTy, Info.Flags.Extern)});
		break;
		case DeviceVarFlags::Texture:
		Builder.CreateCall(
		RegisterTex,
		{&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
		VarName, llvm::ConstantInt::get(IntTy, Info.Flags.SurfTexType),
		llvm::ConstantInt::get(IntTy, Info.Flags.Normalized),
		llvm::ConstantInt::get(IntTy, Info.Flags.Extern)});
		break;
		}
}		}

Builder.CreateRetVoid();		Builder.CreateRetVoid();
return RegisterKernelsFunc;		return RegisterKernelsFunc;
}		}

/// Creates a global constructor function for the module:		/// Creates a global constructor function for the module:
///		///
▲ Show 20 Lines • Show All 334 Lines • Show Last 20 Lines

clang/lib/CodeGen/CGCUDARuntime.h

	Show All 36 Lines
	class RValue;			class RValue;

	class CGCUDARuntime {			class CGCUDARuntime {
	protected:			protected:
	CodeGenModule &CGM;			CodeGenModule &CGM;

	public:			public:
	// Global variable properties that must be passed to CUDA runtime.			// Global variable properties that must be passed to CUDA runtime.
	enum DeviceVarFlags {			struct DeviceVarFlags {
	ExternDeviceVar = 0x01, // extern			enum DeviceVarKind : unsigned {
	ConstantDeviceVar = 0x02, // __constant__			Variable, // Variable
				Surface, // Builtin surface
				Texture, // Builtin texture
				};
				DeviceVarKind Kind : 2;
				unsigned Extern : 1;
				unsigned Constant : 1; // Constant variable.
				unsigned Normalized : 1; // Normalized texture.
				int SurfTexType; // Type of surface/texture.
	};			};

	CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {}			CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {}
	virtual ~CGCUDARuntime();			virtual ~CGCUDARuntime();

	virtual RValue EmitCUDAKernelCallExpr(CodeGenFunction &CGF,			virtual RValue EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
	const CUDAKernelCallExpr *E,			const CUDAKernelCallExpr *E,
	ReturnValueSlot ReturnValue);			ReturnValueSlot ReturnValue);

	/// Emits a kernel launch stub.			/// Emits a kernel launch stub.
	virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;			virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
	virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,			virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
	unsigned Flags) = 0;			bool Extern, bool Constant) = 0;
				virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
				bool Extern, int Type) = 0;
				virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
				bool Extern, int Type, bool Normalized) = 0;

	/// Constructs and returns a module initialization function or nullptr if it's			/// Constructs and returns a module initialization function or nullptr if it's
	/// not needed. Must be called after all kernels have been emitted.			/// not needed. Must be called after all kernels have been emitted.
	virtual llvm::Function *makeModuleCtorFunction() = 0;			virtual llvm::Function *makeModuleCtorFunction() = 0;

	/// Returns a module cleanup function or nullptr if it's not needed.			/// Returns a module cleanup function or nullptr if it's not needed.
	/// Must be called after ModuleCtorFunction			/// Must be called after ModuleCtorFunction
	virtual llvm::Function *makeModuleDtorFunction() = 0;			virtual llvm::Function *makeModuleDtorFunction() = 0;
	Show All 13 Lines

clang/lib/CodeGen/CGExprAgg.cpp

Show All 9 Lines
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"		#include "CGCXXABI.h"
#include "CGObjCRuntime.h"		#include "CGObjCRuntime.h"
#include "CodeGenFunction.h"		#include "CodeGenFunction.h"
#include "CodeGenModule.h"		#include "CodeGenModule.h"
#include "ConstantEmitter.h"		#include "ConstantEmitter.h"
		#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"		#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"		#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"		#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclTemplate.h"		#include "clang/AST/DeclTemplate.h"
#include "clang/AST/StmtVisitor.h"		#include "clang/AST/StmtVisitor.h"
#include "llvm/IR/Constants.h"		#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"		#include "llvm/IR/GlobalVariable.h"
▲ Show 20 Lines • Show All 1,915 Lines • ▼ Show 20 Lines	if (const RecordType *RT = Ty->getAs<RecordType>()) {
"Trying to aggregate-copy a type without a trivial copy/move "		"Trying to aggregate-copy a type without a trivial copy/move "
"constructor or assignment operator");		"constructor or assignment operator");
// Ignore empty classes in C++.		// Ignore empty classes in C++.
if (Record->isEmpty())		if (Record->isEmpty())
return;		return;
}		}
}		}

		if (getLangOpts().CUDAIsDevice) {
		if (Ty->isCUDADeviceBuiltinSurfaceType()) {
		if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest,
		Src))
		return;
		} else if (Ty->isCUDADeviceBuiltinTextureType()) {
		if (getTargetHooks().emitCUDADeviceBuiltinTextureDeviceCopy(*this, Dest,
		Src))
		return;
		}
		}

// Aggregate assignment turns into llvm.memcpy. This is almost valid per		// Aggregate assignment turns into llvm.memcpy. This is almost valid per
// C99 6.5.16.1p3, which states "If the value being stored in an object is		// C99 6.5.16.1p3, which states "If the value being stored in an object is
// read from another object that overlaps in anyway the storage of the first		// read from another object that overlaps in anyway the storage of the first
// object, then the overlap shall be exact and the two objects shall have		// object, then the overlap shall be exact and the two objects shall have
// qualified or unqualified versions of a compatible type."		// qualified or unqualified versions of a compatible type."
//		//
// memcpy is not defined if the source and destination pointers are exactly		// memcpy is not defined if the source and destination pointers are exactly
// equal, but other compilers do this optimization, and almost every memcpy		// equal, but other compilers do this optimization, and almost every memcpy
▲ Show 20 Lines • Show All 81 Lines • Show Last 20 Lines

clang/lib/CodeGen/CodeGenModule.cpp

Show First 20 Lines • Show All 707 Lines • ▼ Show 20 Lines	llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) {
if (!TBAA)		if (!TBAA)
return nullptr;		return nullptr;
return TBAA->getTypeInfo(QTy);		return TBAA->getTypeInfo(QTy);
}		}

TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) {		TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) {
if (!TBAA)		if (!TBAA)
return TBAAAccessInfo();		return TBAAAccessInfo();
		if (getLangOpts().CUDAIsDevice) {
		// As CUDA builtin surface/texture types are replaced, skip generating TBAA
		// access info.
		if (AccessType->isCUDADeviceBuiltinSurfaceType()) {
		if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() !=
		nullptr)
		return TBAAAccessInfo();
		} else if (AccessType->isCUDADeviceBuiltinTextureType()) {
		if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() !=
		nullptr)
		return TBAAAccessInfo();
		}
		}
return TBAA->getAccessInfo(AccessType);		return TBAA->getAccessInfo(AccessType);
}		}

TBAAAccessInfo		TBAAAccessInfo
CodeGenModule::getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType) {		CodeGenModule::getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
if (!TBAA)		if (!TBAA)
return TBAAAccessInfo();		return TBAAAccessInfo();
return TBAA->getVTablePtrAccessInfo(VTablePtrType);		return TBAA->getVTablePtrAccessInfo(VTablePtrType);
▲ Show 20 Lines • Show All 1,778 Lines • ▼ Show 20 Lines	void CodeGenModule::EmitGlobal(GlobalDecl GD) {

// If this is CUDA, be selective about which declarations we emit.		// If this is CUDA, be selective about which declarations we emit.
if (LangOpts.CUDA) {		if (LangOpts.CUDA) {
if (LangOpts.CUDAIsDevice) {		if (LangOpts.CUDAIsDevice) {
if (!Global->hasAttr<CUDADeviceAttr>() &&		if (!Global->hasAttr<CUDADeviceAttr>() &&
!Global->hasAttr<CUDAGlobalAttr>() &&		!Global->hasAttr<CUDAGlobalAttr>() &&
!Global->hasAttr<CUDAConstantAttr>() &&		!Global->hasAttr<CUDAConstantAttr>() &&
!Global->hasAttr<CUDASharedAttr>() &&		!Global->hasAttr<CUDASharedAttr>() &&
!(LangOpts.HIP && Global->hasAttr<HIPPinnedShadowAttr>()))		!(LangOpts.HIP && Global->hasAttr<HIPPinnedShadowAttr>()) &&
		!Global->getType()->isCUDADeviceBuiltinSurfaceType() &&
		!Global->getType()->isCUDADeviceBuiltinTextureType())
return;		return;
} else {		} else {
// We need to emit host-side 'shadows' for all global		// We need to emit host-side 'shadows' for all global
// device-side variables because the CUDA runtime needs their		// device-side variables because the CUDA runtime needs their
// size and host-side address in order to provide access to		// size and host-side address in order to provide access to
// their device-side incarnations.		// their device-side incarnations.

// So device-only functions are the only things we skip.		// So device-only functions are the only things we skip.
▲ Show 20 Lines • Show All 1,383 Lines • ▼ Show 20 Lines	void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D,
bool IsCUDASharedVar =		bool IsCUDASharedVar =
getLangOpts().CUDAIsDevice && D->hasAttr<CUDASharedAttr>();		getLangOpts().CUDAIsDevice && D->hasAttr<CUDASharedAttr>();
// Shadows of initialized device-side global variables are also left		// Shadows of initialized device-side global variables are also left
// undefined.		// undefined.
bool IsCUDAShadowVar =		bool IsCUDAShadowVar =
!getLangOpts().CUDAIsDevice &&		!getLangOpts().CUDAIsDevice &&
(D->hasAttr<CUDAConstantAttr>() \|\| D->hasAttr<CUDADeviceAttr>() \|\|		(D->hasAttr<CUDAConstantAttr>() \|\| D->hasAttr<CUDADeviceAttr>() \|\|
D->hasAttr<CUDASharedAttr>());		D->hasAttr<CUDASharedAttr>());
		bool IsCUDADeviceShadowVar =
		getLangOpts().CUDAIsDevice &&
		(D->getType()->isCUDADeviceBuiltinSurfaceType() \|\|
		D->getType()->isCUDADeviceBuiltinTextureType());
// HIP pinned shadow of initialized host-side global variables are also		// HIP pinned shadow of initialized host-side global variables are also
// left undefined.		// left undefined.
bool IsHIPPinnedShadowVar =		bool IsHIPPinnedShadowVar =
getLangOpts().CUDAIsDevice && D->hasAttr<HIPPinnedShadowAttr>();		getLangOpts().CUDAIsDevice && D->hasAttr<HIPPinnedShadowAttr>();
if (getLangOpts().CUDA &&		if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar ||
(IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar))		IsCUDADeviceShadowVar || IsHIPPinnedShadowVar))
Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));		Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
else if (D->hasAttr<LoaderUninitializedAttr>())		else if (D->hasAttr<LoaderUninitializedAttr>())
Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));		Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
else if (!InitExpr) {		else if (!InitExpr) {
// This is a tentative definition; tentative definitions are		// This is a tentative definition; tentative definitions are
// implicitly initialized with { 0 }.		// implicitly initialized with { 0 }.
//		//
// Note that tentative definitions are only emitted at the end of		// Note that tentative definitions are only emitted at the end of
▲ Show 20 Lines • Show All 94 Lines • ▼ Show 20 Lines	if (GV && LangOpts.CUDA) {
} else {		} else {
// Host-side shadows of external declarations of device-side		// Host-side shadows of external declarations of device-side
// global variables become internal definitions. These have to		// global variables become internal definitions. These have to
// be internal in order to prevent name conflicts with global		// be internal in order to prevent name conflicts with global
// host variables with the same name in a different TUs.		// host variables with the same name in a different TUs.
if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||		if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
D->hasAttr<HIPPinnedShadowAttr>()) {		D->hasAttr<HIPPinnedShadowAttr>()) {
Linkage = llvm::GlobalValue::InternalLinkage;		Linkage = llvm::GlobalValue::InternalLinkage;
		// Shadow variables and their properties must be registered with CUDA
// Shadow variables and their properties must be registered		// runtime. Skip Extern global variables, which will be registered in
// with CUDA runtime.		// the TU where they are defined.
unsigned Flags = 0;
if (!D->hasDefinition())
Flags |= CGCUDARuntime::ExternDeviceVar;
if (D->hasAttr<CUDAConstantAttr>())
Flags |= CGCUDARuntime::ConstantDeviceVar;
// Extern global variables will be registered in the TU where they are
// defined.
if (!D->hasExternalStorage())		if (!D->hasExternalStorage())
getCUDARuntime().registerDeviceVar(D, *GV, Flags);		getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(),
} else if (D->hasAttr<CUDASharedAttr>())		D->hasAttr<CUDAConstantAttr>());
		} else if (D->hasAttr<CUDASharedAttr>()) {
// __shared__ variables are odd. Shadows do get created, but		// __shared__ variables are odd. Shadows do get created, but
// they are not registered with the CUDA runtime, so they		// they are not registered with the CUDA runtime, so they
// can't really be used to access their device-side		// can't really be used to access their device-side
// counterparts. It's not clear yet whether it's nvcc's bug or		// counterparts. It's not clear yet whether it's nvcc's bug or
// a feature, but we've got to do the same for compatibility.		// a feature, but we've got to do the same for compatibility.
Linkage = llvm::GlobalValue::InternalLinkage;		Linkage = llvm::GlobalValue::InternalLinkage;
		} else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
		D->getType()->isCUDADeviceBuiltinTextureType()) {
		// Builtin surfaces and textures and their template arguments are
		// also registered with CUDA runtime.
		Linkage = llvm::GlobalValue::InternalLinkage;
		const ClassTemplateSpecializationDecl *TD =
		cast<ClassTemplateSpecializationDecl>(
		D->getType()->getAs<RecordType>()->getDecl());
		const TemplateArgumentList &Args = TD->getTemplateArgs();
		if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
		assert(Args.size() == 2 &&
		"Unexpected number of template arguments of CUDA device "
		"builtin surface type.");
		auto SurfType = Args[1].getAsIntegral();
		if (!D->hasExternalStorage())
		getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(),
		SurfType.getSExtValue());
		} else {
		assert(Args.size() == 3 &&
		"Unexpected number of template arguments of CUDA device "
		"builtin texture type.");
		auto TexType = Args[1].getAsIntegral();
		auto Normalized = Args[2].getAsIntegral();
		if (!D->hasExternalStorage())
		getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(),
		TexType.getSExtValue(),
		Normalized.getZExtValue());
		}
		}
}		}
}		}

// HIPPinnedShadowVar should remain in the final code object irrespective of		// HIPPinnedShadowVar should remain in the final code object irrespective of
// whether it is used or not within the code. Add it to used list, so that		// whether it is used or not within the code. Add it to used list, so that
// it will not get eliminated when it is unused. Also, it is an extern var		// it will not get eliminated when it is unused. Also, it is an extern var
// within device code, and it should not get initialized within device code.		// within device code, and it should not get initialized within device code.
if (IsHIPPinnedShadowVar)		if (IsHIPPinnedShadowVar)
▲ Show 20 Lines • Show All 1,871 Lines • Show Last 20 Lines

clang/lib/CodeGen/CodeGenTypes.cpp

	Show First 20 Lines • Show All 377 Lines • ▼ Show 20 Lines
	}			}

	/// ConvertType - Convert the specified type to its LLVM form.			/// ConvertType - Convert the specified type to its LLVM form.
	llvm::Type *CodeGenTypes::ConvertType(QualType T) {			llvm::Type *CodeGenTypes::ConvertType(QualType T) {
	T = Context.getCanonicalType(T);			T = Context.getCanonicalType(T);

	const Type *Ty = T.getTypePtr();			const Type *Ty = T.getTypePtr();

				// For the device-side compilation, CUDA device builtin surface/texture types
				// may be represented in different types.
				if (Context.getLangOpts().CUDAIsDevice) {
				if (T->isCUDADeviceBuiltinSurfaceType()) {
				if (auto *Ty = CGM.getTargetCodeGenInfo()
				.getCUDADeviceBuiltinSurfaceDeviceType())
				return Ty;
				} else if (T->isCUDADeviceBuiltinTextureType()) {
				if (auto *Ty = CGM.getTargetCodeGenInfo()
				.getCUDADeviceBuiltinTextureDeviceType())
				return Ty;
				}
				}

	// RecordTypes are cached and processed specially.			// RecordTypes are cached and processed specially.
	if (const RecordType *RT = dyn_cast<RecordType>(Ty))			if (const RecordType *RT = dyn_cast<RecordType>(Ty))
	return ConvertRecordDeclType(RT->getDecl());			return ConvertRecordDeclType(RT->getDecl());

	// See if type is already cached.			// See if type is already cached.
	llvm::DenseMap<const Type *, llvm::Type *>::iterator TCI = TypeCache.find(Ty);			llvm::DenseMap<const Type *, llvm::Type *>::iterator TCI = TypeCache.find(Ty);
	// If type is found in map then use it. Otherwise, convert type T.			// If type is found in map then use it. Otherwise, convert type T.
	if (TCI != TypeCache.end())			if (TCI != TypeCache.end())
	▲ Show 20 Lines • Show All 455 Lines • Show Last 20 Lines

clang/lib/CodeGen/TargetInfo.h

Show First 20 Lines • Show All 309 Lines • ▼ Show 20 Lines	createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Value *BlockLiteral) const;		llvm::Value *BlockLiteral) const;

/// \return true if the target supports alias from the unmangled name to the		/// \return true if the target supports alias from the unmangled name to the
/// mangled name of functions declared within an extern "C" region and marked		/// mangled name of functions declared within an extern "C" region and marked
/// as 'used', and having internal linkage.		/// as 'used', and having internal linkage.
virtual bool shouldEmitStaticExternCAliases() const { return true; }		virtual bool shouldEmitStaticExternCAliases() const { return true; }

virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}		virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {}

		/// Return the device-side type for the CUDA device builtin surface type.
		virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const {
		// By default, no change from the original one.
		return nullptr;
		}
		/// Return the device-side type for the CUDA device builtin texture type.
		virtual llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const {
		// By default, no change from the original one.
		return nullptr;
		}

		/// Emit the device-side copy of the builtin surface type.
		virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF,
		LValue Dst,
		LValue Src) const {
		// DO NOTHING by default.
		return false;
		}
		/// Emit the device-side copy of the builtin texture type.
		virtual bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF,
		LValue Dst,
		LValue Src) const {
		// DO NOTHING by default.
		return false;
		}
};		};

} // namespace CodeGen		} // namespace CodeGen
} // namespace clang		} // namespace clang

#endif // LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H		#endif // LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H

clang/lib/CodeGen/TargetInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show All 22 Lines
#include "clang/CodeGen/CGFunctionInfo.h"		#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"		#include "clang/CodeGen/SwiftCallingConv.h"
#include "llvm/ADT/SmallBitVector.h"		#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"		#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"		#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"		#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"		#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"		#include "llvm/IR/DataLayout.h"
		#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Type.h"		#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"
#include <algorithm> // std::sort		#include <algorithm> // std::sort

using namespace clang;		using namespace clang;
using namespace CodeGen;		using namespace CodeGen;

// Helper for coercing an aggregate argument or return value into an integer		// Helper for coercing an aggregate argument or return value into an integer
▲ Show 20 Lines • Show All 6,370 Lines • ▼ Show 20 Lines
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation		// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

namespace {		namespace {

		class NVPTXTargetCodeGenInfo;

class NVPTXABIInfo : public ABIInfo {		class NVPTXABIInfo : public ABIInfo {
		NVPTXTargetCodeGenInfo &CGInfo;

public:		public:
NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}		NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
		: ABIInfo(CGT), CGInfo(Info) {}

ABIArgInfo classifyReturnType(QualType RetTy) const;		ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;		ABIArgInfo classifyArgumentType(QualType Ty) const;

void computeInfo(CGFunctionInfo &FI) const override;		void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,		Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;		QualType Ty) const override;
};		};

class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {		class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:		public:
NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)		NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {}		: TargetCodeGenInfo(new NVPTXABIInfo(CGT, *this)) {}

void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,		void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;		CodeGen::CodeGenModule &M) const override;
bool shouldEmitStaticExternCAliases() const override;		bool shouldEmitStaticExternCAliases() const override;

		llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
		// On the device side, surface reference is represented as an object handle
		// in 64-bit integer.
		return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
		}

		llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
		// On the device side, texture reference is represented as an object handle
		// in 64-bit integer.
		return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
		}

		bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
		LValue Src) const override {
		emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
		return true;
		}

		bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
		LValue Src) const override {
		emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
		return true;
		}

private:		private:
// Adds a NamedMDNode with F, Name, and Operand as operands, and adds the		// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.		// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand);		static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
		int Operand);

		static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
		LValue Src) {
		llvm::Value *Handle = nullptr;
		llvm::Constant *C =
		llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
		// Lookup `addrspacecast` through the constant pointer if any.
		if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
		C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
		if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
		// Load the handle from the specific global variable using
		// `nvvm.texsurf.handle.internal` intrinsic.
		Handle = CGF.EmitRuntimeCall(
		CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
		{GV->getType()}),
		{GV}, "texsurf_handle");
		} else
		Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
		CGF.EmitStoreOfScalar(Handle, Dst);
		}
};		};

/// Checks if the type is unsupported directly by the current target.		/// Checks if the type is unsupported directly by the current target.
static bool isUnsupportedType(ASTContext &Context, QualType T) {		static bool isUnsupportedType(ASTContext &Context, QualType T) {
if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())		if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
return true;		return true;
if (!Context.getTargetInfo().hasFloat128Type() &&		if (!Context.getTargetInfo().hasFloat128Type() &&
(T->isFloat128Type() ||		(T->isFloat128Type() ||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines
}		}

ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {		ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
// Treat an enum type as its underlying type.		// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())		if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();		Ty = EnumTy->getDecl()->getIntegerType();

// Return aggregates type as indirect by value		// Return aggregates type as indirect by value
if (isAggregateTypeForABI(Ty))		if (isAggregateTypeForABI(Ty)) {
		// Under CUDA device compilation, tex/surf builtin types are replaced with
		// object types and passed directly.
		if (getContext().getLangOpts().CUDAIsDevice) {
		if (Ty->isCUDADeviceBuiltinSurfaceType())
		return ABIArgInfo::getDirect(
		CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
		if (Ty->isCUDADeviceBuiltinTextureType())
		return ABIArgInfo::getDirect(
		CGInfo.getCUDADeviceBuiltinTextureDeviceType());
		}
return getNaturalAlignIndirect(Ty, /* byval */ true);		return getNaturalAlignIndirect(Ty, /* byval */ true);
		}

return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)		return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());		: ABIArgInfo::getDirect());
}		}

void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {		void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!getCXXABI().classifyReturnType(FI))		if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());		FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
Show All 11 Lines	Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {		QualType Ty) const {
llvm_unreachable("NVPTX does not support varargs");		llvm_unreachable("NVPTX does not support varargs");
}		}

void NVPTXTargetCodeGenInfo::setTargetAttributes(		void NVPTXTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {		const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())		if (GV->isDeclaration())
return;		return;
		const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
		if (VD) {
		if (M.getLangOpts().CUDA) {
		if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
		addNVVMMetadata(GV, "surface", 1);
		else if (VD->getType()->isCUDADeviceBuiltinTextureType())
		addNVVMMetadata(GV, "texture", 1);
		return;
		}
		}

const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);		const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;		if (!FD) return;

llvm::Function *F = cast<llvm::Function>(GV);		llvm::Function *F = cast<llvm::Function>(GV);

// Perform special handling in OpenCL mode		// Perform special handling in OpenCL mode
if (M.getLangOpts().OpenCL) {		if (M.getLangOpts().OpenCL) {
// Use OpenCL function attributes to check for kernel functions		// Use OpenCL function attributes to check for kernel functions
Show All 32 Lines	if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
if (MinBlocks > 0)		if (MinBlocks > 0)
// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node		// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());		addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
}		}
}		}
}		}
}		}

void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name,		void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
int Operand) {		StringRef Name, int Operand) {
llvm::Module *M = F->getParent();		llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();		llvm::LLVMContext &Ctx = M->getContext();

// Get "nvvm.annotations" metadata node		// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");		llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");

llvm::Metadata *MDVals[] = {		llvm::Metadata *MDVals[] = {
llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name),		llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(		llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};		llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
// Append metadata to nvvm.annotations		// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));		MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}		}

bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {		bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;		return false;
▲ Show 20 Lines • Show All 3,807 Lines • Show Last 20 Lines

clang/lib/Headers/__clang_cuda_runtime_wrapper.h

	Show First 20 Lines • Show All 77 Lines • ▼ Show 20 Lines
	// device_functions_decls is replaced by __clang_cuda_device_functions.h			// device_functions_decls is replaced by __clang_cuda_device_functions.h
	// included below.			// included below.
	#define __DEVICE_FUNCTIONS_DECLS_H__			#define __DEVICE_FUNCTIONS_DECLS_H__

	#undef __CUDACC__			#undef __CUDACC__
	#if CUDA_VERSION < 9000			#if CUDA_VERSION < 9000
	#define __CUDABE__			#define __CUDABE__
	#else			#else
				#define __CUDACC__
	#define __CUDA_LIBDEVICE__			#define __CUDA_LIBDEVICE__
	#endif			#endif
	// Disables definitions of device-side runtime support stubs in			// Disables definitions of device-side runtime support stubs in
	// cuda_device_runtime_api.h			// cuda_device_runtime_api.h
				#include "host_defines.h"
				#undef __CUDACC__
	#include "driver_types.h"			#include "driver_types.h"
	#include "host_config.h"			#include "host_config.h"
	#include "host_defines.h"

	// Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in			// Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in
	// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the			// cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the
	// functional equivalent of what we need.			// functional equivalent of what we need.
	#pragma push_macro("nv_weak")			#pragma push_macro("nv_weak")
	#define nv_weak weak			#define nv_weak weak
	#undef __CUDABE__			#undef __CUDABE__
	#undef __CUDA_LIBDEVICE__			#undef __CUDA_LIBDEVICE__
	▲ Show 20 Lines • Show All 326 Lines • Show Last 20 Lines

clang/lib/Sema/SemaDeclAttr.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 6,928 Lines • ▼ Show 20 Lines	case ParsedAttr::AT_CUDADevice:
break;		break;
case ParsedAttr::AT_CUDAHost:		case ParsedAttr::AT_CUDAHost:
handleSimpleAttributeWithExclusions<CUDAHostAttr, CUDAGlobalAttr>(S, D, AL);		handleSimpleAttributeWithExclusions<CUDAHostAttr, CUDAGlobalAttr>(S, D, AL);
break;		break;
case ParsedAttr::AT_HIPPinnedShadow:		case ParsedAttr::AT_HIPPinnedShadow:
handleSimpleAttributeWithExclusions<HIPPinnedShadowAttr, CUDADeviceAttr,		handleSimpleAttributeWithExclusions<HIPPinnedShadowAttr, CUDADeviceAttr,
CUDAConstantAttr>(S, D, AL);		CUDAConstantAttr>(S, D, AL);
break;		break;
		case ParsedAttr::AT_CUDADeviceBuiltinSurfaceType:
		handleSimpleAttributeWithExclusions<CUDADeviceBuiltinSurfaceTypeAttr,
		CUDADeviceBuiltinTextureTypeAttr>(S, D,
		AL);
		break;
		case ParsedAttr::AT_CUDADeviceBuiltinTextureType:
		handleSimpleAttributeWithExclusions<CUDADeviceBuiltinTextureTypeAttr,
		CUDADeviceBuiltinSurfaceTypeAttr>(S, D,
		AL);
		break;
case ParsedAttr::AT_GNUInline:		case ParsedAttr::AT_GNUInline:
handleGNUInlineAttr(S, D, AL);		handleGNUInlineAttr(S, D, AL);
break;		break;
case ParsedAttr::AT_CUDALaunchBounds:		case ParsedAttr::AT_CUDALaunchBounds:
handleLaunchBoundsAttr(S, D, AL);		handleLaunchBoundsAttr(S, D, AL);
break;		break;
case ParsedAttr::AT_Restrict:		case ParsedAttr::AT_Restrict:
handleRestrictAttr(S, D, AL);		handleRestrictAttr(S, D, AL);
▲ Show 20 Lines • Show All 809 Lines • Show Last 20 Lines

clang/lib/Sema/SemaDeclCXX.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,871 Lines • ▼ Show 20 Lines	if (LastExportedDefaultCtor) {
S.Diag(CD->getLocation(), diag::note_entity_declared_at)		S.Diag(CD->getLocation(), diag::note_entity_declared_at)
<< CD->getDeclName();		<< CD->getDeclName();
return;		return;
}		}
LastExportedDefaultCtor = CD;		LastExportedDefaultCtor = CD;
}		}
}		}

		static void checkCUDADeviceBuiltinSurfaceClassTemplate(Sema &S,
		CXXRecordDecl *Class) {
		bool ErrorReported = false;
		auto reportIllegalClassTemplate = [&ErrorReported](Sema &S,
		ClassTemplateDecl *TD) {
		if (ErrorReported)
		return;
		S.Diag(TD->getLocation(),
		diag::err_cuda_device_builtin_surftex_cls_template)
		<< /*surface*/ 0 << TD;
		ErrorReported = true;
		};

		ClassTemplateDecl *TD = Class->getDescribedClassTemplate();
		if (!TD) {
		auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(Class);
		if (!SD) {
		S.Diag(Class->getLocation(),
		diag::err_cuda_device_builtin_surftex_ref_decl)
		<< /*surface*/ 0 << Class;
		S.Diag(Class->getLocation(),
		diag::note_cuda_device_builtin_surftex_should_be_template_class)
		<< Class;
		return;
		}
		TD = SD->getSpecializedTemplate();
		}

		TemplateParameterList *Params = TD->getTemplateParameters();
		unsigned N = Params->size();

		if (N != 2) {
		reportIllegalClassTemplate(S, TD);
		S.Diag(TD->getLocation(),
		diag::note_cuda_device_builtin_surftex_cls_should_have_n_args)
		<< TD << 2;
		}
		if (N > 0 && !isa<TemplateTypeParmDecl>(Params->getParam(0))) {
		reportIllegalClassTemplate(S, TD);
		S.Diag(TD->getLocation(),
		diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg)
		<< TD << /*1st*/ 0 << /*type*/ 0;
		}
		if (N > 1) {
		auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Params->getParam(1));
		if (!NTTP || !NTTP->getType()->isIntegralOrEnumerationType()) {
		reportIllegalClassTemplate(S, TD);
		S.Diag(TD->getLocation(),
		diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg)
		<< TD << /*2nd*/ 1 << /*integer*/ 1;
		}
		}
		}

		static void checkCUDADeviceBuiltinTextureClassTemplate(Sema &S,
		CXXRecordDecl *Class) {
		bool ErrorReported = false;
		auto reportIllegalClassTemplate = [&ErrorReported](Sema &S,
		ClassTemplateDecl *TD) {
		if (ErrorReported)
		return;
		S.Diag(TD->getLocation(),
		diag::err_cuda_device_builtin_surftex_cls_template)
		<< /*texture*/ 1 << TD;
		ErrorReported = true;
		};

		ClassTemplateDecl *TD = Class->getDescribedClassTemplate();
		if (!TD) {
		auto *SD = dyn_cast<ClassTemplateSpecializationDecl>(Class);
		if (!SD) {
		S.Diag(Class->getLocation(),
		diag::err_cuda_device_builtin_surftex_ref_decl)
		<< /*texture*/ 1 << Class;
		S.Diag(Class->getLocation(),
		diag::note_cuda_device_builtin_surftex_should_be_template_class)
		<< Class;
		return;
		}
		TD = SD->getSpecializedTemplate();
		}

		TemplateParameterList *Params = TD->getTemplateParameters();
		unsigned N = Params->size();

		if (N != 3) {
		reportIllegalClassTemplate(S, TD);
		S.Diag(TD->getLocation(),
		diag::note_cuda_device_builtin_surftex_cls_should_have_n_args)
		<< TD << 3;
		}
		if (N > 0 && !isa<TemplateTypeParmDecl>(Params->getParam(0))) {
		reportIllegalClassTemplate(S, TD);
		S.Diag(TD->getLocation(),
		diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg)
		<< TD << /*1st*/ 0 << /*type*/ 0;
		}
		if (N > 1) {
		auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Params->getParam(1));
		if (!NTTP || !NTTP->getType()->isIntegralOrEnumerationType()) {
		reportIllegalClassTemplate(S, TD);
		S.Diag(TD->getLocation(),
		diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg)
		<< TD << /*2nd*/ 1 << /*integer*/ 1;
		}
		}
		if (N > 2) {
		auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Params->getParam(2));
		if (!NTTP || !NTTP->getType()->isIntegralOrEnumerationType()) {
		reportIllegalClassTemplate(S, TD);
		S.Diag(TD->getLocation(),
		diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg)
		<< TD << /*3rd*/ 2 << /*integer*/ 1;
		}
		}
		}

void Sema::checkClassLevelCodeSegAttribute(CXXRecordDecl *Class) {		void Sema::checkClassLevelCodeSegAttribute(CXXRecordDecl *Class) {
// Mark any compiler-generated routines with the implicit code_seg attribute.		// Mark any compiler-generated routines with the implicit code_seg attribute.
for (auto *Method : Class->methods()) {		for (auto *Method : Class->methods()) {
if (Method->isUserProvided())		if (Method->isUserProvided())
continue;		continue;
if (Attr *A = getImplicitCodeSegOrSectionAttrForFunction(Method, /*IsDefinition=*/true))		if (Attr *A = getImplicitCodeSegOrSectionAttrForFunction(Method, /*IsDefinition=*/true))
Method->addAttr(A);		Method->addAttr(A);
}		}
▲ Show 20 Lines • Show All 764 Lines • ▼ Show 20 Lines	void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) {
else if (Record->hasNonTrivialDestructor())		else if (Record->hasNonTrivialDestructor())
Record->setParamDestroyedInCallee(CanPass);		Record->setParamDestroyedInCallee(CanPass);

if (getLangOpts().ForceEmitVTables) {		if (getLangOpts().ForceEmitVTables) {
// If we want to emit all the vtables, we need to mark it as used. This		// If we want to emit all the vtables, we need to mark it as used. This
// is especially required for cases like vtable assumption loads.		// is especially required for cases like vtable assumption loads.
MarkVTableUsed(Record->getInnerLocStart(), Record);		MarkVTableUsed(Record->getInnerLocStart(), Record);
}		}

		if (getLangOpts().CUDA) {
		if (Record->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>())
		checkCUDADeviceBuiltinSurfaceClassTemplate(*this, Record);
		else if (Record->hasAttr<CUDADeviceBuiltinTextureTypeAttr>())
		checkCUDADeviceBuiltinTextureClassTemplate(*this, Record);
		}
}		}

/// Look up the special member function that would be called by a special		/// Look up the special member function that would be called by a special
/// member function for a subobject of class type.		/// member function for a subobject of class type.
///		///
/// \param Class The class type of the subobject.		/// \param Class The class type of the subobject.
/// \param CSM The kind of special member function.		/// \param CSM The kind of special member function.
/// \param FieldQuals If the subobject is a field, its cv-qualifiers.		/// \param FieldQuals If the subobject is a field, its cv-qualifiers.
▲ Show 20 Lines • Show All 10,883 Lines • Show Last 20 Lines

clang/test/CodeGenCUDA/surface.cu

This file was added.

				// REQUIRES: x86-registered-target
				// REQUIRES: nvptx-registered-target

				// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE %s
				// RUN: echo "GPU binary would be here" > %t
				// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s

				// Base type inherited by the `surface` templates in this test; it carries
				// a single `int` field.
				struct surfaceReference {
				int desc;
				};

				// Primary class template tagged with the `device_builtin_surface_type`
				// attribute. It takes a type and an integer parameter (`dim`, default 1)
				// and adds no members of its own beyond the `surfaceReference` base.
				template <typename T, int dim = 1>
				struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {
				};

				// Partial specialization over `void`.
				template<int dim>
				// [Review lint, pre-merge checks, clang-format] Please reformat the code: `template<int dim>` should be `template <int dim>`.
				struct __attribute__((device_builtin_surface_type)) surface<void, dim> : public surfaceReference {
				};

				// On the device side, surface references are represented as `i64` handles.
				// DEVICE: @surf = addrspace(1) global i64 undef, align 4
				// On the host side, they remain in the original type.
				// HOST: @surf = internal global %struct.surface
				// HOST: @0 = private unnamed_addr constant [5 x i8] c"surf\00"
				surface<void, 2> surf;

				// Device function declaration whose asm label binds it to the symbol
				// `llvm.nvvm.suld.2d.i32.zero`. It takes a `surface<void, 2>` by value
				// followed by two `int` arguments and returns `int`.
				__attribute__((device)) int suld_2d_zero(surface<void, 2>, int, int) asm("llvm.nvvm.suld.2d.i32.zero");

				// DEVICE-LABEL: i32 @_Z3fooii(i32 %x, i32 %y)
				// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf)
				// DEVICE: call i32 @llvm.nvvm.suld.2d.i32.zero(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
				// Device function under test: forwards the global `surf` and both
				// arguments to `suld_2d_zero` and returns its result unchanged.
				__attribute__((device)) int foo(int x, int y) {
				return suld_2d_zero(surf, x, y);
				}

				// HOST: define internal void @[[PREFIX:__cuda]]_register_globals
				// Surface references need registering with correct arguments.
				// HOST: call void @[[PREFIX]]RegisterSurface(i8** %0, i8*{{.*}}({{.*}}@surf{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i32 2, i32 0)

				// They also need annotating in metadata.
				// DEVICE: !0 = !{i64 addrspace(1)* @surf, !"surface", i32 1}

clang/test/CodeGenCUDA/texture.cu

This file was added.

				// REQUIRES: x86-registered-target
				// REQUIRES: nvptx-registered-target

				// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE %s
				// RUN: echo "GPU binary would be here" > %t
				// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s

				// Base type inherited by the `texture` template in this test; it carries
				// a single `int` field.
				struct textureReference {
				int desc;
				};

				// Enumeration used as the third template argument of `texture`. The two
				// enumerators have the explicit values 0 and 1.
				enum ReadMode {
				ElementType = 0,
				NormalizedFloat = 1
				};

				// Class template tagged with the `device_builtin_texture_type` attribute.
				// It takes a type, an integer (`dim`, default 1) and a `ReadMode` value
				// (default `ElementType`), and adds no members beyond the
				// `textureReference` base.
				template <typename T, int dim = 1, enum ReadMode mode = ElementType>
				struct __attribute__((device_builtin_texture_type)) texture : public textureReference {
				};

				// On the device side, texture references are represented as `i64` handles.
				// DEVICE: @tex = addrspace(1) global i64 undef, align 4
				// DEVICE: @norm = addrspace(1) global i64 undef, align 4
				// On the host side, they remain in the original type.
				// HOST: @tex = internal global %struct.texture
				// HOST: @norm = internal global %struct.texture
				// HOST: @0 = private unnamed_addr constant [4 x i8] c"tex\00"
				// HOST: @1 = private unnamed_addr constant [5 x i8] c"norm\00"
				texture<float, 2, ElementType> tex;
				texture<float, 2, NormalizedFloat> norm;

				// Aggregate of four floats; the return type of the `tex2d_ld` overloads.
				struct v4f {
				float x, y, z, w;
				};

				// Two device overloads, each bound by its asm label to a different symbol:
				// the `ElementType` texture overload takes float coordinates and maps to
				// `llvm.nvvm.tex.unified.2d.v4f32.f32`; the `NormalizedFloat` texture
				// overload takes int coordinates and maps to
				// `llvm.nvvm.tex.unified.2d.v4f32.s32`.
				__attribute__((device)) v4f tex2d_ld(texture<float, 2, ElementType>, float, float) asm("llvm.nvvm.tex.unified.2d.v4f32.f32");
				__attribute__((device)) v4f tex2d_ld(texture<float, 2, NormalizedFloat>, int, int) asm("llvm.nvvm.tex.unified.2d.v4f32.s32");

				// DEVICE-LABEL: float @_Z3fooff(float %x, float %y)
				// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex)
				// DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.f32(i64 %{{.*}}, float %{{.*}}, float %{{.*}})
				// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @norm)
				// DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.s32(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
				// Device function under test: calls the float-coordinate overload on `tex`
				// and the int-coordinate overload on `norm` (with `x` and `y` converted to
				// int), then returns the sum of the two `.x` components.
				__attribute__((device)) float foo(float x, float y) {
				return tex2d_ld(tex, x, y).x + tex2d_ld(norm, int(x), int(y)).x;
				}

				// HOST: define internal void @[[PREFIX:__cuda]]_register_globals
				// Texture references need registering with correct arguments.
				// HOST: call void @[[PREFIX]]RegisterTexture(i8** %0, i8*{{.*}}({{.*}}@tex{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i32 2, i32 0, i32 0)
				// HOST: call void @[[PREFIX]]RegisterTexture(i8** %0, i8*{{.*}}({{.*}}@norm{{.*}}), i8*{{.*}}({{.*}}@1{{.*}}), i8*{{.*}}({{.*}}@1{{.*}}), i32 2, i32 1, i32 0)

				// They also need annotating in metadata.
				// DEVICE: !0 = !{i64 addrspace(1)* @tex, !"texture", i32 1}
				// DEVICE: !1 = !{i64 addrspace(1)* @norm, !"texture", i32 1}

clang/test/Misc/pragma-attribute-supported-attributes-list.test

	Show All 24 Lines
	// CHECK-NEXT: CFAuditedTransfer (SubjectMatchRule_function)			// CHECK-NEXT: CFAuditedTransfer (SubjectMatchRule_function)
	// CHECK-NEXT: CFConsumed (SubjectMatchRule_variable_is_parameter)			// CHECK-NEXT: CFConsumed (SubjectMatchRule_variable_is_parameter)
	// CHECK-NEXT: CFICanonicalJumpTable (SubjectMatchRule_function)			// CHECK-NEXT: CFICanonicalJumpTable (SubjectMatchRule_function)
	// CHECK-NEXT: CFUnknownTransfer (SubjectMatchRule_function)			// CHECK-NEXT: CFUnknownTransfer (SubjectMatchRule_function)
	// CHECK-NEXT: CPUDispatch (SubjectMatchRule_function)			// CHECK-NEXT: CPUDispatch (SubjectMatchRule_function)
	// CHECK-NEXT: CPUSpecific (SubjectMatchRule_function)			// CHECK-NEXT: CPUSpecific (SubjectMatchRule_function)
	// CHECK-NEXT: CUDAConstant (SubjectMatchRule_variable)			// CHECK-NEXT: CUDAConstant (SubjectMatchRule_variable)
	// CHECK-NEXT: CUDADevice (SubjectMatchRule_function, SubjectMatchRule_variable)			// CHECK-NEXT: CUDADevice (SubjectMatchRule_function, SubjectMatchRule_variable)
				// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record)
				// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record)
	// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)			// CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function)
	// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)			// CHECK-NEXT: CUDAHost (SubjectMatchRule_function)
	// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)			// CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType)
	// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)			// CHECK-NEXT: CUDAShared (SubjectMatchRule_variable)
	// CHECK-NEXT: CXX11NoReturn (SubjectMatchRule_function)			// CHECK-NEXT: CXX11NoReturn (SubjectMatchRule_function)
	// CHECK-NEXT: CallableWhen (SubjectMatchRule_function_is_member)			// CHECK-NEXT: CallableWhen (SubjectMatchRule_function_is_member)
	// CHECK-NEXT: Callback (SubjectMatchRule_function)			// CHECK-NEXT: Callback (SubjectMatchRule_function)
	// CHECK-NEXT: Capability (SubjectMatchRule_record, SubjectMatchRule_type_alias)			// CHECK-NEXT: Capability (SubjectMatchRule_record, SubjectMatchRule_type_alias)
	▲ Show 20 Lines • Show All 126 Lines • Show Last 20 Lines

clang/test/SemaCUDA/attr-declspec.cu

	// Test the __declspec spellings of CUDA attributes.			// Test the __declspec spellings of CUDA attributes.
	//			//
	// RUN: %clang_cc1 -fsyntax-only -fms-extensions -verify %s			// RUN: %clang_cc1 -fsyntax-only -fms-extensions -verify %s
	// RUN: %clang_cc1 -fsyntax-only -fms-extensions -fcuda-is-device -verify %s			// RUN: %clang_cc1 -fsyntax-only -fms-extensions -fcuda-is-device -verify %s
	// Now pretend that we're compiling a C file. There should be warnings.			// Now pretend that we're compiling a C file. There should be warnings.
	// RUN: %clang_cc1 -DEXPECT_WARNINGS -fms-extensions -fsyntax-only -verify -x c %s			// RUN: %clang_cc1 -DEXPECT_WARNINGS -fms-extensions -fsyntax-only -verify -x c %s

	#if defined(EXPECT_WARNINGS)			#if defined(EXPECT_WARNINGS)
	// expected-warning@+12 {{'__device__' attribute ignored}}			// expected-warning@+15 {{'__device__' attribute ignored}}
	// expected-warning@+12 {{'__global__' attribute ignored}}			// expected-warning@+15 {{'__global__' attribute ignored}}
	// expected-warning@+12 {{'__constant__' attribute ignored}}			// expected-warning@+15 {{'__constant__' attribute ignored}}
	// expected-warning@+12 {{'__shared__' attribute ignored}}			// expected-warning@+15 {{'__shared__' attribute ignored}}
	// expected-warning@+12 {{'__host__' attribute ignored}}			// expected-warning@+15 {{'__host__' attribute ignored}}
				// expected-warning@+20 {{'__device_builtin_surface_type__' attribute ignored}}
				// expected-warning@+20 {{'__device_builtin_texture_type__' attribute ignored}}
	//			//
	// (Currently we don't for the other attributes. They are implemented with			// (Currently we don't for the other attributes. They are implemented with
	// IgnoredAttr, which is ignored irrespective of any LangOpts.)			// IgnoredAttr, which is ignored irrespective of any LangOpts.)
	#else			#else
	// expected-no-diagnostics			// expected-warning@+14 {{'__device_builtin_surface_type__' attribute only applies to classes}}
				// expected-warning@+14 {{'__device_builtin_texture_type__' attribute only applies to classes}}
	#endif			#endif

	__declspec(__device__) void f_device();			__declspec(__device__) void f_device();
	__declspec(__global__) void f_global();			__declspec(__global__) void f_global();
	__declspec(__constant__) int* g_constant;			__declspec(__constant__) int* g_constant;
	__declspec(__shared__) float *g_shared;			__declspec(__shared__) float *g_shared;
	__declspec(__host__) void f_host();			__declspec(__host__) void f_host();
	__declspec(__device_builtin__) void f_device_builtin();			__declspec(__device_builtin__) void f_device_builtin();
	typedef __declspec(__device_builtin__) const void *t_device_builtin;			typedef __declspec(__device_builtin__) const void *t_device_builtin;
	enum __declspec(__device_builtin__) e_device_builtin {E};			enum __declspec(__device_builtin__) e_device_builtin {E};
	__declspec(__device_builtin__) int v_device_builtin;			__declspec(__device_builtin__) int v_device_builtin;
	__declspec(__cudart_builtin__) void f_cudart_builtin();			__declspec(__cudart_builtin__) void f_cudart_builtin();
	__declspec(__device_builtin_surface_type__) unsigned long long surface_var;			__declspec(__device_builtin_surface_type__) unsigned long long surface_var;
	__declspec(__device_builtin_texture_type__) unsigned long long texture_var;			__declspec(__device_builtin_texture_type__) unsigned long long texture_var;

	// Note that there's no __declspec spelling of nv_weak.			// Note that there's no __declspec spelling of nv_weak.

clang/test/SemaCUDA/attributes-on-non-cuda.cu

	// Tests that CUDA attributes are warnings when compiling C files, but not when			// Tests that CUDA attributes are warnings when compiling C files, but not when
	// compiling CUDA files.			// compiling CUDA files.
	//			//
	// RUN: %clang_cc1 -fsyntax-only -verify %s			// RUN: %clang_cc1 -fsyntax-only -verify %s
	// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s			// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
	// Now pretend that we're compiling a C file. There should be warnings.			// Now pretend that we're compiling a C file. There should be warnings.
	// RUN: %clang_cc1 -DEXPECT_WARNINGS -fsyntax-only -verify -x c %s			// RUN: %clang_cc1 -DEXPECT_WARNINGS -fsyntax-only -verify -x c %s

	#if defined(EXPECT_WARNINGS)			#if defined(EXPECT_WARNINGS)
	// expected-warning@+12 {{'device' attribute ignored}}			// expected-warning@+15 {{'device' attribute ignored}}
	// expected-warning@+12 {{'global' attribute ignored}}			// expected-warning@+15 {{'global' attribute ignored}}
	// expected-warning@+12 {{'constant' attribute ignored}}			// expected-warning@+15 {{'constant' attribute ignored}}
	// expected-warning@+12 {{'shared' attribute ignored}}			// expected-warning@+15 {{'shared' attribute ignored}}
	// expected-warning@+12 {{'host' attribute ignored}}			// expected-warning@+15 {{'host' attribute ignored}}
				// expected-warning@+21 {{'device_builtin_surface_type' attribute ignored}}
				// expected-warning@+21 {{'device_builtin_texture_type' attribute ignored}}
	//			//
	// NOTE: IgnoredAttr in clang which is used for the rest of			// NOTE: IgnoredAttr in clang which is used for the rest of
	// attributes ignores LangOpts, so there are no warnings.			// attributes ignores LangOpts, so there are no warnings.
	#else			#else
	// expected-no-diagnostics			// expected-warning@+15 {{'device_builtin_surface_type' attribute only applies to classes}}
				// expected-warning@+15 {{'device_builtin_texture_type' attribute only applies to classes}}
	#endif			#endif

	__attribute__((device)) void f_device();			__attribute__((device)) void f_device();
	__attribute__((global)) void f_global();			__attribute__((global)) void f_global();
	__attribute__((constant)) int* g_constant;			__attribute__((constant)) int* g_constant;
	__attribute__((shared)) float *g_shared;			__attribute__((shared)) float *g_shared;
	__attribute__((host)) void f_host();			__attribute__((host)) void f_host();
	__attribute__((device_builtin)) void f_device_builtin();			__attribute__((device_builtin)) void f_device_builtin();
	typedef __attribute__((device_builtin)) const void *t_device_builtin;			typedef __attribute__((device_builtin)) const void *t_device_builtin;
	enum __attribute__((device_builtin)) e_device_builtin {E};			enum __attribute__((device_builtin)) e_device_builtin {E};
	__attribute__((device_builtin)) int v_device_builtin;			__attribute__((device_builtin)) int v_device_builtin;
	__attribute__((cudart_builtin)) void f_cudart_builtin();			__attribute__((cudart_builtin)) void f_cudart_builtin();
	__attribute__((nv_weak)) void f_nv_weak();			__attribute__((nv_weak)) void f_nv_weak();
	__attribute__((device_builtin_surface_type)) unsigned long long surface_var;			__attribute__((device_builtin_surface_type)) unsigned long long surface_var;
	__attribute__((device_builtin_texture_type)) unsigned long long texture_var;			__attribute__((device_builtin_texture_type)) unsigned long long texture_var;

clang/test/SemaCUDA/bad-attributes.cu

	Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines
	__constant__ int global_constant;			__constant__ int global_constant;
	void host_fn() {			void host_fn() {
	__constant__ int c; // expected-error {{__constant__ variables must be global}}			__constant__ int c; // expected-error {{__constant__ variables must be global}}
	__shared__ int s; // expected-error {{__shared__ local variables not allowed in __host__ functions}}			__shared__ int s; // expected-error {{__shared__ local variables not allowed in __host__ functions}}
	}			}
	__device__ void device_fn() {			__device__ void device_fn() {
	__constant__ int c; // expected-error {{__constant__ variables must be global}}			__constant__ int c; // expected-error {{__constant__ variables must be global}}
	}			}

				typedef __attribute__((device_builtin_surface_type)) unsigned long long s0_ty; // expected-warning {{'device_builtin_surface_type' attribute only applies to classes}}
				typedef __attribute__((device_builtin_texture_type)) unsigned long long t0_ty; // expected-warning {{'device_builtin_texture_type' attribute only applies to classes}}
				tra (inline comment, not done): Please add a few test cases replicating the use of these attributes in CUDA headers.
				hliao (author, inline reply, done): The replication from the CUDA headers is added in those codegen tests. These tests are the illegal cases which the Sema checks should identify.

				struct __attribute__((device_builtin_surface_type)) s1_ref {}; // expected-error {{illegal device builtin surface reference type 's1_ref' declared here}}
				// expected-note@-1 {{'s1_ref' needs to be instantiated from a class template with proper template arguments}}
				struct __attribute__((device_builtin_texture_type)) t1_ref {}; // expected-error {{illegal device builtin texture reference type 't1_ref' declared here}}
				// expected-note@-1 {{'t1_ref' needs to be instantiated from a class template with proper template arguments}}

				template <typename T>
				struct __attribute__((device_builtin_surface_type)) s2_cls_template {}; // expected-error {{illegal device builtin surface reference class template 's2_cls_template' declared here}}
				// expected-note@-1 {{'s2_cls_template' needs to have exactly 2 template parameters}}
				template <typename T>
				struct __attribute__((device_builtin_texture_type)) t2_cls_template {}; // expected-error {{illegal device builtin texture reference class template 't2_cls_template' declared here}}
				// expected-note@-1 {{'t2_cls_template' needs to have exactly 3 template parameters}}

				template <int val, void *ptr>
				struct __attribute__((device_builtin_surface_type)) s3_cls_template {}; // expected-error {{illegal device builtin surface reference class template 's3_cls_template' declared here}}
				// expected-note@-1 {{the 1st template parameter of 's3_cls_template' needs to be a type}}
				// expected-note@-2 {{the 2nd template parameter of 's3_cls_template' needs to be an integer or enum value}}
				template <int val, int type, typename T>
				struct __attribute__((device_builtin_texture_type)) t3_cls_template {}; // expected-error {{illegal device builtin texture reference class template 't3_cls_template' declared here}}
				// expected-note@-1 {{the 1st template parameter of 't3_cls_template' needs to be a type}}
				// expected-note@-2 {{the 3rd template parameter of 't3_cls_template' needs to be an integer or enum value}}

llvm/include/llvm/IR/Operator.h

Show First 20 Lines • Show All 593 Lines • ▼ Show 20 Lines	Type *getSrcTy() const {
return getOperand(0)->getType();		return getOperand(0)->getType();
}		}

Type *getDestTy() const {		Type *getDestTy() const {
return getType();		return getType();
}		}
};		};

		/// Operator wrapper for values whose opcode is Instruction::AddrSpaceCast.
		class AddrSpaceCastOperator
		: public ConcreteOperator<Operator, Instruction::AddrSpaceCast> {
		friend class AddrSpaceCastInst;
		friend class ConstantExpr;

		public:
		/// Returns operand 0, the pointer operand of the cast.
		Value *getPointerOperand() { return getOperand(0); }
		const Value *getPointerOperand() const { return getOperand(0); }

		/// Returns the address space of the pointer operand's type.
		unsigned getSrcAddressSpace() const {
		const Value *Src = getPointerOperand();
		return Src->getType()->getPointerAddressSpace();
		}

		/// Returns the address space of this operator's own type.
		unsigned getDestAddressSpace() const {
		Type *DestTy = getType();
		return DestTy->getPointerAddressSpace();
		}
		};

} // end namespace llvm		} // end namespace llvm

#endif // LLVM_IR_OPERATOR_H		#endif // LLVM_IR_OPERATOR_H