diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2153,6 +2153,11 @@ /// than implicitly __strong. bool isObjCARCImplicitlyUnretainedType() const; + /// Check if the type is the CUDA device builtin surface type. + bool isCUDADeviceBuiltinSurfaceType() const; + /// Check if the type is the CUDA device builtin texture type. + bool isCUDADeviceBuiltinTextureType() const; + /// Return the implicit lifetime for this type, which must not be dependent. Qualifiers::ObjCLifetime getObjCARCImplicitLifetime() const; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1044,16 +1044,20 @@ let LangOpts = [CUDA]; } -def CUDADeviceBuiltinSurfaceType : IgnoredAttr { +def CUDADeviceBuiltinSurfaceType : InheritableAttr { let Spellings = [GNU<"device_builtin_surface_type">, Declspec<"__device_builtin_surface_type__">]; let LangOpts = [CUDA]; + let Subjects = SubjectList<[Type]>; + let Documentation = [CUDADeviceBuiltinSurfaceTypeDocs]; } -def CUDADeviceBuiltinTextureType : IgnoredAttr { +def CUDADeviceBuiltinTextureType : InheritableAttr { let Spellings = [GNU<"device_builtin_texture_type">, Declspec<"__device_builtin_texture_type__">]; let LangOpts = [CUDA]; + let Subjects = SubjectList<[Type]>; + let Documentation = [CUDADeviceBuiltinTextureTypeDocs]; } def CUDAGlobal : InheritableAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4624,6 +4624,28 @@ }]; } +def CUDADeviceBuiltinSurfaceTypeDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``device_builtin_surface_type`` attribute can be applied to a class +template when declaring the surface reference. A surface reference variable +could be accessed on the host side and, on the device side, might be translated +into an internal surface object, which is established through surface bind and +unbind runtime APIs. + }]; +} + +def CUDADeviceBuiltinTextureTypeDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``device_builtin_texture_type`` attribute can be applied to a class +template when declaring the texture reference. A texture reference variable +could be accessed on the host side and, on the device side, might be translated +into an internal texture object, which is established through texture bind and +unbind runtime APIs. + }]; +} + def LifetimeOwnerDocs : Documentation { let Category = DocCatDecl; let Content = [{ diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -4116,6 +4116,20 @@ return Pointee->isVoidType() || Pointee->isRecordType(); } +/// Check if the specified type is the CUDA device builtin surface type. +bool Type::isCUDADeviceBuiltinSurfaceType() const { + if (const auto *RT = getAs()) + return RT->getDecl()->hasAttr(); + return false; +} + +/// Check if the specified type is the CUDA device builtin texture type. +bool Type::isCUDADeviceBuiltinTextureType() const { + if (const auto *RT = getAs()) + return RT->getDecl()->hasAttr(); + return false; +} + bool Type::hasSizedVLAType() const { if (!isVariablyModifiedType()) return false; diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -50,7 +50,7 @@ struct VarInfo { llvm::GlobalVariable *Var; const VarDecl *D; - unsigned Flag; + DeviceVarFlags Flags; }; llvm::SmallVector DeviceVars; /// Keeps track of variable containing handle of GPU binary. Populated by @@ -124,8 +124,25 @@ void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, - unsigned Flags) override { - DeviceVars.push_back({&Var, VD, Flags}); + bool Extern, bool Constant) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Variable, Extern, Constant, + /*Normalized*/ false, /*Type*/ 0}}); + } + void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Surface, Extern, /*Constant*/ false, + /*Normalized*/ false, Type}}); + } + void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type, bool Normalized) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Texture, Extern, /*Constant*/ false, + Normalized, Type}}); } /// Creates module constructor function @@ -431,22 +448,55 @@ llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterVarParams, false), addUnderscoredPrefixToName("RegisterVar")); + // void __cudaRegisterSurface(void **, const struct surfaceReference *, + // const void **, const char *, int, int); + llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy}, + false), + addUnderscoredPrefixToName("RegisterSurface")); + // void __cudaRegisterTexture(void **, const struct textureReference *, + // const void **, const char *, int, int, int) + llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + VoidTy, + {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy}, + false), + addUnderscoredPrefixToName("RegisterTexture")); for (auto &&Info : DeviceVars) { llvm::GlobalVariable *Var = Info.Var; - unsigned Flags = Info.Flag; llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D)); - uint64_t VarSize = - CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); - llvm::Value *Args[] = { - &GpuBinaryHandlePtr, - Builder.CreateBitCast(Var, VoidPtrTy), - VarName, - VarName, - llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0), - llvm::ConstantInt::get(IntTy, VarSize), - llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0), - llvm::ConstantInt::get(IntTy, 0)}; - Builder.CreateCall(RegisterVar, Args); + switch (Info.Flags.Kind) { + case DeviceVarFlags::Variable: { + uint64_t VarSize = + CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); + llvm::Value *Args[] = {&GpuBinaryHandlePtr, + Builder.CreateBitCast(Var, VoidPtrTy), + VarName, + VarName, + llvm::ConstantInt::get(IntTy, Info.Flags.Extern), + llvm::ConstantInt::get(IntTy, VarSize), + llvm::ConstantInt::get(IntTy, Info.Flags.Constant), + llvm::ConstantInt::get(IntTy, 0)}; + Builder.CreateCall(RegisterVar, Args); + break; + } + case DeviceVarFlags::Surface: + Builder.CreateCall( + RegisterSurf, + {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName, + VarName, llvm::ConstantInt::get(IntTy, Info.Flags.SurfTexType), + llvm::ConstantInt::get(IntTy, Info.Flags.Extern)}); + break; + case DeviceVarFlags::Texture: + Builder.CreateCall( + RegisterTex, + {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName, + VarName, llvm::ConstantInt::get(IntTy, Info.Flags.SurfTexType), + llvm::ConstantInt::get(IntTy, Info.Flags.Normalized), + llvm::ConstantInt::get(IntTy, Info.Flags.Extern)}); + break; + } } Builder.CreateRetVoid(); diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h --- a/clang/lib/CodeGen/CGCUDARuntime.h +++ b/clang/lib/CodeGen/CGCUDARuntime.h @@ -42,9 +42,17 @@ public: // Global variable properties that must be passed to CUDA runtime. - enum DeviceVarFlags { - ExternDeviceVar = 0x01, // extern - ConstantDeviceVar = 0x02, // __constant__ + struct DeviceVarFlags { + enum DeviceVarKind { + Variable, // Variable + Surface, // Builtin surface + Texture, // Builtin texture + }; + unsigned Kind : 2; + unsigned Extern : 1; + unsigned Constant : 2; // Constant variable. + unsigned Normalized : 1; // Normalized texture. + int SurfTexType; // Type of surface/texutre. }; CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {} @@ -57,7 +65,11 @@ /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, - unsigned Flags) = 0; + bool Extern, bool Constant) = 0; + virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type) = 0; + virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type, bool Normalized) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -15,6 +15,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" @@ -1937,6 +1938,18 @@ } } + if (getLangOpts().CUDAIsDevice) { + if (Ty->isCUDADeviceBuiltinSurfaceType()) { + if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest, + Src)) + return; + } else if (Ty->isCUDADeviceBuiltinTextureType()) { + if (getTargetHooks().emitCUDADeviceBuiltinTextureDeviceCopy(*this, Dest, + Src)) + return; + } + } + // Aggregate assignment turns into llvm.memcpy. This is almost valid per // C99 6.5.16.1p3, which states "If the value being stored in an object is // read from another object that overlaps in anyway the storage of the first diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -698,6 +698,19 @@ TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) { if (!TBAA) return TBAAAccessInfo(); + if (getLangOpts().CUDAIsDevice) { + // As CUDA builtin surface/texture types are replaced, skip generating TBAA + // access info. + if (AccessType->isCUDADeviceBuiltinSurfaceType()) { + if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() != + nullptr) + return TBAAAccessInfo(); + } else if (AccessType->isCUDADeviceBuiltinTextureType()) { + if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() != + nullptr) + return TBAAAccessInfo(); + } + } return TBAA->getAccessInfo(AccessType); } @@ -2492,7 +2505,9 @@ !Global->hasAttr() && !Global->hasAttr() && !Global->hasAttr() && - !(LangOpts.HIP && Global->hasAttr())) + !(LangOpts.HIP && Global->hasAttr()) && + !Global->getType()->isCUDADeviceBuiltinSurfaceType() && + !Global->getType()->isCUDADeviceBuiltinTextureType()) return; } else { // We need to emit host-side 'shadows' for all global @@ -3945,12 +3960,16 @@ !getLangOpts().CUDAIsDevice && (D->hasAttr() || D->hasAttr() || D->hasAttr()); + bool IsCUDADeviceShadowVar = + getLangOpts().CUDAIsDevice && + (D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()); // HIP pinned shadow of initialized host-side global variables are also // left undefined. bool IsHIPPinnedShadowVar = getLangOpts().CUDAIsDevice && D->hasAttr(); - if (getLangOpts().CUDA && - (IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar)) + if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar || + IsCUDADeviceShadowVar || IsHIPPinnedShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (D->hasAttr()) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); @@ -4061,25 +4080,52 @@ if (D->hasAttr() || D->hasAttr() || D->hasAttr()) { Linkage = llvm::GlobalValue::InternalLinkage; - - // Shadow variables and their properties must be registered - // with CUDA runtime. - unsigned Flags = 0; - if (!D->hasDefinition()) - Flags |= CGCUDARuntime::ExternDeviceVar; - if (D->hasAttr()) - Flags |= CGCUDARuntime::ConstantDeviceVar; - // Extern global variables will be registered in the TU where they are - // defined. + // Shadow variables and their properties must be registered with CUDA + // runtime. Skip Extern global variables, which will be registered in + // the TU where they are defined. if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceVar(D, *GV, Flags); - } else if (D->hasAttr()) + getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), + D->hasAttr()); + } else if (D->hasAttr()) { // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they // can't really be used to access their device-side // counterparts. It's not clear yet whether it's nvcc's bug or // a feature, but we've got to do the same for compatibility. Linkage = llvm::GlobalValue::InternalLinkage; + } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()) { + const RecordDecl *RD = D->getType()->getAs()->getDecl(); + // Builtin surfaces and textures and their template arguments are + // also registered with CUDA runtime. + if (const ClassTemplateSpecializationDecl *TD = + dyn_cast(RD)) { + Linkage = llvm::GlobalValue::InternalLinkage; + const TemplateArgumentList &Args = TD->getTemplateInstantiationArgs(); + if (RD->hasAttr()) { + assert(Args.size() == 2 && + "Unexpcted number of template arguments of CUDA device " + "builtin surface type."); + auto Type = Args[1].getAsIntegral(); + if (!D->hasExternalStorage()) + getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(), + Type.getSExtValue()); + } else { + assert(Args.size() == 3 && + "Unexpected number of template arguments of CUDA device " + "builtin texture type."); + auto Type = Args[1].getAsIntegral(); + auto Normalized = Args[2].getAsIntegral(); + assert(Normalized >= 0 && Normalized <= 1 && + "Unexpected normalized argument of CUDA device builtin " + "texture type."); + if (!D->hasExternalStorage()) + getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(), + Type.getSExtValue(), + Normalized.getZExtValue()); + } + } + } } } diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -383,6 +383,20 @@ const Type *Ty = T.getTypePtr(); + // For the device-side compilation, CUDA device builtin surface/texture types + // may be represented in different types. + if (Context.getLangOpts().CUDAIsDevice) { + if (T->isCUDADeviceBuiltinSurfaceType()) { + if (auto *Ty = CGM.getTargetCodeGenInfo() + .getCUDADeviceBuiltinSurfaceDeviceType()) + return Ty; + } else if (T->isCUDADeviceBuiltinTextureType()) { + if (auto *Ty = CGM.getTargetCodeGenInfo() + .getCUDADeviceBuiltinTextureDeviceType()) + return Ty; + } + } + // RecordTypes are cached and processed specially. if (const RecordType *RT = dyn_cast(Ty)) return ConvertRecordDeclType(RT->getDecl()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -315,6 +315,32 @@ virtual bool shouldEmitStaticExternCAliases() const { return true; } virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {} + + /// Return the device-side type for the CUDA device builtin surface type. + virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const { + // By default, no change from the original one. + return nullptr; + } + /// Return the device-side type for the CUDA device builtin texture type. + virtual llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const { + // By default, no change from the original one. + return nullptr; + } + + /// Emit the device-side copy of the builtin surface type. + virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, + LValue Dst, + LValue Src) const { + // DO NOTHING by default. + return false; + } + /// Emit the device-side copy of the builtin texture type. + virtual bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, + LValue Dst, + LValue Src) const { + // DO NOTHING by default. + return false; + } }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" #include // std::sort @@ -6435,10 +6436,51 @@ CodeGen::CodeGenModule &M) const override; bool shouldEmitStaticExternCAliases() const override; + llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { + return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); + } + + llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { + return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); + } + + bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) const override { + emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); + return true; + } + + bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) const override { + emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); + return true; + } + private: - // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the + // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. - static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); + static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, + int Operand); + + static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) { + llvm::Value *Handle = nullptr; + llvm::Constant *C = + llvm::dyn_cast(Src.getAddress(CGF).getPointer()); + // Lookup `addrspacecast` through the constant pointer if any. + if (auto *ASC = llvm::dyn_cast_or_null(C)) + C = llvm::cast(ASC->getPointerOperand()); + if (auto *GV = llvm::dyn_cast_or_null(C)) { + // Load the handle from the specific global variable using + // `nvvm.texsurf.handle.internal` intrinsic. + Handle = CGF.EmitRuntimeCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, + {GV->getType()}), + {GV}, "texsurf_handle"); + } else + Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); + CGF.EmitStoreOfScalar(Handle, Dst); + } }; /// Checks if the type is unsupported directly by the current target. @@ -6511,8 +6553,17 @@ Ty = EnumTy->getDecl()->getIntegerType(); // Return aggregates type as indirect by value - if (isAggregateTypeForABI(Ty)) + if (isAggregateTypeForABI(Ty)) { + // Under CUDA device compilation, tex/surf builtin types are replaced with + // object types and passed directly. + if (getContext().getLangOpts().CUDAIsDevice) { + if (Ty->isCUDADeviceBuiltinSurfaceType()) + return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); + if (Ty->isCUDADeviceBuiltinTextureType()) + return ABIArgInfo::getDirect(llvm::Type::getInt64Ty(getVMContext())); + } return getNaturalAlignIndirect(Ty, /* byval */ true); + } return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); @@ -6540,6 +6591,17 @@ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (GV->isDeclaration()) return; + const VarDecl *VD = dyn_cast_or_null(D); + if (VD) { + if (M.getLangOpts().CUDA) { + if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) + addNVVMMetadata(GV, "surface", 1); + else if (VD->getType()->isCUDADeviceBuiltinTextureType()) + addNVVMMetadata(GV, "texture", 1); + return; + } + } + const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; @@ -6588,16 +6650,16 @@ } } -void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, - int Operand) { - llvm::Module *M = F->getParent(); +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, + StringRef Name, int Operand) { + llvm::Module *M = GV->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name), + llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; // Append metadata to nvvm.annotations diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h --- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -83,11 +83,13 @@ #if CUDA_VERSION < 9000 #define __CUDABE__ #else +#define __CUDACC__ #define __CUDA_LIBDEVICE__ #endif // Disables definitions of device-side runtime support stubs in // cuda_device_runtime_api.h #include "driver_types.h" +#undef __CUDACC__ #include "host_config.h" #include "host_defines.h" diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6939,6 +6939,16 @@ handleSimpleAttributeWithExclusions(S, D, AL); break; + case ParsedAttr::AT_CUDADeviceBuiltinSurfaceType: + handleSimpleAttributeWithExclusions(S, D, + AL); + break; + case ParsedAttr::AT_CUDADeviceBuiltinTextureType: + handleSimpleAttributeWithExclusions(S, D, + AL); + break; case ParsedAttr::AT_GNUInline: handleGNUInlineAttr(S, D, AL); break; diff --git a/clang/test/CodeGenCUDA/surface.cu b/clang/test/CodeGenCUDA/surface.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/surface.cu @@ -0,0 +1,37 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE %s +// RUN: echo "GPU binary would be here" > %t +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s + +struct surfaceReference { + int desc; +}; + +template +struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference { +}; + +// On the device side, surface references are represented as `i64` handles. +// DEVICE: @surf = addrspace(1) global i64 undef, align 4 +// On the host side, they remain in the original type. +// HOST: @surf = internal global %struct.surface +// HOST: @0 = private unnamed_addr constant [5 x i8] c"surf\00" +surface surf; + +__attribute__((device)) int suld_2d_zero(surface, int, int) asm("llvm.nvvm.suld.2d.i32.zero"); + +// DEVICE-LABEL: i32 @_Z3fooii(i32 %x, i32 %y) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf) +// DEVICE: call i32 @llvm.nvvm.suld.2d.i32.zero(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) +__attribute__((device)) int foo(int x, int y) { + return suld_2d_zero(surf, x, y); +} + +// HOST: define internal void @[[PREFIX:__cuda]]_register_globals +// Texture references need registering with correct arguments. +// HOST: call void @[[PREFIX]]RegisterSurface(i8** %0, i8*{{.*}}({{.*}}@surf{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i32 2, i32 0) + +// They also need annotating in metadata. +// DEVICE: !0 = !{i64 addrspace(1)* @surf, !"surface", i32 1} diff --git a/clang/test/CodeGenCUDA/texture.cu b/clang/test/CodeGenCUDA/texture.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/texture.cu @@ -0,0 +1,55 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE %s +// RUN: echo "GPU binary would be here" > %t +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s + +struct textureReference { + int desc; +}; + +enum ReadMode { + ElementType = 0, + NormalizedFloat = 1 +}; + +template +struct __attribute__((device_builtin_texture_type)) texture : public textureReference { +}; + +// On the device side, texture references are represented as `i64` handles. +// DEVICE: @tex = addrspace(1) global i64 undef, align 4 +// DEVICE: @norm = addrspace(1) global i64 undef, align 4 +// On the host side, they remain in the original type. +// HOST: @tex = internal global %struct.texture +// HOST: @norm = internal global %struct.texture +// HOST: @0 = private unnamed_addr constant [4 x i8] c"tex\00" +// HOST: @1 = private unnamed_addr constant [5 x i8] c"norm\00" +texture tex; +texture norm; + +struct v4f { + float x, y, z, w; +}; + +__attribute__((device)) v4f tex2d_ld(texture, float, float) asm("llvm.nvvm.tex.unified.2d.v4f32.f32"); +__attribute__((device)) v4f tex2d_ld(texture, int, int) asm("llvm.nvvm.tex.unified.2d.v4f32.s32"); + +// DEVICE-LABEL: float @_Z3fooff(float %x, float %y) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex) +// DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.f32(i64 %{{.*}}, float %{{.*}}, float %{{.*}}) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @norm) +// DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.s32(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) +__attribute__((device)) float foo(float x, float y) { + return tex2d_ld(tex, x, y).x + tex2d_ld(norm, int(x), int(y)).x; +} + +// HOST: define internal void @[[PREFIX:__cuda]]_register_globals +// Texture references need registering with correct arguments. +// HOST: call void @[[PREFIX]]RegisterTexture(i8** %0, i8*{{.*}}({{.*}}@tex{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i32 2, i32 0, i32 0) +// HOST: call void @[[PREFIX]]RegisterTexture(i8** %0, i8*{{.*}}({{.*}}@norm{{.*}}), i8*{{.*}}({{.*}}@1{{.*}}), i8*{{.*}}({{.*}}@1{{.*}}), i32 2, i32 1, i32 0) + +// They also need annotating in metadata. +// DEVICE: !0 = !{i64 addrspace(1)* @tex, !"texture", i32 1} +// DEVICE: !1 = !{i64 addrspace(1)* @norm, !"texture", i32 1} diff --git a/clang/test/SemaCUDA/attr-declspec.cu b/clang/test/SemaCUDA/attr-declspec.cu --- a/clang/test/SemaCUDA/attr-declspec.cu +++ b/clang/test/SemaCUDA/attr-declspec.cu @@ -6,16 +6,21 @@ // RUN: %clang_cc1 -DEXPECT_WARNINGS -fms-extensions -fsyntax-only -verify -x c %s #if defined(EXPECT_WARNINGS) -// expected-warning@+12 {{'__device__' attribute ignored}} -// expected-warning@+12 {{'__global__' attribute ignored}} -// expected-warning@+12 {{'__constant__' attribute ignored}} -// expected-warning@+12 {{'__shared__' attribute ignored}} -// expected-warning@+12 {{'__host__' attribute ignored}} +// expected-warning@+17 {{'__device__' attribute ignored}} +// expected-warning@+17 {{'__global__' attribute ignored}} +// expected-warning@+17 {{'__constant__' attribute ignored}} +// expected-warning@+17 {{'__shared__' attribute ignored}} +// expected-warning@+17 {{'__host__' attribute ignored}} +// expected-warning@+22 {{'__device_builtin_surface_type__' attribute ignored}} +// expected-warning@+22 {{'__device_builtin_texture_type__' attribute ignored}} +// expected-warning@+22 {{'__device_builtin_surface_type__' attribute ignored}} +// expected-warning@+22 {{'__device_builtin_texture_type__' attribute ignored}} // // (Currently we don't for the other attributes. They are implemented with // IgnoredAttr, which is ignored irrespective of any LangOpts.) #else -// expected-no-diagnostics +// expected-warning@+14 {{'__device_builtin_surface_type__' attribute only applies to types}} +// expected-warning@+14 {{'__device_builtin_texture_type__' attribute only applies to types}} #endif __declspec(__device__) void f_device(); @@ -30,5 +35,7 @@ __declspec(__cudart_builtin__) void f_cudart_builtin(); __declspec(__device_builtin_surface_type__) unsigned long long surface_var; __declspec(__device_builtin_texture_type__) unsigned long long texture_var; +struct __declspec(__device_builtin_surface_type__) surf_ref {}; +struct __declspec(__device_builtin_texture_type__) tex_ref {}; // Note that there's no __declspec spelling of nv_weak. diff --git a/clang/test/SemaCUDA/attributes-on-non-cuda.cu b/clang/test/SemaCUDA/attributes-on-non-cuda.cu --- a/clang/test/SemaCUDA/attributes-on-non-cuda.cu +++ b/clang/test/SemaCUDA/attributes-on-non-cuda.cu @@ -7,16 +7,21 @@ // RUN: %clang_cc1 -DEXPECT_WARNINGS -fsyntax-only -verify -x c %s #if defined(EXPECT_WARNINGS) -// expected-warning@+12 {{'device' attribute ignored}} -// expected-warning@+12 {{'global' attribute ignored}} -// expected-warning@+12 {{'constant' attribute ignored}} -// expected-warning@+12 {{'shared' attribute ignored}} -// expected-warning@+12 {{'host' attribute ignored}} +// expected-warning@+17 {{'device' attribute ignored}} +// expected-warning@+17 {{'global' attribute ignored}} +// expected-warning@+17 {{'constant' attribute ignored}} +// expected-warning@+17 {{'shared' attribute ignored}} +// expected-warning@+17 {{'host' attribute ignored}} +// expected-warning@+23 {{'device_builtin_surface_type' attribute ignored}} +// expected-warning@+23 {{'device_builtin_texture_type' attribute ignored}} +// expected-warning@+23 {{'device_builtin_surface_type' attribute ignored}} +// expected-warning@+23 {{'device_builtin_texture_type' attribute ignored}} // // NOTE: IgnoredAttr in clang which is used for the rest of // attributes ignores LangOpts, so there are no warnings. #else -// expected-no-diagnostics +// expected-warning@+15 {{'device_builtin_surface_type' attribute only applies to types}} +// expected-warning@+15 {{'device_builtin_texture_type' attribute only applies to types}} #endif __attribute__((device)) void f_device(); @@ -32,3 +37,5 @@ __attribute__((nv_weak)) void f_nv_weak(); __attribute__((device_builtin_surface_type)) unsigned long long surface_var; __attribute__((device_builtin_texture_type)) unsigned long long texture_var; +struct __attribute__((device_builtin_surface_type)) surf_ref {}; +struct __attribute__((device_builtin_texture_type)) tex_ref {}; diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -599,6 +599,25 @@ } }; +class AddrSpaceCastOperator + : public ConcreteOperator { + friend class AddrSpaceCastInst; + friend class ConstantExpr; + +public: + Value *getPointerOperand() { return getOperand(0); } + + const Value *getPointerOperand() const { return getOperand(0); } + + unsigned getSrcAddressSpace() const { + return getPointerOperand()->getType()->getPointerAddressSpace(); + } + + unsigned getDestAddressSpace() const { + return getType()->getPointerAddressSpace(); + } +}; + } // end namespace llvm #endif // LLVM_IR_OPERATOR_H