diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2111,6 +2111,11 @@ /// than implicitly __strong. bool isObjCARCImplicitlyUnretainedType() const; + /// Check if the type is the CUDA device builtin surface type. + bool isCUDADeviceBuiltinSurfaceType() const; + /// Check if the type is the CUDA device builtin texture type. + bool isCUDADeviceBuiltinTextureType() const; + /// Return the implicit lifetime for this type, which must not be dependent. Qualifiers::ObjCLifetime getObjCARCImplicitLifetime() const; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1064,16 +1064,20 @@ let LangOpts = [CUDA]; } -def CUDADeviceBuiltinSurfaceType : IgnoredAttr { +def CUDADeviceBuiltinSurfaceType : InheritableAttr { let Spellings = [GNU<"device_builtin_surface_type">, Declspec<"__device_builtin_surface_type__">]; let LangOpts = [CUDA]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [CUDADeviceBuiltinSurfaceTypeDocs]; } -def CUDADeviceBuiltinTextureType : IgnoredAttr { +def CUDADeviceBuiltinTextureType : InheritableAttr { let Spellings = [GNU<"device_builtin_texture_type">, Declspec<"__device_builtin_texture_type__">]; let LangOpts = [CUDA]; + let Subjects = SubjectList<[CXXRecord]>; + let Documentation = [CUDADeviceBuiltinTextureTypeDocs]; } def CUDAGlobal : InheritableAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4624,6 +4624,28 @@ }]; } +def CUDADeviceBuiltinSurfaceTypeDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``device_builtin_surface_type`` attribute can be applied to a class +template when declaring the surface reference. A surface reference variable +could be accessed on the host side and, on the device side, might be translated +into an internal surface object, which is established through surface bind and +unbind runtime APIs. + }]; +} + +def CUDADeviceBuiltinTextureTypeDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``device_builtin_texture_type`` attribute can be applied to a class +template when declaring the texture reference. A texture reference variable +could be accessed on the host side and, on the device side, might be translated +into an internal texture object, which is established through texture bind and +unbind runtime APIs. + }]; +} + def LifetimeOwnerDocs : Documentation { let Category = DocCatDecl; let Content = [{ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7967,6 +7967,22 @@ def note_cuda_ovl_candidate_target_mismatch : Note< "candidate template ignored: target attributes do not match">; +def err_cuda_device_builtin_surftex_cls_template : Error< + "illegal device builtin %select{surface|texture}0 reference " + "class template %1 declared here">; +def note_cuda_device_builtin_surftex_cls_should_have_n_args : Note< + "%0 needs to have exactly %1 template parameters">; +def note_cuda_device_builtin_surftex_cls_should_have_match_arg : Note< + "the %select{1st|2nd|3rd}1 template parameter of %0 needs to be " + "%select{a type|an integer or enum value}2">; + +def err_cuda_device_builtin_surftex_ref_decl : Error< + "illegal device builtin %select{surface|texture}0 reference " + "type %1 declared here">; +def note_cuda_device_builtin_surftex_should_be_template_class : Note< + "%0 needs to be instantiated from a class template with proper " + "template arguments">; + def warn_non_pod_vararg_with_format_string : Warning< "cannot pass %select{non-POD|non-trivial}0 object of type %1 to variadic " "%select{function|block|method|constructor}2; expected type from format " diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -4093,6 +4093,20 @@ return Pointee->isVoidType() || Pointee->isRecordType(); } +/// Check if the specified type is the CUDA device builtin surface type. +bool Type::isCUDADeviceBuiltinSurfaceType() const { + if (const auto *RT = getAs()) + return RT->getDecl()->hasAttr(); + return false; +} + +/// Check if the specified type is the CUDA device builtin texture type. +bool Type::isCUDADeviceBuiltinTextureType() const { + if (const auto *RT = getAs()) + return RT->getDecl()->hasAttr(); + return false; +} + bool Type::hasSizedVLAType() const { if (!isVariablyModifiedType()) return false; diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -50,7 +50,7 @@ struct VarInfo { llvm::GlobalVariable *Var; const VarDecl *D; - unsigned Flag; + DeviceVarFlags Flags; }; llvm::SmallVector DeviceVars; /// Keeps track of variable containing handle of GPU binary. Populated by @@ -124,8 +124,25 @@ void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override; void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, - unsigned Flags) override { - DeviceVars.push_back({&Var, VD, Flags}); + bool Extern, bool Constant) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Variable, Extern, Constant, + /*Normalized*/ false, /*Type*/ 0}}); + } + void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Surface, Extern, /*Constant*/ false, + /*Normalized*/ false, Type}}); + } + void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type, bool Normalized) override { + DeviceVars.push_back({&Var, + VD, + {DeviceVarFlags::Texture, Extern, /*Constant*/ false, + Normalized, Type}}); } /// Creates module constructor function @@ -431,22 +448,55 @@ llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, RegisterVarParams, false), addUnderscoredPrefixToName("RegisterVar")); + // void __cudaRegisterSurface(void **, const struct surfaceReference *, + // const void **, const char *, int, int); + llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy}, + false), + addUnderscoredPrefixToName("RegisterSurface")); + // void __cudaRegisterTexture(void **, const struct textureReference *, + // const void **, const char *, int, int, int) + llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction( + llvm::FunctionType::get( + VoidTy, + {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy}, + false), + addUnderscoredPrefixToName("RegisterTexture")); for (auto &&Info : DeviceVars) { llvm::GlobalVariable *Var = Info.Var; - unsigned Flags = Info.Flag; llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D)); - uint64_t VarSize = - CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); - llvm::Value *Args[] = { - &GpuBinaryHandlePtr, - Builder.CreateBitCast(Var, VoidPtrTy), - VarName, - VarName, - llvm::ConstantInt::get(IntTy, (Flags & ExternDeviceVar) ? 1 : 0), - llvm::ConstantInt::get(IntTy, VarSize), - llvm::ConstantInt::get(IntTy, (Flags & ConstantDeviceVar) ? 1 : 0), - llvm::ConstantInt::get(IntTy, 0)}; - Builder.CreateCall(RegisterVar, Args); + switch (Info.Flags.Kind) { + case DeviceVarFlags::Variable: { + uint64_t VarSize = + CGM.getDataLayout().getTypeAllocSize(Var->getValueType()); + llvm::Value *Args[] = {&GpuBinaryHandlePtr, + Builder.CreateBitCast(Var, VoidPtrTy), + VarName, + VarName, + llvm::ConstantInt::get(IntTy, Info.Flags.Extern), + llvm::ConstantInt::get(IntTy, VarSize), + llvm::ConstantInt::get(IntTy, Info.Flags.Constant), + llvm::ConstantInt::get(IntTy, 0)}; + Builder.CreateCall(RegisterVar, Args); + break; + } + case DeviceVarFlags::Surface: + Builder.CreateCall( + RegisterSurf, + {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName, + VarName, llvm::ConstantInt::get(IntTy, Info.Flags.SurfTexType), + llvm::ConstantInt::get(IntTy, Info.Flags.Extern)}); + break; + case DeviceVarFlags::Texture: + Builder.CreateCall( + RegisterTex, + {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName, + VarName, llvm::ConstantInt::get(IntTy, Info.Flags.SurfTexType), + llvm::ConstantInt::get(IntTy, Info.Flags.Normalized), + llvm::ConstantInt::get(IntTy, Info.Flags.Extern)}); + break; + } } Builder.CreateRetVoid(); diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h --- a/clang/lib/CodeGen/CGCUDARuntime.h +++ b/clang/lib/CodeGen/CGCUDARuntime.h @@ -42,9 +42,17 @@ public: // Global variable properties that must be passed to CUDA runtime. - enum DeviceVarFlags { - ExternDeviceVar = 0x01, // extern - ConstantDeviceVar = 0x02, // __constant__ + struct DeviceVarFlags { + enum DeviceVarKind : unsigned { + Variable, // Variable + Surface, // Builtin surface + Texture, // Builtin texture + }; + DeviceVarKind Kind : 2; + unsigned Extern : 1; + unsigned Constant : 1; // Constant variable. + unsigned Normalized : 1; // Normalized texture. + int SurfTexType; // Type of surface/texutre. }; CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {} @@ -57,7 +65,11 @@ /// Emits a kernel launch stub. virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0; virtual void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var, - unsigned Flags) = 0; + bool Extern, bool Constant) = 0; + virtual void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type) = 0; + virtual void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var, + bool Extern, int Type, bool Normalized) = 0; /// Constructs and returns a module initialization function or nullptr if it's /// not needed. Must be called after all kernels have been emitted. diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -15,6 +15,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" @@ -1946,6 +1947,18 @@ } } + if (getLangOpts().CUDAIsDevice) { + if (Ty->isCUDADeviceBuiltinSurfaceType()) { + if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest, + Src)) + return; + } else if (Ty->isCUDADeviceBuiltinTextureType()) { + if (getTargetHooks().emitCUDADeviceBuiltinTextureDeviceCopy(*this, Dest, + Src)) + return; + } + } + // Aggregate assignment turns into llvm.memcpy. This is almost valid per // C99 6.5.16.1p3, which states "If the value being stored in an object is // read from another object that overlaps in anyway the storage of the first diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -713,6 +713,19 @@ TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) { if (!TBAA) return TBAAAccessInfo(); + if (getLangOpts().CUDAIsDevice) { + // As CUDA builtin surface/texture types are replaced, skip generating TBAA + // access info. + if (AccessType->isCUDADeviceBuiltinSurfaceType()) { + if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() != + nullptr) + return TBAAAccessInfo(); + } else if (AccessType->isCUDADeviceBuiltinTextureType()) { + if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() != + nullptr) + return TBAAAccessInfo(); + } + } return TBAA->getAccessInfo(AccessType); } @@ -2507,7 +2520,9 @@ !Global->hasAttr() && !Global->hasAttr() && !Global->hasAttr() && - !(LangOpts.HIP && Global->hasAttr())) + !(LangOpts.HIP && Global->hasAttr()) && + !Global->getType()->isCUDADeviceBuiltinSurfaceType() && + !Global->getType()->isCUDADeviceBuiltinTextureType()) return; } else { // We need to emit host-side 'shadows' for all global @@ -3907,12 +3922,16 @@ !getLangOpts().CUDAIsDevice && (D->hasAttr() || D->hasAttr() || D->hasAttr()); + bool IsCUDADeviceShadowVar = + getLangOpts().CUDAIsDevice && + (D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()); // HIP pinned shadow of initialized host-side global variables are also // left undefined. bool IsHIPPinnedShadowVar = getLangOpts().CUDAIsDevice && D->hasAttr(); - if (getLangOpts().CUDA && - (IsCUDASharedVar || IsCUDAShadowVar || IsHIPPinnedShadowVar)) + if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar || + IsCUDADeviceShadowVar || IsHIPPinnedShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); else if (D->hasAttr()) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); @@ -4023,25 +4042,48 @@ if (D->hasAttr() || D->hasAttr() || D->hasAttr()) { Linkage = llvm::GlobalValue::InternalLinkage; - - // Shadow variables and their properties must be registered - // with CUDA runtime. - unsigned Flags = 0; - if (!D->hasDefinition()) - Flags |= CGCUDARuntime::ExternDeviceVar; - if (D->hasAttr()) - Flags |= CGCUDARuntime::ConstantDeviceVar; - // Extern global variables will be registered in the TU where they are - // defined. + // Shadow variables and their properties must be registered with CUDA + // runtime. Skip Extern global variables, which will be registered in + // the TU where they are defined. if (!D->hasExternalStorage()) - getCUDARuntime().registerDeviceVar(D, *GV, Flags); - } else if (D->hasAttr()) + getCUDARuntime().registerDeviceVar(D, *GV, !D->hasDefinition(), + D->hasAttr()); + } else if (D->hasAttr()) { // __shared__ variables are odd. Shadows do get created, but // they are not registered with the CUDA runtime, so they // can't really be used to access their device-side // counterparts. It's not clear yet whether it's nvcc's bug or // a feature, but we've got to do the same for compatibility. Linkage = llvm::GlobalValue::InternalLinkage; + } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() || + D->getType()->isCUDADeviceBuiltinTextureType()) { + // Builtin surfaces and textures and their template arguments are + // also registered with CUDA runtime. + Linkage = llvm::GlobalValue::InternalLinkage; + const ClassTemplateSpecializationDecl *TD = + cast( + D->getType()->getAs()->getDecl()); + const TemplateArgumentList &Args = TD->getTemplateArgs(); + if (TD->hasAttr()) { + assert(Args.size() == 2 && + "Unexpected number of template arguments of CUDA device " + "builtin surface type."); + auto SurfType = Args[1].getAsIntegral(); + if (!D->hasExternalStorage()) + getCUDARuntime().registerDeviceSurf(D, *GV, !D->hasDefinition(), + SurfType.getSExtValue()); + } else { + assert(Args.size() == 3 && + "Unexpected number of template arguments of CUDA device " + "builtin texture type."); + auto TexType = Args[1].getAsIntegral(); + auto Normalized = Args[2].getAsIntegral(); + if (!D->hasExternalStorage()) + getCUDARuntime().registerDeviceTex(D, *GV, !D->hasDefinition(), + TexType.getSExtValue(), + Normalized.getZExtValue()); + } + } } } diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -383,6 +383,20 @@ const Type *Ty = T.getTypePtr(); + // For the device-side compilation, CUDA device builtin surface/texture types + // may be represented in different types. + if (Context.getLangOpts().CUDAIsDevice) { + if (T->isCUDADeviceBuiltinSurfaceType()) { + if (auto *Ty = CGM.getTargetCodeGenInfo() + .getCUDADeviceBuiltinSurfaceDeviceType()) + return Ty; + } else if (T->isCUDADeviceBuiltinTextureType()) { + if (auto *Ty = CGM.getTargetCodeGenInfo() + .getCUDADeviceBuiltinTextureDeviceType()) + return Ty; + } + } + // RecordTypes are cached and processed specially. if (const RecordType *RT = dyn_cast(Ty)) return ConvertRecordDeclType(RT->getDecl()); diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -315,6 +315,32 @@ virtual bool shouldEmitStaticExternCAliases() const { return true; } virtual void setCUDAKernelCallingConvention(const FunctionType *&FT) const {} + + /// Return the device-side type for the CUDA device builtin surface type. + virtual llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const { + // By default, no change from the original one. + return nullptr; + } + /// Return the device-side type for the CUDA device builtin texture type. + virtual llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const { + // By default, no change from the original one. + return nullptr; + } + + /// Emit the device-side copy of the builtin surface type. + virtual bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, + LValue Dst, + LValue Src) const { + // DO NOTHING by default. + return false; + } + /// Emit the device-side copy of the builtin texture type. + virtual bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, + LValue Dst, + LValue Src) const { + // DO NOTHING by default. + return false; + } }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Type.h" #include "llvm/Support/raw_ostream.h" #include // std::sort @@ -6414,9 +6415,14 @@ namespace { +class NVPTXTargetCodeGenInfo; + class NVPTXABIInfo : public ABIInfo { + NVPTXTargetCodeGenInfo &CGInfo; + public: - NVPTXABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} + NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info) + : ABIInfo(CGT), CGInfo(Info) {} ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType Ty) const; @@ -6429,16 +6435,61 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { public: NVPTXTargetCodeGenInfo(CodeGenTypes &CGT) - : TargetCodeGenInfo(new NVPTXABIInfo(CGT)) {} + : TargetCodeGenInfo(new NVPTXABIInfo(CGT, *this)) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; bool shouldEmitStaticExternCAliases() const override; + llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override { + // On the device side, surface reference is represented as an object handle + // in 64-bit integer. + return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); + } + + llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override { + // On the device side, texture reference is represented as an object handle + // in 64-bit integer. + return llvm::Type::getInt64Ty(getABIInfo().getVMContext()); + } + + bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) const override { + emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); + return true; + } + + bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) const override { + emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src); + return true; + } + private: - // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the + // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. - static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); + static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, + int Operand); + + static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, + LValue Src) { + llvm::Value *Handle = nullptr; + llvm::Constant *C = + llvm::dyn_cast(Src.getAddress(CGF).getPointer()); + // Lookup `addrspacecast` through the constant pointer if any. + if (auto *ASC = llvm::dyn_cast_or_null(C)) + C = llvm::cast(ASC->getPointerOperand()); + if (auto *GV = llvm::dyn_cast_or_null(C)) { + // Load the handle from the specific global variable using + // `nvvm.texsurf.handle.internal` intrinsic. + Handle = CGF.EmitRuntimeCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal, + {GV->getType()}), + {GV}, "texsurf_handle"); + } else + Handle = CGF.EmitLoadOfScalar(Src, SourceLocation()); + CGF.EmitStoreOfScalar(Handle, Dst); + } }; /// Checks if the type is unsupported directly by the current target. @@ -6511,8 +6562,19 @@ Ty = EnumTy->getDecl()->getIntegerType(); // Return aggregates type as indirect by value - if (isAggregateTypeForABI(Ty)) + if (isAggregateTypeForABI(Ty)) { + // Under CUDA device compilation, tex/surf builtin types are replaced with + // object types and passed directly. + if (getContext().getLangOpts().CUDAIsDevice) { + if (Ty->isCUDADeviceBuiltinSurfaceType()) + return ABIArgInfo::getDirect( + CGInfo.getCUDADeviceBuiltinSurfaceDeviceType()); + if (Ty->isCUDADeviceBuiltinTextureType()) + return ABIArgInfo::getDirect( + CGInfo.getCUDADeviceBuiltinTextureDeviceType()); + } return getNaturalAlignIndirect(Ty, /* byval */ true); + } return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); @@ -6540,6 +6602,17 @@ const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (GV->isDeclaration()) return; + const VarDecl *VD = dyn_cast_or_null(D); + if (VD) { + if (M.getLangOpts().CUDA) { + if (VD->getType()->isCUDADeviceBuiltinSurfaceType()) + addNVVMMetadata(GV, "surface", 1); + else if (VD->getType()->isCUDADeviceBuiltinTextureType()) + addNVVMMetadata(GV, "texture", 1); + return; + } + } + const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; @@ -6588,16 +6661,16 @@ } } -void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, - int Operand) { - llvm::Module *M = F->getParent(); +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, + StringRef Name, int Operand) { + llvm::Module *M = GV->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); llvm::Metadata *MDVals[] = { - llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, Name), + llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; // Append metadata to nvvm.annotations diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h --- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -83,13 +83,15 @@ #if CUDA_VERSION < 9000 #define __CUDABE__ #else +#define __CUDACC__ #define __CUDA_LIBDEVICE__ #endif // Disables definitions of device-side runtime support stubs in // cuda_device_runtime_api.h +#include "host_defines.h" +#undef __CUDACC__ #include "driver_types.h" #include "host_config.h" -#include "host_defines.h" // Temporarily replace "nv_weak" with weak, so __attribute__((nv_weak)) in // cuda_device_runtime_api.h ends up being __attribute__((weak)) which is the diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6934,6 +6934,16 @@ handleSimpleAttributeWithExclusions(S, D, AL); break; + case ParsedAttr::AT_CUDADeviceBuiltinSurfaceType: + handleSimpleAttributeWithExclusions(S, D, + AL); + break; + case ParsedAttr::AT_CUDADeviceBuiltinTextureType: + handleSimpleAttributeWithExclusions(S, D, + AL); + break; case ParsedAttr::AT_GNUInline: handleGNUInlineAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -5877,6 +5877,123 @@ } } +static void checkCUDADeviceBuiltinSurfaceClassTemplate(Sema &S, + CXXRecordDecl *Class) { + bool ErrorReported = false; + auto reportIllegalClassTemplate = [&ErrorReported](Sema &S, + ClassTemplateDecl *TD) { + if (ErrorReported) + return; + S.Diag(TD->getLocation(), + diag::err_cuda_device_builtin_surftex_cls_template) + << /*surface*/ 0 << TD; + ErrorReported = true; + }; + + ClassTemplateDecl *TD = Class->getDescribedClassTemplate(); + if (!TD) { + auto *SD = dyn_cast(Class); + if (!SD) { + S.Diag(Class->getLocation(), + diag::err_cuda_device_builtin_surftex_ref_decl) + << /*surface*/ 0 << Class; + S.Diag(Class->getLocation(), + diag::note_cuda_device_builtin_surftex_should_be_template_class) + << Class; + return; + } + TD = SD->getSpecializedTemplate(); + } + + TemplateParameterList *Params = TD->getTemplateParameters(); + unsigned N = Params->size(); + + if (N != 2) { + reportIllegalClassTemplate(S, TD); + S.Diag(TD->getLocation(), + diag::note_cuda_device_builtin_surftex_cls_should_have_n_args) + << TD << 2; + } + if (N > 0 && !isa(Params->getParam(0))) { + reportIllegalClassTemplate(S, TD); + S.Diag(TD->getLocation(), + diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg) + << TD << /*1st*/ 0 << /*type*/ 0; + } + if (N > 1) { + auto *NTTP = dyn_cast(Params->getParam(1)); + if (!NTTP || !NTTP->getType()->isIntegralOrEnumerationType()) { + reportIllegalClassTemplate(S, TD); + S.Diag(TD->getLocation(), + diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg) + << TD << /*2nd*/ 1 << /*integer*/ 1; + } + } +} + +static void checkCUDADeviceBuiltinTextureClassTemplate(Sema &S, + CXXRecordDecl *Class) { + bool ErrorReported = false; + auto reportIllegalClassTemplate = [&ErrorReported](Sema &S, + ClassTemplateDecl *TD) { + if (ErrorReported) + return; + S.Diag(TD->getLocation(), + diag::err_cuda_device_builtin_surftex_cls_template) + << /*texture*/ 1 << TD; + ErrorReported = true; + }; + + ClassTemplateDecl *TD = Class->getDescribedClassTemplate(); + if (!TD) { + auto *SD = dyn_cast(Class); + if (!SD) { + S.Diag(Class->getLocation(), + diag::err_cuda_device_builtin_surftex_ref_decl) + << /*texture*/ 1 << Class; + S.Diag(Class->getLocation(), + diag::note_cuda_device_builtin_surftex_should_be_template_class) + << Class; + return; + } + TD = SD->getSpecializedTemplate(); + } + + TemplateParameterList *Params = TD->getTemplateParameters(); + unsigned N = Params->size(); + + if (N != 3) { + reportIllegalClassTemplate(S, TD); + S.Diag(TD->getLocation(), + diag::note_cuda_device_builtin_surftex_cls_should_have_n_args) + << TD << 3; + } + if (N > 0 && !isa(Params->getParam(0))) { + reportIllegalClassTemplate(S, TD); + S.Diag(TD->getLocation(), + diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg) + << TD << /*1st*/ 0 << /*type*/ 0; + } + if (N > 1) { + auto *NTTP = dyn_cast(Params->getParam(1)); + if (!NTTP || !NTTP->getType()->isIntegralOrEnumerationType()) { + reportIllegalClassTemplate(S, TD); + S.Diag(TD->getLocation(), + diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg) + << TD << /*2nd*/ 1 << /*integer*/ 1; + } + } + if (N > 2) { + auto *NTTP = dyn_cast(Params->getParam(2)); + if (!NTTP || !NTTP->getType()->isIntegralOrEnumerationType()) { + reportIllegalClassTemplate(S, TD); + S.Diag(TD->getLocation(), + diag::note_cuda_device_builtin_surftex_cls_should_have_match_arg) + << TD << /*3rd*/ 2 << /*integer*/ 1; + } + } +} + void Sema::checkClassLevelCodeSegAttribute(CXXRecordDecl *Class) { // Mark any compiler-generated routines with the implicit code_seg attribute. for (auto *Method : Class->methods()) { @@ -6657,6 +6774,13 @@ // is especially required for cases like vtable assumption loads. MarkVTableUsed(Record->getInnerLocStart(), Record); } + + if (getLangOpts().CUDA) { + if (Record->hasAttr()) + checkCUDADeviceBuiltinSurfaceClassTemplate(*this, Record); + else if (Record->hasAttr()) + checkCUDADeviceBuiltinTextureClassTemplate(*this, Record); + } } /// Look up the special member function that would be called by a special diff --git a/clang/test/CodeGenCUDA/surface.cu b/clang/test/CodeGenCUDA/surface.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/surface.cu @@ -0,0 +1,42 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE %s +// RUN: echo "GPU binary would be here" > %t +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s + +struct surfaceReference { + int desc; +}; + +template +struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference { +}; + +// Partial specialization over `void`. +template +struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference { +}; + +// On the device side, surface references are represented as `i64` handles. +// DEVICE: @surf = addrspace(1) global i64 undef, align 4 +// On the host side, they remain in the original type. +// HOST: @surf = internal global %struct.surface +// HOST: @0 = private unnamed_addr constant [5 x i8] c"surf\00" +surface surf; + +__attribute__((device)) int suld_2d_zero(surface, int, int) asm("llvm.nvvm.suld.2d.i32.zero"); + +// DEVICE-LABEL: i32 @_Z3fooii(i32 %x, i32 %y) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @surf) +// DEVICE: call i32 @llvm.nvvm.suld.2d.i32.zero(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) +__attribute__((device)) int foo(int x, int y) { + return suld_2d_zero(surf, x, y); +} + +// HOST: define internal void @[[PREFIX:__cuda]]_register_globals +// Texture references need registering with correct arguments. +// HOST: call void @[[PREFIX]]RegisterSurface(i8** %0, i8*{{.*}}({{.*}}@surf{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i32 2, i32 0) + +// They also need annotating in metadata. +// DEVICE: !0 = !{i64 addrspace(1)* @surf, !"surface", i32 1} diff --git a/clang/test/CodeGenCUDA/texture.cu b/clang/test/CodeGenCUDA/texture.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/texture.cu @@ -0,0 +1,55 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -std=c++11 -fcuda-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - %s | FileCheck --check-prefix=DEVICE %s +// RUN: echo "GPU binary would be here" > %t +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=HOST %s + +struct textureReference { + int desc; +}; + +enum ReadMode { + ElementType = 0, + NormalizedFloat = 1 +}; + +template +struct __attribute__((device_builtin_texture_type)) texture : public textureReference { +}; + +// On the device side, texture references are represented as `i64` handles. +// DEVICE: @tex = addrspace(1) global i64 undef, align 4 +// DEVICE: @norm = addrspace(1) global i64 undef, align 4 +// On the host side, they remain in the original type. +// HOST: @tex = internal global %struct.texture +// HOST: @norm = internal global %struct.texture +// HOST: @0 = private unnamed_addr constant [4 x i8] c"tex\00" +// HOST: @1 = private unnamed_addr constant [5 x i8] c"norm\00" +texture tex; +texture norm; + +struct v4f { + float x, y, z, w; +}; + +__attribute__((device)) v4f tex2d_ld(texture, float, float) asm("llvm.nvvm.tex.unified.2d.v4f32.f32"); +__attribute__((device)) v4f tex2d_ld(texture, int, int) asm("llvm.nvvm.tex.unified.2d.v4f32.s32"); + +// DEVICE-LABEL: float @_Z3fooff(float %x, float %y) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @tex) +// DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.f32(i64 %{{.*}}, float %{{.*}}, float %{{.*}}) +// DEVICE: call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @norm) +// DEVICE: call %struct.v4f @llvm.nvvm.tex.unified.2d.v4f32.s32(i64 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) +__attribute__((device)) float foo(float x, float y) { + return tex2d_ld(tex, x, y).x + tex2d_ld(norm, int(x), int(y)).x; +} + +// HOST: define internal void @[[PREFIX:__cuda]]_register_globals +// Texture references need registering with correct arguments. +// HOST: call void @[[PREFIX]]RegisterTexture(i8** %0, i8*{{.*}}({{.*}}@tex{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i8*{{.*}}({{.*}}@0{{.*}}), i32 2, i32 0, i32 0) +// HOST: call void @[[PREFIX]]RegisterTexture(i8** %0, i8*{{.*}}({{.*}}@norm{{.*}}), i8*{{.*}}({{.*}}@1{{.*}}), i8*{{.*}}({{.*}}@1{{.*}}), i32 2, i32 1, i32 0) + +// They also need annotating in metadata. +// DEVICE: !0 = !{i64 addrspace(1)* @tex, !"texture", i32 1} +// DEVICE: !1 = !{i64 addrspace(1)* @norm, !"texture", i32 1} diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -30,6 +30,8 @@ // CHECK-NEXT: CPUSpecific (SubjectMatchRule_function) // CHECK-NEXT: CUDAConstant (SubjectMatchRule_variable) // CHECK-NEXT: CUDADevice (SubjectMatchRule_function, SubjectMatchRule_variable) +// CHECK-NEXT: CUDADeviceBuiltinSurfaceType (SubjectMatchRule_record) +// CHECK-NEXT: CUDADeviceBuiltinTextureType (SubjectMatchRule_record) // CHECK-NEXT: CUDAGlobal (SubjectMatchRule_function) // CHECK-NEXT: CUDAHost (SubjectMatchRule_function) // CHECK-NEXT: CUDALaunchBounds (SubjectMatchRule_objc_method, SubjectMatchRule_hasType_functionType) diff --git a/clang/test/SemaCUDA/attr-declspec.cu b/clang/test/SemaCUDA/attr-declspec.cu --- a/clang/test/SemaCUDA/attr-declspec.cu +++ b/clang/test/SemaCUDA/attr-declspec.cu @@ -6,16 +6,19 @@ // RUN: %clang_cc1 -DEXPECT_WARNINGS -fms-extensions -fsyntax-only -verify -x c %s #if defined(EXPECT_WARNINGS) -// expected-warning@+12 {{'__device__' attribute ignored}} -// expected-warning@+12 {{'__global__' attribute ignored}} -// expected-warning@+12 {{'__constant__' attribute ignored}} -// expected-warning@+12 {{'__shared__' attribute ignored}} -// expected-warning@+12 {{'__host__' attribute ignored}} +// expected-warning@+15 {{'__device__' attribute ignored}} +// expected-warning@+15 {{'__global__' attribute ignored}} +// expected-warning@+15 {{'__constant__' attribute ignored}} +// expected-warning@+15 {{'__shared__' attribute ignored}} +// expected-warning@+15 {{'__host__' attribute ignored}} +// expected-warning@+20 {{'__device_builtin_surface_type__' attribute ignored}} +// expected-warning@+20 {{'__device_builtin_texture_type__' attribute ignored}} // // (Currently we don't for the other attributes. They are implemented with // IgnoredAttr, which is ignored irrespective of any LangOpts.) #else -// expected-no-diagnostics +// expected-warning@+14 {{'__device_builtin_surface_type__' attribute only applies to classes}} +// expected-warning@+14 {{'__device_builtin_texture_type__' attribute only applies to classes}} #endif __declspec(__device__) void f_device(); diff --git a/clang/test/SemaCUDA/attributes-on-non-cuda.cu b/clang/test/SemaCUDA/attributes-on-non-cuda.cu --- a/clang/test/SemaCUDA/attributes-on-non-cuda.cu +++ b/clang/test/SemaCUDA/attributes-on-non-cuda.cu @@ -7,16 +7,19 @@ // RUN: %clang_cc1 -DEXPECT_WARNINGS -fsyntax-only -verify -x c %s #if defined(EXPECT_WARNINGS) -// expected-warning@+12 {{'device' attribute ignored}} -// expected-warning@+12 {{'global' attribute ignored}} -// expected-warning@+12 {{'constant' attribute ignored}} -// expected-warning@+12 {{'shared' attribute ignored}} -// expected-warning@+12 {{'host' attribute ignored}} +// expected-warning@+15 {{'device' attribute ignored}} +// expected-warning@+15 {{'global' attribute ignored}} +// expected-warning@+15 {{'constant' attribute ignored}} +// expected-warning@+15 {{'shared' attribute ignored}} +// expected-warning@+15 {{'host' attribute ignored}} +// expected-warning@+21 {{'device_builtin_surface_type' attribute ignored}} +// expected-warning@+21 {{'device_builtin_texture_type' attribute ignored}} // // NOTE: IgnoredAttr in clang which is used for the rest of // attributes ignores LangOpts, so there are no warnings. #else -// expected-no-diagnostics +// expected-warning@+15 {{'device_builtin_surface_type' attribute only applies to classes}} +// expected-warning@+15 {{'device_builtin_texture_type' attribute only applies to classes}} #endif __attribute__((device)) void f_device(); diff --git a/clang/test/SemaCUDA/bad-attributes.cu b/clang/test/SemaCUDA/bad-attributes.cu --- a/clang/test/SemaCUDA/bad-attributes.cu +++ b/clang/test/SemaCUDA/bad-attributes.cu @@ -70,3 +70,27 @@ __device__ void device_fn() { __constant__ int c; // expected-error {{__constant__ variables must be global}} } + +typedef __attribute__((device_builtin_surface_type)) unsigned long long s0_ty; // expected-warning {{'device_builtin_surface_type' attribute only applies to classes}} +typedef __attribute__((device_builtin_texture_type)) unsigned long long t0_ty; // expected-warning {{'device_builtin_texture_type' attribute only applies to classes}} + +struct __attribute__((device_builtin_surface_type)) s1_ref {}; // expected-error {{illegal device builtin surface reference type 's1_ref' declared here}} +// expected-note@-1 {{'s1_ref' needs to be instantiated from a class template with proper template arguments}} +struct __attribute__((device_builtin_texture_type)) t1_ref {}; // expected-error {{illegal device builtin texture reference type 't1_ref' declared here}} +// expected-note@-1 {{'t1_ref' needs to be instantiated from a class template with proper template arguments}} + +template +struct __attribute__((device_builtin_surface_type)) s2_cls_template {}; // expected-error {{illegal device builtin surface reference class template 's2_cls_template' declared here}} +// expected-note@-1 {{'s2_cls_template' needs to have exactly 2 template parameters}} +template +struct __attribute__((device_builtin_texture_type)) t2_cls_template {}; // expected-error {{illegal device builtin texture reference class template 't2_cls_template' declared here}} +// expected-note@-1 {{'t2_cls_template' needs to have exactly 3 template parameters}} + +template +struct __attribute__((device_builtin_surface_type)) s3_cls_template {}; // expected-error {{illegal device builtin surface reference class template 's3_cls_template' declared here}} +// expected-note@-1 {{the 1st template parameter of 's3_cls_template' needs to be a type}} +// expected-note@-2 {{the 2nd template parameter of 's3_cls_template' needs to be an integer or enum value}} +template +struct __attribute__((device_builtin_texture_type)) t3_cls_template {}; // expected-error {{illegal device builtin texture reference class template 't3_cls_template' declared here}} +// expected-note@-1 {{the 1st template parameter of 't3_cls_template' needs to be a type}} +// expected-note@-2 {{the 3rd template parameter of 't3_cls_template' needs to be an integer or enum value}} diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -599,6 +599,25 @@ } }; +class AddrSpaceCastOperator + : public ConcreteOperator { + friend class AddrSpaceCastInst; + friend class ConstantExpr; + +public: + Value *getPointerOperand() { return getOperand(0); } + + const Value *getPointerOperand() const { return getOperand(0); } + + unsigned getSrcAddressSpace() const { + return getPointerOperand()->getType()->getPointerAddressSpace(); + } + + unsigned getDestAddressSpace() const { + return getType()->getPointerAddressSpace(); + } +}; + } // end namespace llvm #endif // LLVM_IR_OPERATOR_H