// Patch overview (address-space plumbing): adds the HLSL-specific address
// space enumerators hlsl_cbuffer and hlsl_tbuffer to AddressSpaces.h, placed
// before the count of language-specific address spaces, and appends two
// matching entries to every address-space map touched below.
// The ASTContext fake map assigns them 13 and 14 and the DirectX map assigns
// them 4 and 5; the AMDGPU, NVPTX and SPIR maps get 0, which the patch's own
// comments describe as dummy values that can't be used.
// NOTE(review): this copy of the patch has its line breaks collapsed, so the
// hunk headers (@@ ... @@) no longer line up with physical lines — restore
// the original line structure before applying.
diff --git a/clang/include/clang/Basic/AddressSpaces.h b/clang/include/clang/Basic/AddressSpaces.h --- a/clang/include/clang/Basic/AddressSpaces.h +++ b/clang/include/clang/Basic/AddressSpaces.h @@ -56,6 +56,10 @@ ptr32_uptr, ptr64, + // HLSL specific address spaces. + hlsl_cbuffer, + hlsl_tbuffer, + // This denotes the count of language-specific address spaces and also // the offset added to the target-specific address spaces, which are usually // specified by address space attributes __attribute__(address_space(n))). diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -948,7 +948,9 @@ 0, // sycl_private 10, // ptr32_sptr 11, // ptr32_uptr - 12 // ptr64 + 12, // ptr64 + 13, // hlsl_cbuffer + 14, // hlsl_tbuffer }; return &FakeAddrSpaceMap; } else { diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -56,7 +56,10 @@ Private, // sycl_private Generic, // ptr32_sptr Generic, // ptr32_uptr - Generic // ptr64 + Generic, // ptr64 + // HLSL address space values for this map are dummy and they can't be used + 0, // hlsl_cbuffer + 0, // hlsl_tbuffer }; const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = { @@ -79,7 +82,10 @@ Generic, // sycl_private Generic, // ptr32_sptr Generic, // ptr32_uptr - Generic // ptr64 + Generic, // ptr64 + // HLSL address space values for this map are dummy and they can't be used + 0, // hlsl_cbuffer + 0, // hlsl_tbuffer }; } // namespace targets diff --git a/clang/lib/Basic/Targets/DirectX.h b/clang/lib/Basic/Targets/DirectX.h --- a/clang/lib/Basic/Targets/DirectX.h +++ b/clang/lib/Basic/Targets/DirectX.h @@ -40,7 +40,9 @@ 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr - 0 // ptr64 + 0, // ptr64 + 4, // hlsl_cbuffer + 5, // hlsl_tbuffer }; class LLVM_LIBRARY_VISIBILITY DirectXTargetInfo : public TargetInfo { diff --git 
a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -42,7 +42,10 @@ 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr - 0 // ptr64 + 0, // ptr64 + // HLSL address space values for this map are dummy and they can't be used + 0, // hlsl_cbuffer + 0, // hlsl_tbuffer }; /// The DWARF address class. Taken from diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -42,7 +42,10 @@ 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr - 0 // ptr64 + 0, // ptr64 + // HLSL address space values for this map are dummy and they can't be used + 0, // hlsl_cbuffer + 0, // hlsl_tbuffer }; // Used by both the SPIR and SPIR-V targets. @@ -71,7 +74,10 @@ 0, // sycl_private 0, // ptr32_sptr 0, // ptr32_uptr - 0 // ptr64 + 0, // ptr64 + // HLSL address space values for this map are dummy and they can't be used + 0, // hlsl_cbuffer + 0, // hlsl_tbuffer }; // Base class for SPIR and SPIR-V target info. 
// Patch overview (cbuffer/tbuffer codegen): gives the TCE and X86
// address-space maps dummy hlsl_cbuffer/hlsl_tbuffer entries, lists
// Decl::HLSLBuffer among the decls needing no codegen in CGDecl.cpp, and
// dispatches Decl::HLSLBuffer in CodeGenModule.cpp to
// CGHLSLRuntime::addBuffer(). addBuffer() records a Buffer and walks its
// declarations; finishCodeGen() then lays each buffer out as a struct
// (layoutBuffer), creates one external constant global for it in the target's
// cbuffer/tbuffer address space, and replaces every use of the per-constant
// globals with an address-space-cast GEP into that global (replaceBuffer).
// NOTE(review): this copy appears to have lost angle-bracket text (e.g.
// `#include` with no header, `std::vector>`, `cast(`, `isa(`, `dyn_cast(`)
// and its original line breaks — restore both from the original patch
// before applying.
// NOTE(review): layoutBuffer() returns early when a buffer has no constants,
// leaving LayoutStruct null, yet finishCodeGen() still calls replaceBuffer(),
// which passes that null type to the GlobalVariable constructor — confirm
// that empty buffers are handled.
// NOTE(review): padding is computed as `Offset % Align`; that equals the
// distance to the next aligned offset only when the remainder is half the
// alignment (as in the float/double test below) — verify for other layouts.
// NOTE(review): the first `Const.second = EltTys.size();` in layoutBuffer()
// is overwritten by the identical assignment after the padding step.
// NOTE(review): addBufferDecls() keeps using `CB` (a reference to
// Buffers.back()) after a nested addBuffer() appends to Buffers; if that
// append reallocates, the reference looks like it would dangle — confirm.
// NOTE(review): the `return` after the "invalid decl inside cbuffer"
// diagnostic also skips the remaining declarations of the buffer.
// NOTE(review): `AddressSapce` in finishCodeGen() is presumably a typo for
// `AddressSpace`.
diff --git a/clang/lib/Basic/Targets/TCE.h b/clang/lib/Basic/Targets/TCE.h --- a/clang/lib/Basic/Targets/TCE.h +++ b/clang/lib/Basic/Targets/TCE.h @@ -50,6 +50,9 @@ 0, // ptr32_sptr 0, // ptr32_uptr 0, // ptr64 + // HLSL address space values for this map are dummy and they can't be used + 0, // hlsl_cbuffer + 0, // hlsl_tbuffer }; class LLVM_LIBRARY_VISIBILITY TCETargetInfo : public TargetInfo { diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -43,7 +43,10 @@ 0, // sycl_private 270, // ptr32_sptr 271, // ptr32_uptr - 272 // ptr64 + 272, // ptr64 + // HLSL address space values for this map are dummy and they can't be used + 0, // hlsl_cbuffer + 0, // hlsl_tbuffer }; // X86 target abstract base class; x86-32 and x86-64 are very close, so diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -128,6 +128,7 @@ case Decl::Concept: case Decl::LifetimeExtendedTemporary: case Decl::RequiresExprBody: + case Decl::HLSLBuffer: // None of these decls require codegen support. return; diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -15,21 +15,51 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { +class GlobalVariable; +class StructType; +} // namespace llvm + namespace clang { +class HLSLBufferDecl; +class VarDecl; +class DeclContext; namespace CodeGen { class CodeGenModule; class CGHLSLRuntime { +public: + struct Buffer { + Buffer(HLSLBufferDecl *D); + llvm::StringRef Name; + // IsCBuffer - Whether the buffer is a cbuffer (and not a tbuffer). 
+ bool IsCBuffer; + unsigned Reg; + unsigned Space; + // Global variable and offset for each constant; layoutBuffer overwrites the offset with the LayoutStruct element index. + std::vector> Constants; + llvm::StructType *LayoutStruct = nullptr; + }; + protected: CodeGenModule &CGM; public: CGHLSLRuntime(CodeGenModule &CGM) : CGM(CGM) {} virtual ~CGHLSLRuntime() {} - + void addBuffer(HLSLBufferDecl *D); void finishCodeGen(); + +private: + void addConstant(VarDecl *D, Buffer &CB); + void addBufferDecls(DeclContext *DC, Buffer &CB); + llvm::SmallVector Buffers; }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "CGHLSLRuntime.h" +#include "CGDebugInfo.h" #include "CodeGenModule.h" + #include "clang/Basic/TargetOptions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -23,6 +25,7 @@ using namespace llvm; namespace { + void addDxilValVersion(StringRef ValVersionStr, llvm::Module &M) { // The validation of ValVersionStr is done at HLSLToolChain::TranslateArgs. // Assume ValVersionStr is legal here. @@ -42,11 +45,163 @@ StringRef DxilValKey = "dx.valver"; M.addModuleFlag(llvm::Module::ModFlagBehavior::AppendUnique, DxilValKey, Val); } + +void layoutBuffer(CGHLSLRuntime::Buffer &Buf, const DataLayout &DL) { + if (Buf.Constants.empty()) + return; + + // FIXME: support legacy cbuffer layout. + auto &Ctx = Buf.Constants[0].first->getContext(); + std::vector EltTys; + uint64_t Offset = 0; + for (auto &Const : Buf.Constants) { + auto *GV = Const.first; + Const.second = EltTys.size(); + auto *Ty = GV->getValueType(); + auto Align = DL.getPrefTypeAlign(Ty); + if (uint64_t PaddingSize = (Offset % Align.value())) { + // Not aligned. + // Adding padding. 
+ auto *PaddingTy = IntegerType::get(Ctx, 8 * PaddingSize); + EltTys.emplace_back(PaddingTy); + Offset += PaddingSize; + } + // Save Index for Ty into Const.second. + Const.second = EltTys.size(); + EltTys.emplace_back(Ty); + + Offset += DL.getTypeAllocSize(Ty); + } + Buf.LayoutStruct = llvm::StructType::get(EltTys[0]->getContext(), EltTys); +} + +GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf, + const unsigned AddressSpace) { + // Create global variable for CB. + GlobalVariable *CBGV = + new GlobalVariable(Buf.LayoutStruct, /*isConstant*/ true, + GlobalValue::LinkageTypes::ExternalLinkage, nullptr, + Buf.Name + (Buf.IsCBuffer ? ".cb." : ".tb."), + GlobalValue::NotThreadLocal, AddressSpace); + + IRBuilder<> B(CBGV->getContext()); + Value *ZeroIdx = B.getInt32(0); + // Replace Const use with CB use. + for (auto &Const : Buf.Constants) { + auto *EltTy = Buf.LayoutStruct->getElementType(Const.second); + auto *GV = Const.first; + unsigned Offset = Const.second; + + Value *GEP = + B.CreateGEP(Buf.LayoutStruct, CBGV, {ZeroIdx, B.getInt32(Offset)}); + + auto *GVTy = GV->getValueType(); + assert(EltTy == GVTy && "constant type mismatch"); + + // Cast address space. + GEP = B.CreateAddrSpaceCast(GEP, GVTy->getPointerTo()); + // Replace. + GV->replaceAllUsesWith(GEP); + // Erase GV. + GV->removeDeadConstantUsers(); + GV->eraseFromParent(); + } + return CBGV; +} + +void addResourceBinding(GlobalVariable *GV, CGHLSLRuntime::Buffer &CB) { + // FIXME: add resource binding to GV. +} + } // namespace +void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) { + if (D->getStorageClass() == SC_Static) { + // For static inside cbuffer, take as global static. + // Don't add to cbuffer. + CGM.EmitGlobal(D); + return; + } + + auto *GV = cast(CGM.GetAddrOfGlobalVar(D)); + // Add debug info for constVal. 
+ if (CGDebugInfo *DI = CGM.getModuleDebugInfo()) + if (CGM.getCodeGenOpts().getDebugInfo() >= + codegenoptions::DebugInfoKind::LimitedDebugInfo) + DI->EmitGlobalVariable(cast(GV), D); + + // FIXME: support packoffset. + uint32_t Offset = 0; + bool HasUserOffset = false; + + unsigned LowerBound = HasUserOffset ? Offset : UINT_MAX; + CB.Constants.emplace_back(std::make_pair(GV, LowerBound)); +} + +void CGHLSLRuntime::addBufferDecls(DeclContext *DC, Buffer &CB) { + for (Decl *it : DC->decls()) { + if (VarDecl *ConstDecl = dyn_cast(it)) { + addConstant(ConstDecl, CB); + } else if (isa(*it)) { + // Nothing to do for this declaration. + } else if (isa(it)) { + // Nothing to do for this declaration. + } else if (isa(it)) { + // A function within a cbuffer is effectively a top-level function, + // as it only refers to globally scoped declarations. + CGM.EmitTopLevelDecl(it); + } else if (NamespaceDecl *ND = dyn_cast(it)) { + addBufferDecls(ND, CB); + } else { + HLSLBufferDecl *Inner = dyn_cast(it); + if (!Inner) { + // FIXME: add a test once more resource types such as Texture2D are supported. 
+ DiagnosticsEngine &Diags = CGM.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, + "invalid decl inside cbuffer"); + Diags.Report(it->getLocation(), DiagID); + return; + } + addBuffer(Inner); + } + } +} + +void CGHLSLRuntime::addBuffer(HLSLBufferDecl *D) { + Buffers.emplace_back(Buffer(D)); + addBufferDecls(D, Buffers.back()); +} + void CGHLSLRuntime::finishCodeGen() { auto &TargetOpts = CGM.getTarget().getTargetOpts(); llvm::Module &M = CGM.getModule(); addDxilValVersion(TargetOpts.DxilValidatorVersion, M); + auto &DL = M.getDataLayout(); + auto &ASMap = CGM.getTarget().getAddressSpaceMap(); + const unsigned CBufferAddressSpace = + ASMap[(unsigned)clang::LangAS::hlsl_cbuffer]; + const unsigned TBufferAddressSpace = + ASMap[(unsigned)clang::LangAS::hlsl_tbuffer]; + + for (auto &Buf : Buffers) { + layoutBuffer(Buf, DL); + auto AddressSapce = + Buf.IsCBuffer ? CBufferAddressSpace : TBufferAddressSpace; + auto *GV = replaceBuffer(Buf, AddressSapce); + M.getGlobalList().push_back(GV); + addResourceBinding(GV, Buf); + } +} + +CGHLSLRuntime::Buffer::Buffer(HLSLBufferDecl *D) { + Name = D->getName(); + IsCBuffer = D->isCBuffer(); + if (auto *Binding = D->getAttr()) { + Reg = Binding->getID(); + Space = Binding->getSpace(); + } else { + Reg = UINT_MAX; + Space = 0; + } } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -6281,6 +6281,10 @@ DI->EmitAndRetainType(getContext().getEnumType(cast(D))); break; + case Decl::HLSLBuffer: + getHLSLRuntime().addBuffer(cast(D)); + break; + default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. 
FIXME: Would be nice to have an isTopLevelDeclKind diff --git a/clang/test/CodeGenHLSL/cbuf.hlsl b/clang/test/CodeGenHLSL/cbuf.hlsl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenHLSL/cbuf.hlsl @@ -0,0 +1,21 @@ +// RUN: %clang_dxc -Tlib_6_7 -fcgl -Fo - %s | FileCheck %s + +// CHECK: @[[CB:.+]] = external addrspace(4) constant { float, i32, double } +cbuffer A : register(b0, space1) { + float a; + double b; +} + +// CHECK: @[[TB:.+]] = external addrspace(5) constant { float, i32, double } +tbuffer A : register(b2, space1) { + float c; + double d; +} + +float foo() { +// CHECK: load float, ptr addrspacecast (ptr addrspace(4) @[[CB]] to ptr), align 4 +// CHECK: load double, ptr addrspacecast (ptr addrspace(4) getelementptr inbounds ({ float, i32, double }, ptr addrspace(4) @[[CB]], i32 0, i32 2) to ptr), align 8 +// CHECK: load float, ptr addrspacecast (ptr addrspace(5) @[[TB]] to ptr), align 4 +// CHECK: load double, ptr addrspacecast (ptr addrspace(5) getelementptr inbounds ({ float, i32, double }, ptr addrspace(5) @[[TB]], i32 0, i32 2) to ptr), align 8 + return a + b + c*d; +}