diff --git a/llvm/lib/Target/DirectX/CBufferDataLayout.h b/llvm/lib/Target/DirectX/CBufferDataLayout.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/DirectX/CBufferDataLayout.h @@ -0,0 +1,41 @@ +//===- Target/DirectX/CBufferDataLayout.h - Cbuffer layout helper ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for computing the allocation size of types placed in a cbuffer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_DIRECTX_CBUFFERDATALAYOUT_H +#define LLVM_TARGET_DIRECTX_CBUFFERDATALAYOUT_H + +#include +#include + +namespace llvm { +class DataLayout; +class Type; + +namespace dxil { + +class LegacyCBufferLayout; + +class CBufferDataLayout { + const DataLayout &DL; + const bool IsLegacyLayout; + std::unique_ptr LegacyDL; + +public: + CBufferDataLayout(const DataLayout &DL, const bool IsLegacy); + ~CBufferDataLayout(); + uint32_t getTypeAllocSizeInBytes(Type *Ty); +}; + +} // namespace dxil +} // namespace llvm + +#endif diff --git a/llvm/lib/Target/DirectX/CBufferDataLayout.cpp b/llvm/lib/Target/DirectX/CBufferDataLayout.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/DirectX/CBufferDataLayout.cpp @@ -0,0 +1,132 @@ +//===- Target/DirectX/CBufferDataLayout.cpp - Cbuffer layout helper -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for computing the allocation size of types placed in a cbuffer. 
+// +//===----------------------------------------------------------------------===// + +#include "CBufferDataLayout.h" + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" + +namespace llvm { +namespace dxil { + +// Implements the legacy cbuffer packing rules described in +// https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-packing-rules +class LegacyCBufferLayout { + struct LegacyStructLayout { + StructType *ST; + SmallVector Offsets; + uint32_t Size; + std::pair getElementLegacyOffset(unsigned Idx) const { + assert(Idx < Offsets.size() && "Invalid element idx!"); + unsigned Offset = Offsets[Idx]; + unsigned Ch = Offset & (RowAlign - 1); + return std::make_pair((Offset - Ch) / RowAlign, Ch); + } + }; + +public: + LegacyCBufferLayout(const DataLayout &DL) : DL(DL) {} + uint32_t getTypeAllocSizeInBytes(Type *Ty); + +private: + unsigned applyRowAlign(unsigned Offset, Type *EltTy); + unsigned getTypeAllocSize(Type *Ty); + LegacyStructLayout &getStructLayout(StructType *ST); + const DataLayout &DL; + SmallDenseMap StructLayouts; + // A cbuffer row is 4 dwords (16 bytes). 
+ static const unsigned RowAlign = 16; + static unsigned align(unsigned Offset, unsigned Alignment) { + return (Offset + Alignment - 1) / Alignment * Alignment; + } + static unsigned alignTo4Dwords(unsigned Offset) { + return align(Offset, RowAlign); + } +}; + +uint32_t LegacyCBufferLayout::getTypeAllocSizeInBytes(Type *Ty) { + return getTypeAllocSize(Ty); +} + +unsigned LegacyCBufferLayout::applyRowAlign(unsigned Offset, Type *EltTy) { + unsigned AlignedOffset = alignTo4Dwords(Offset); + + if (AlignedOffset == Offset) + return Offset; + + if (isa(EltTy) || isa(EltTy)) + return AlignedOffset; + unsigned Size = DL.getTypeStoreSize(EltTy); + if ((Offset + Size) > AlignedOffset) + return AlignedOffset; + else + return Offset; +} + +unsigned LegacyCBufferLayout::getTypeAllocSize(Type *Ty) { + if (auto *ST = dyn_cast(Ty)) { + LegacyStructLayout &Layout = getStructLayout(ST); + return Layout.Size; + } else if (auto *AT = dyn_cast(Ty)) { + unsigned NumElts = AT->getNumElements(); + if (NumElts == 0) + return 0; + + unsigned EltSize = getTypeAllocSize(AT->getElementType()); + unsigned AlignedEltSize = alignTo4Dwords(EltSize); + // Each new element starts 4-dword aligned; the last element is not padded. + return AlignedEltSize * (NumElts - 1) + EltSize; + } else { + // NOTE: The legacy layout uses the type store size for basic types; it is + // not rounded up to the ABI alignment. 
+ return DL.getTypeStoreSize(Ty); + } +} + +LegacyCBufferLayout::LegacyStructLayout & +LegacyCBufferLayout::getStructLayout(StructType *ST) { + auto it = StructLayouts.find(ST); + if (it != StructLayouts.end()) + return it->second; + + unsigned Offset = 0; + LegacyStructLayout Layout; + Layout.ST = ST; + for (Type *EltTy : ST->elements()) { + unsigned EltSize = getTypeAllocSize(EltTy); + if (unsigned ScalarSize = EltTy->getScalarSizeInBits()) + Offset = align(Offset, ScalarSize >> 3); + Offset = applyRowAlign(Offset, EltTy); + Layout.Offsets.emplace_back(Offset); + Offset += EltSize; + } + Layout.Size = Offset; + StructLayouts[ST] = Layout; + return StructLayouts[ST]; +} + +CBufferDataLayout::CBufferDataLayout(const DataLayout &DL, const bool IsLegacy) + : DL(DL), IsLegacyLayout(IsLegacy), + LegacyDL(IsLegacy ? std::make_unique(DL) : nullptr) { +} + +CBufferDataLayout::~CBufferDataLayout() = default; + +uint32_t CBufferDataLayout::getTypeAllocSizeInBytes(Type *Ty) { + if (IsLegacyLayout) + return LegacyDL->getTypeAllocSizeInBytes(Ty); + else + return DL.getTypeAllocSize(Ty); +} + +} // namespace dxil +} // namespace llvm diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt --- a/llvm/lib/Target/DirectX/CMakeLists.txt +++ b/llvm/lib/Target/DirectX/CMakeLists.txt @@ -12,6 +12,7 @@ add_public_tablegen_target(DirectXCommonTableGen) add_llvm_target(DirectXCodeGen + CBufferDataLayout.cpp DirectXAsmPrinter.cpp DirectXInstrInfo.cpp DirectXRegisterInfo.cpp diff --git a/llvm/lib/Target/DirectX/DXILResource.h b/llvm/lib/Target/DirectX/DXILResource.h --- a/llvm/lib/Target/DirectX/DXILResource.h +++ b/llvm/lib/Target/DirectX/DXILResource.h @@ -25,6 +25,7 @@ class GlobalVariable; namespace dxil { +class CBufferDataLayout; class ResourceBase { protected: @@ -107,14 +108,34 @@ void print(raw_ostream &O) const; }; +class ConstantBuffer : public ResourceBase { + uint32_t CBufferSizeInBytes = 0; // Size of the cbuffer in bytes; 0 until setSize() is called. 
+public: + ConstantBuffer(uint32_t I, hlsl::FrontendResource R); + void setSize(CBufferDataLayout &DL); + MDNode *write() const; + void print(raw_ostream &O) const; +}; + +template class ResourceTable { + StringRef MDName; + + llvm::SmallVector Data; + +public: + ResourceTable(StringRef Name) : MDName(Name) {} + void collect(Module &M); + MDNode *write(Module &M) const; + void print(raw_ostream &O) const; +}; + // FIXME: Fully computing the resource structures requires analyzing the IR // because some flags are set based on what operations are performed on the // resource. This partial patch handles some of the leg work, but not all of it. // See issue https://github.com/llvm/llvm-project/issues/57936. class Resources { - llvm::SmallVector UAVs; - - void collectUAVs(Module &M); + ResourceTable UAVs = {"hlsl.uavs"}; + ResourceTable CBuffers = {"hlsl.cbufs"}; public: void collect(Module &M); diff --git a/llvm/lib/Target/DirectX/DXILResource.cpp b/llvm/lib/Target/DirectX/DXILResource.cpp --- a/llvm/lib/Target/DirectX/DXILResource.cpp +++ b/llvm/lib/Target/DirectX/DXILResource.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "DXILResource.h" +#include "CBufferDataLayout.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Metadata.h" @@ -22,23 +23,43 @@ using namespace llvm::dxil; using namespace llvm::hlsl; -void Resources::collectUAVs(Module &M) { - NamedMDNode *Entry = M.getNamedMetadata("hlsl.uavs"); +template void ResourceTable::collect(Module &M) { + NamedMDNode *Entry = M.getNamedMetadata(MDName); if (!Entry || Entry->getNumOperands() == 0) return; uint32_t Counter = 0; - for (auto *UAV : Entry->operands()) { - UAVs.push_back(UAVResource(Counter++, FrontendResource(cast(UAV)))); + for (auto *Res : Entry->operands()) { + Data.push_back(T(Counter++, FrontendResource(cast(Res)))); } } -void Resources::collect(Module &M) { collectUAVs(M); } +template <> void 
ResourceTable::collect(Module &M) { + NamedMDNode *Entry = M.getNamedMetadata(MDName); + if (!Entry || Entry->getNumOperands() == 0) + return; + + uint32_t Counter = 0; + for (auto *Res : Entry->operands()) { + Data.push_back( + ConstantBuffer(Counter++, FrontendResource(cast(Res)))); + } + // FIXME: share CBufferDataLayout with CBuffer load lowering. + // See https://github.com/llvm/llvm-project/issues/58381 + CBufferDataLayout CBDL(M.getDataLayout(), /*IsLegacy*/ true); + for (auto &CB : Data) + CB.setSize(CBDL); +} + +void Resources::collect(Module &M) { + UAVs.collect(M); + CBuffers.collect(M); +} ResourceBase::ResourceBase(uint32_t I, FrontendResource R) : ID(I), GV(R.getGlobalVariable()), Name(""), Space(R.getSpace()), LowerBound(R.getResourceIndex()), RangeSize(1) { - if (auto *ArrTy = dyn_cast(GV->getInitializer()->getType())) + if (auto *ArrTy = dyn_cast(GV->getValueType())) RangeSize = ArrTy->getNumElements(); } @@ -276,6 +297,30 @@ ExtProps.ElementType = ElTy; } +ConstantBuffer::ConstantBuffer(uint32_t I, hlsl::FrontendResource R) + : ResourceBase(I, R) {} + +void ConstantBuffer::setSize(CBufferDataLayout &DL) { + CBufferSizeInBytes = DL.getTypeAllocSizeInBytes(GV->getValueType()); +} + +void ConstantBuffer::print(raw_ostream &OS) const { + OS << "; " << left_justify(Name, 31); + + OS << right_justify("cbuffer", 10); + + printComponentType(Kinds::CBuffer, ComponentType::Invalid, 8, OS); + + printKind(Kinds::CBuffer, 12, OS, /*SRV*/ false, /*HasCounter*/ false); + // Print the binding part. 
+ ResourceBase::print(OS, "CB", "cb"); +} + +template void ResourceTable::print(raw_ostream &OS) const { + for (auto &Res : Data) + Res.print(OS); +} + MDNode *ResourceBase::ExtendedProperties::write(LLVMContext &Ctx) const { IRBuilder<> B(Ctx); SmallVector Entries; @@ -315,14 +360,36 @@ return MDNode::get(Ctx, Entries); } +MDNode *ConstantBuffer::write() const { + auto &Ctx = GV->getContext(); + IRBuilder<> B(Ctx); + Metadata *Entries[7]; + ResourceBase::write(Ctx, Entries); + + Entries[6] = ConstantAsMetadata::get(B.getInt32(CBufferSizeInBytes)); + return MDNode::get(Ctx, Entries); +} + +template MDNode *ResourceTable::write(Module &M) const { + if (Data.empty()) + return nullptr; + SmallVector MDs; + for (auto &Res : Data) + MDs.emplace_back(Res.write()); + + NamedMDNode *Entry = M.getNamedMetadata(MDName); + if (Entry) + Entry->eraseFromParent(); + + return MDNode::get(M.getContext(), MDs); +} + void Resources::write(Module &M) const { Metadata *ResourceMDs[4] = {nullptr, nullptr, nullptr, nullptr}; - SmallVector UAVMDs; - for (auto &UAV : UAVs) - UAVMDs.emplace_back(UAV.write()); - if (!UAVMDs.empty()) - ResourceMDs[1] = MDNode::get(M.getContext(), UAVMDs); + ResourceMDs[1] = UAVs.write(M); + + ResourceMDs[2] = CBuffers.write(M); bool HasResource = ResourceMDs[0] != nullptr || ResourceMDs[1] != nullptr || ResourceMDs[2] != nullptr || ResourceMDs[3] != nullptr; @@ -346,8 +413,8 @@ << "; ------------------------------ ---------- ------- ----------- " "------- -------------- ------\n"; - for (auto &UAV : UAVs) - UAV.print(O); + CBuffers.print(O); + UAVs.print(O); } void Resources::dump() const { print(dbgs()); } diff --git a/llvm/test/CodeGen/DirectX/cbuf.ll b/llvm/test/CodeGen/DirectX/cbuf.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/cbuf.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD +; RUN: opt -S --passes="print-dxil-resource" < %s 2>&1 | FileCheck %s --check-prefix=PRINT + +target 
datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +; Make sure the size is 24 = 16 + 8 (float,i32,double -> 16 and int2 -> 8) +; DXILMD:!{i32 0, ptr @A.cb., !"", i32 1, i32 2, i32 1, i32 24} + +; Make sure match register(b2, space1) with ID 0. +; PRINT:cbuffer NA NA CB0 cb2,space1 1 + +@A.cb. = external constant { float, i32, double, <2 x i32> } + +; Function Attrs: noinline nounwind optnone +define noundef float @"?foo@@YAMXZ"() #0 { +entry: + %0 = load float, ptr @A.cb., align 4 + %conv = fpext float %0 to double + %1 = load double, ptr getelementptr inbounds ({ float, i32, double, <2 x i32> }, ptr @A.cb., i32 0, i32 2), align 8 + %2 = load <2 x i32>, ptr getelementptr inbounds ({ float, i32, double, <2 x i32> }, ptr @A.cb., i32 0, i32 3), align 8 + %3 = extractelement <2 x i32> %2, i32 1 + %conv1 = sitofp i32 %3 to double + %4 = call double @llvm.fmuladd.f64(double %1, double %conv1, double %conv) + %conv2 = fptrunc double %4 to float + ret float %conv2 +} + +; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn +declare double @llvm.fmuladd.f64(double, double, double) #1 + +attributes #0 = { noinline nounwind } +attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } + +!hlsl.cbufs = !{!1} + +!1 = !{ptr @A.cb., !"A.cb.ty", i32 13, i32 2, i32 1} diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll @@ -0,0 +1,14 @@ +; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +; Make sure the size is 36 = 16 + 16 + 4 (float, double -> 16, float, half, i16, i64 -> 16 and int -> 4) +; 
DXILMD:!{i32 0, ptr @A.cb., !"", i32 0, i32 2, i32 1, i32 36} + +@A.cb. = external local_unnamed_addr constant { float, double, float, half, i16, i64, i32 } + + +!hlsl.cbufs = !{!1} + +!1 = !{ptr @A.cb., !"A.cb.ty", i32 13, i32 2, i32 0} diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +; +; cbuffer B +; { +; +; struct B +; { +; +; double B0; ; Offset: 0 +; float3 B1; ; Offset: 16 +; float B2; ; Offset: 28 +; double3 B3; ; Offset: 32 +; half B4; ; Offset: 56 +; double2 B5; ; Offset: 64 +; float B6; ; Offset: 80 +; half3 B7; ; Offset: 84 +; half3 B8; ; Offset: 90 +; +; } B; ; Offset: 0 Size: 96 +; +; } +; + + +; Make sure the size is 96 +; DXILMD:!{i32 0, ptr @B.cb., !"", i32 0, i32 1, i32 1, i32 96} + +@B.cb. 
= external local_unnamed_addr constant { double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> } + + +!hlsl.cbufs = !{!0} + +!0 = !{ptr @B.cb., !"B.cb.ty", i32 13, i32 1, i32 0} diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll @@ -0,0 +1,51 @@ +; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +; cbuffer B +; { +; +; struct B +; { +; +; double B0[2]; ; Offset: 0 +; float3 B1[3]; ; Offset: 32 +; float B2; ; Offset: 76 +; double B3[3]; ; Offset: 80 +; half B4; ; Offset: 120 +; double2 B5[1]; ; Offset: 128 +; float B6; ; Offset: 144 +; half3 B7[2]; ; Offset: 160 +; half3 B8; ; Offset: 182 +; +; } B; ; Offset: 0 Size: 188 +; +; } +; +; cbuffer B +; { +; +; struct B.0 +; { +; +; double3 B9[3]; ; Offset: 0 +; half3 B10; ; Offset: 88 +; +; } B; ; Offset: 0 Size: 94 +; +; } + + +; Make sure the size is 188. +; DXILMD:!{i32 0, ptr @B.cb., !"", i32 0, i32 1, i32 1, i32 188} +; Make sure the size is 94. +; DXILMD:!{i32 1, ptr @B.cb..1, !"", i32 0, i32 2, i32 1, i32 94} + +@B.cb. 
= external local_unnamed_addr constant { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> } +@B.cb..1 = external local_unnamed_addr constant { [3 x <3 x double>], <3 x half> } + +!hlsl.cbufs = !{!0, !1} + +!0 = !{ptr @B.cb., !"B.cb.ty", i32 13, i32 1, i32 0} +!1 = !{ptr @B.cb..1, !"B.cb.ty", i32 13, i32 2, i32 0} diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll @@ -0,0 +1,81 @@ +; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +; cbuffer D +; { +; +; struct D +; { +; +; int D0; ; Offset: 0 +; struct struct.B +; { +; +; double B0; ; Offset: 16 +; float3 B1; ; Offset: 32 +; float B2; ; Offset: 44 +; double3 B3; ; Offset: 48 +; half B4; ; Offset: 72 +; double2 B5; ; Offset: 80 +; float B6; ; Offset: 96 +; half3 B7; ; Offset: 100 +; half3 B8; ; Offset: 106 +; +; } D1; ; Offset: 16 +; +; half D2; ; Offset: 112 +; struct struct.C +; { +; +; struct struct.A +; { +; +; float A0; ; Offset: 128 +; double A1; ; Offset: 136 +; float A2; ; Offset: 144 +; half A3; ; Offset: 148 +; int16_t A4; ; Offset: 150 +; int64_t A5; ; Offset: 152 +; int A6; ; Offset: 160 +; +; } C0; ; Offset: 128 +; +; float C1[1]; ; Offset: 176 +; struct struct.B +; { +; +; double B0; ; Offset: 192 +; float3 B1; ; Offset: 208 +; float B2; ; Offset: 220 +; double3 B3; ; Offset: 224 +; half B4; ; Offset: 248 +; double2 B5; ; Offset: 256 +; float B6; ; Offset: 272 +; half3 B7; ; Offset: 276 +; half3 B8; ; Offset: 282 +; +; } C2[2];; ; Offset: 192 +; +; half C3; ; Offset: 384 +; +; } D3; ; Offset: 128 +; +; double D4; ; Offset: 392 +; +; } D; ; Offset: 0 Size: 400 + + +; Make sure the size is 400 +; DXILMD:!{i32 
0, ptr @D.cb., !"", i32 0, i32 1, i32 1, i32 400} + + +%struct.B = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }> +%struct.C = type <{ %struct.A, [1 x float], [2 x %struct.B], half }> +%struct.A = type <{ float, double, float, half, i16, i64, i32 }> + +@D.cb. = external local_unnamed_addr constant { i32, %struct.B, half, %struct.C, double } + +!hlsl.cbufs = !{!0} +!0 = !{ptr @D.cb., !"D.cb.ty", i32 13, i32 1, i32 0}