diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -26,6 +26,8 @@
   DXILResourceAnalysis.cpp
   DXILShaderFlags.cpp
   DXILTranslateMetadata.cpp
+  DXILTypedBufferLowering.cpp
+  MemAccessLowerHelper.cpp
   PointerTypeAnalysis.cpp
 
   LINK_COMPONENTS
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -26,11 +26,14 @@
 def ThreadIdInGroupClass : dxil_class<"ThreadIdInGroup">;
 def ThreadIdClass : dxil_class<"ThreadId">;
 def GroupIdClass : dxil_class<"GroupId">;
+def BufferLoadClass : dxil_class<"BufferLoad">;
+def BufferStoreClass : dxil_class<"BufferStore">;
+def CreateHandleClass : dxil_class<"CreateHandle">;
 
 def binary_uint : dxil_category<"Binary uint">;
 def unary_float : dxil_category<"Unary float">;
 def ComputeID : dxil_category<"Compute/Mesh/Amplification shader">;
-
+def Resources : dxil_category<"Resources">;
 
 // The parameter description for a DXIL instruction
 class dxil_param ]>, dxil_map_intrinsic;
+
+def CreateHandle : dxil_op< "CreateHandle", 57, CreateHandleClass, Resources, "creates the handle to a resource",
+  "void;", "ro",
+  [
+    dxil_param<0, "dx.types.Handle", "", "the handle to the resource">,
+    dxil_param<1, "i32", "opcode", "DXIL opcode">,
+    dxil_param<2, "i8", "resourceClass", "the class of resource to create (SRV, UAV, CBuffer, Sampler)", 1>, // maps to DxilResourceBase::Class
+    dxil_param<3, "i32", "rangeId", "range identifier for resource", 1>,
+    dxil_param<4, "i32", "index", "zero-based index into range">,
+    dxil_param<5, "i1", "nonUniformIndex", "non-uniform resource index", 1>
+  ]>;
+
+def BufferLoad : dxil_op< "BufferLoad", 68, BufferLoadClass, Resources, "reads from a TypedBuffer", "half;float;i16;i32;", "ro",
+  [
+    dxil_param<0, "dx.types.ResRet", "", "the loaded value">,
+    dxil_param<1, "i32", "opcode", "DXIL opcode">,
+    dxil_param<2, "dx.types.Handle", "srv", "handle of TypedBuffer SRV to sample">,
+    dxil_param<3, "i32", "index", "element index">,
+    dxil_param<4, "i32", "offset", "Used for offset into element for StructuredBuffer in sm6.0/6.1. Always undef for ByteAddressBuffer/TypedBuffer. Always undef for shader model higher than 6.1">
+  ]>;
+
+def BufferStore : dxil_op< "BufferStore", 69, BufferStoreClass, Resources, "writes to a RWTypedBuffer", "half;float;i16;i32;", "",
+  [
+    dxil_param<0, "void", "", "">,
+    dxil_param<1, "i32", "opcode", "DXIL opcode">,
+    dxil_param<2, "dx.types.Handle", "uav", "handle of UAV to store to">,
+    dxil_param<3, "i32", "coord0", "coordinate in elements">,
+    dxil_param<4, "i32", "coord1", "coordinate (unused?)">,
+    dxil_param<5, "$o", "value0", "value">,
+    dxil_param<6, "$o", "value1", "value">,
+    dxil_param<7, "$o", "value2", "value">,
+    dxil_param<8, "$o", "value3", "value">,
+    dxil_param<9, "i8", "mask", "written value mask">
+  ]>;
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -35,6 +35,24 @@
                                     bool NoOpCodeParam);
   static const char *getOpCodeName(dxil::OpCode DXILOp);
 
+  /// Create a call to the DXIL CreateHandle op.
+  /// \param ResClass the class of resource to create, as an i8 operand.
+  /// \param RangeID range identifier for the resource.
+  /// \param Index zero-based index into the range.
+  /// \param NonUniformIndex value of the non-uniform resource index operand.
+  CallInst *createCreateHandle(int8_t ResClass, int RangeID, Value *Index,
+                               bool NonUniformIndex);
+  /// Create a call to the DXIL BufferLoad op overloaded on \p OverloadTy that
+  /// reads element \p Index through handle \p Hdl. The offset operand is
+  /// always passed as poison.
+  CallInst *createBufferLoad(Type *OverloadTy, Value *Hdl, Value *Index);
+  /// Create a call to the DXIL BufferStore op overloaded on \p OverloadTy that
+  /// writes \p V0 .. \p V3 to element \p Index through handle \p Hdl.
+  /// \p Mask is the written value mask; the second coordinate is poison.
+  CallInst *createBufferStore(Type *OverloadTy, Value *Hdl, Value *Index,
+                              Value *V0, Value *V1, Value *V2, Value *V3,
+                              uint8_t Mask);
+
 private:
   Module &M;
   IRBuilderBase &B;
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -10,7 +10,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "DXILOpBuilder.h"
-#include "DXILConstants.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/DXILOperationCommon.h"
@@ -320,5 +320,38 @@
 const char *DXILOpBuilder::getOpCodeName(dxil::OpCode DXILOp) {
   return ::getOpCodeName(DXILOp);
 }
+
+CallInst *DXILOpBuilder::createCreateHandle(int8_t ResClass, int RangeID,
+                                            Value *Index,
+                                            bool NonUniformIndex) {
+  auto Fn =
+      getOrCreateDXILOpFunction(dxil::OpCode::CreateHandle, B.getVoidTy(), M);
+  // Operand order follows the CreateHandle definition in DXIL.td.
+  return B.CreateCall(
+      Fn, {B.getInt32(static_cast<int32_t>(dxil::OpCode::CreateHandle)),
+           B.getInt8(ResClass), B.getInt32(RangeID), Index,
+           B.getInt1(NonUniformIndex)});
+}
+
+CallInst *DXILOpBuilder::createBufferLoad(Type *OverloadTy, Value *Hdl,
+                                          Value *Index) {
+  auto Fn = getOrCreateDXILOpFunction(dxil::OpCode::BufferLoad, OverloadTy, M);
+  // The trailing offset operand is not meaningful for a TypedBuffer.
+  return B.CreateCall(
+      Fn, {B.getInt32(static_cast<int32_t>(dxil::OpCode::BufferLoad)), Hdl,
+           Index, PoisonValue::get(B.getInt32Ty())});
+}
+
+CallInst *DXILOpBuilder::createBufferStore(Type *OverloadTy, Value *Hdl,
+                                           Value *Index, Value *V0, Value *V1,
+                                           Value *V2, Value *V3, uint8_t Mask) {
+  auto Fn = getOrCreateDXILOpFunction(dxil::OpCode::BufferStore, OverloadTy, M);
+  // coord1 is passed as poison; only coord0 (Index) selects the element.
+  return B.CreateCall(
+      Fn, {B.getInt32(static_cast<int32_t>(dxil::OpCode::BufferStore)), Hdl,
+           Index, PoisonValue::get(B.getInt32Ty()), V0, V1, V2, V3,
+           B.getInt8(Mask)});
+}
+
 } // namespace dxil
 } // namespace llvm
diff --git a/llvm/lib/Target/DirectX/DXILResource.h b/llvm/lib/Target/DirectX/DXILResource.h
--- a/llvm/lib/Target/DirectX/DXILResource.h
+++ b/llvm/lib/Target/DirectX/DXILResource.h
@@ -90,6 +90,9 @@
     MDNode *write(LLVMContext &Ctx) const;
   };
 
+  GlobalVariable *getVariable() const { return GV; }
+  uint32_t getRangeID() const { return ID; }
+  uint32_t getIndex() const { return LowerBound; }
 };
 
 class UAVResource : public ResourceBase {
@@ -122,6 +125,7 @@
   void write(Module &M) const;
   void print(raw_ostream &O) const;
   LLVM_DUMP_METHOD void dump() const;
+  const llvm::SmallVector<UAVResource> &getUAVs() const;
 };
 
 } // namespace dxil
diff --git a/llvm/lib/Target/DirectX/DXILResource.cpp
b/llvm/lib/Target/DirectX/DXILResource.cpp
--- a/llvm/lib/Target/DirectX/DXILResource.cpp
+++ b/llvm/lib/Target/DirectX/DXILResource.cpp
@@ -346,3 +346,7 @@
 }
 
 void Resources::dump() const { print(dbgs()); }
+
+const llvm::SmallVector<UAVResource> &llvm::dxil::Resources::getUAVs() const {
+  return UAVs;
+}
diff --git a/llvm/lib/Target/DirectX/DXILTypedBufferLowering.cpp b/llvm/lib/Target/DirectX/DXILTypedBufferLowering.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILTypedBufferLowering.cpp
@@ -0,0 +1,225 @@
+//===- DXILTypedBufferLowering.cpp - Lowering TypedBuffer to DXIL ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains passes to lower typed buffer load/store to DXIL.
+//===----------------------------------------------------------------------===//
+
+#include "DXILConstants.h"
+#include "DXILOpBuilder.h"
+#include "DXILResource.h"
+#include "DXILResourceAnalysis.h"
+#include "DirectX.h"
+#include "MemAccessLowerHelper.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/Utils/Local.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define DEBUG_TYPE "dxil-typedbuf-lower"
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+/// Maximum number of value components carried by one BufferLoad/BufferStore.
+static constexpr unsigned MaxBufferComponents = 4;
+
+/// Maps a function to the createHandle call emitted for it.
+using HandleMapTy = SmallDenseMap<Function *, CallInst *>;
+
+/// Return the DXIL overload type for an access of type \p Ty: the element type
+/// of a vector, or \p Ty itself otherwise.
+static Type *getOverloadType(Type *Ty) {
+  return Ty->isVectorTy() ? Ty->getContainedType(0) : Ty;
+}
+
+/// Return the createHandle call for \p F, emitting it at the first insertion
+/// point of the entry block the first time \p F is requested.
+static CallInst *getOrCreateHandle(Module &M, Function *F, uint32_t RangeID,
+                                   Value *UAVIndex, HandleMapTy &HandleMap) {
+  auto It = HandleMap.find(F);
+  if (It != HandleMap.end())
+    return It->second;
+
+  IRBuilder<> B(&*F->getEntryBlock().getFirstInsertionPt());
+  DXILOpBuilder DXILB(M, B);
+  CallInst *Hdl = DXILB.createCreateHandle(
+      static_cast<int8_t>(hlsl::ResourceClass::UAV), RangeID, UAVIndex,
+      /*NonUniformIndex=*/false);
+  HandleMap[F] = Hdl;
+  return Hdl;
+}
+
+/// Replace the store \p SI with a BufferStore op that writes the stored value
+/// to element \p Index through handle \p Hdl.
+static void lowerTypedBufferStore(Module &M, StoreInst *SI, Value *Hdl,
+                                  Value *Index) {
+  Value *V = SI->getValueOperand();
+  Type *Ty = V->getType();
+  IRBuilder<> B(SI);
+  DXILOpBuilder DXILB(M, B);
+  Type *OverloadTy = getOverloadType(Ty);
+  // Components that are not written are passed as poison.
+  Value *UnusedV = PoisonValue::get(OverloadTy);
+  Value *Elts[MaxBufferComponents] = {UnusedV, UnusedV, UnusedV, UnusedV};
+  uint8_t Mask = 0;
+  if (Ty->isIntegerTy() || Ty->isFloatingPointTy()) {
+    Elts[0] = V;
+    Mask = 1;
+  } else if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+    // Only fixed vector types are supported.
+    unsigned NumElts = VT->getNumElements();
+    if (NumElts > MaxBufferComponents)
+      report_fatal_error("typed buffer store with more than 4 components");
+    // V is a vector, so its components are read with extractelement;
+    // extractvalue is only valid on struct and array values.
+    for (unsigned I = 0; I < NumElts; ++I)
+      Elts[I] = B.CreateExtractElement(V, I);
+    Mask = static_cast<uint8_t>((1u << NumElts) - 1);
+  } else {
+    llvm_unreachable("invalid type for buffer store.");
+  }
+  DXILB.createBufferStore(OverloadTy, Hdl, Index, Elts[0], Elts[1], Elts[2],
+                          Elts[3], Mask);
+  SI->eraseFromParent();
+}
+
+/// Replace the load \p LI with a BufferLoad op that reads element \p Index
+/// through handle \p Hdl, then rebuild a value of the loaded type from it.
+static void lowerTypedBufferLoad(Module &M, LoadInst *LI, Value *Hdl,
+                                 Value *Index) {
+  Type *Ty = LI->getType();
+  IRBuilder<> B(LI);
+  DXILOpBuilder DXILB(M, B);
+  Value *BufLd = DXILB.createBufferLoad(getOverloadType(Ty), Hdl, Index);
+
+  Value *Result = nullptr;
+  if (Ty->isIntegerTy() || Ty->isFloatingPointTy()) {
+    Result = B.CreateExtractValue(BufLd, 0);
+  } else if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
+    // Only fixed vector types are supported.
+    unsigned NumElts = VT->getNumElements();
+    if (NumElts > MaxBufferComponents)
+      report_fatal_error("typed buffer load with more than 4 components");
+    Result = PoisonValue::get(VT);
+    for (unsigned I = 0; I < NumElts; ++I) {
+      Value *Elt = B.CreateExtractValue(BufLd, I);
+      Result = B.CreateInsertElement(Result, Elt, I);
+    }
+  } else {
+    llvm_unreachable("invalid type for buffer load.");
+  }
+  LI->replaceAllUsesWith(Result);
+  LI->eraseFromParent();
+}
+
+/// Lower every load/store that goes through a UAV global in \p Res to DXIL
+/// CreateHandle/BufferLoad/BufferStore ops.
+/// \returns false if \p Res has no UAVs, true otherwise.
+static bool lowerUAVAccess(Module &M, dxil::Resources &Res) {
+  // FIXME: Allocate resource binding first.
+  // See https://github.com/llvm/llvm-project/issues/58051
+  const SmallVector<UAVResource> &UAVs = Res.getUAVs();
+
+  if (UAVs.empty())
+    return false;
+  const DataLayout &DL = M.getDataLayout();
+  IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
+  for (const auto &UAV : UAVs) {
+    GlobalVariable *GV = UAV.getVariable();
+    std::vector<MemAccessLowerHelper::TypedBufMemAccess> AccessList;
+    for (User *U : make_early_inc_range(GV->users())) {
+      if (auto *SI = dyn_cast<StoreInst>(U)) {
+        Value *V = SI->getValueOperand();
+        // FIXME: use createHandle generated in clangCodeGen.
+        // See https://github.com/llvm/llvm-project/issues/58031.
+        SI->eraseFromParent();
+        MemAccessLowerHelper::collectMemAccess(V, AccessList, DL);
+      } else if (auto *IntrinsicCI = dyn_cast<IntrinsicInst>(U)) {
+        if (IntrinsicCI->getIntrinsicID() == Intrinsic::invariant_start ||
+            IntrinsicCI->getIntrinsicID() == Intrinsic::invariant_end)
+          IntrinsicCI->eraseFromParent();
+      } else if (auto *LI = dyn_cast<LoadInst>(U)) {
+        MemAccessLowerHelper::collectMemAccess(LI, AccessList, DL);
+      }
+      // FIXME: support array of resource.
+      // See https://github.com/llvm/llvm-project/issues/58486
+    }
+
+    uint32_t RangeID = UAV.getRangeID();
+    ConstantInt *UAVIndex = ConstantInt::get(Int32Ty, UAV.getIndex());
+    // NOTE(review): UAVIndex is a 32-bit constant, so its zero-extended value
+    // can never equal -1ULL and this assert cannot fire. Confirm how an
+    // unallocated binding is represented before tightening the check.
+    assert(UAVIndex->getLimitedValue() != -1ULL && "unallocated binding");
+    HandleMapTy HandleMap;
+    for (auto &Access : AccessList) {
+      Instruction *User = Access.User;
+      // An access made directly through the buffer pointer, with no GEP in
+      // between, has no recorded index; it addresses element 0.
+      Value *Index = Access.Offset.Index;
+      if (!Index)
+        Index = ConstantInt::get(Int32Ty, 0);
+      // Make the handle in the function that contains the access.
+      Function *F = User->getParent()->getParent();
+      CallInst *Hdl = getOrCreateHandle(M, F, RangeID, UAVIndex, HandleMap);
+
+      if (auto *SI = dyn_cast<StoreInst>(User)) {
+        // Skip the store on hdl.
+        if (SI->getPointerOperand() == GV)
+          continue;
+        lowerTypedBufferStore(M, SI, Hdl, Index);
+      } else {
+        lowerTypedBufferLoad(M, cast<LoadInst>(User), Hdl, Index);
+      }
+    }
+  }
+  return true;
+}
+
+/// A pass that lowers typed buffer accesses into DXIL.
+namespace {
+class DXILTypedBufferLoweringLegacy : public ModulePass {
+public:
+  bool runOnModule(Module &M) override {
+    dxil::Resources &Res = getAnalysis<DXILResourceWrapper>().getDXILResource();
+    return lowerUAVAccess(M, Res);
+  }
+  StringRef getPassName() const override { return "DXIL TypedBuffer lowering"; }
+  DXILTypedBufferLoweringLegacy() : ModulePass(ID) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // This pass rewrites IR, so it must not claim to preserve all analyses.
+    AU.addRequired<DXILResourceWrapper>();
+  }
+
+  static char ID; // Pass identification.
+};
+char DXILTypedBufferLoweringLegacy::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DXILTypedBufferLoweringLegacy, DEBUG_TYPE,
+                      "DXIL TypedBuffer lowering", false, false)
+INITIALIZE_PASS_DEPENDENCY(DXILResourceWrapper)
+INITIALIZE_PASS_END(DXILTypedBufferLoweringLegacy, DEBUG_TYPE,
+                    "DXIL TypedBuffer lowering", false, false)
+
+ModulePass *llvm::createDXILTypedBufferLoweringLegacyPass() {
+  return new DXILTypedBufferLoweringLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
--- a/llvm/lib/Target/DirectX/DirectX.h
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -34,6 +34,12 @@
 /// Pass to lowering LLVM intrinsic call to DXIL op function call.
 ModulePass *createDXILOpLoweringLegacyPass();
 
+/// Initializer for DXILTypedBufferLowering.
+void initializeDXILTypedBufferLoweringLegacyPass(PassRegistry &);
+
+/// Pass to lower TypedBuffer accesses to DXIL op function calls.
+ModulePass *createDXILTypedBufferLoweringLegacyPass();
+
 /// Initializer for DXILTranslateMetadata.
void initializeDXILTranslateMetadataPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -43,6 +43,7 @@
   initializeDXILOpLoweringLegacyPass(*PR);
   initializeDXILTranslateMetadataPass(*PR);
   initializeDXILResourceWrapperPass(*PR);
+  initializeDXILTypedBufferLoweringLegacyPass(*PR);
 }
 
 class DXILTargetObjectFile : public TargetLoweringObjectFile {
@@ -72,6 +73,7 @@
 
   FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
   void addCodeGenPrepare() override {
+    addPass(createDXILTypedBufferLoweringLegacyPass());
     addPass(createDXILOpLoweringLegacyPass());
     addPass(createDXILPrepareModulePass());
     addPass(createDXILTranslateMetadataPass());
diff --git a/llvm/lib/Target/DirectX/MemAccessLowerHelper.h b/llvm/lib/Target/DirectX/MemAccessLowerHelper.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/DirectX/MemAccessLowerHelper.h
@@ -0,0 +1,57 @@
+//===- Target/DirectX/MemAccessLowerHelper.h - Mem access helper --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Utils to help lowering memory access for resources.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_DIRECTX_MEMACCESSLOWERHELPER_H
+#define LLVM_TARGET_DIRECTX_MEMACCESSLOWERHELPER_H
+
+#include <vector>
+
+namespace llvm {
+
+class Value;
+class Instruction;
+class GlobalVariable;
+class DataLayout;
+class LLVMContext;
+class GEPOperator;
+
+namespace dxil {
+
+namespace MemAccessLowerHelper {
+
+// Offset of an access into a buffer like RWBuffer U.
+// Index is I for U[I].
+struct TypedBufMemOffset {
+  Value *Index = nullptr;
+  TypedBufMemOffset appendGEPOffset(GEPOperator *GEP, const DataLayout &DL);
+};
+
+template <typename OffsetT> struct MemAccess {
+  OffsetT Offset;
+  Instruction *User; // The load/store instruction that accesses memory.
+};
+
+using TypedBufMemAccess = MemAccess<TypedBufMemOffset>;
+
+/// Collect all memory accesses made through the typed buffer pointer
+/// \c Ptr and append them to \c AccessList.
+void collectMemAccess(llvm::Value *Ptr,
+                      std::vector<TypedBufMemAccess> &AccessList,
+                      const llvm::DataLayout &DL);
+
+} // namespace MemAccessLowerHelper
+
+} // namespace dxil
+
+} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_MEMACCESSLOWERHELPER_H
diff --git a/llvm/lib/Target/DirectX/MemAccessLowerHelper.cpp b/llvm/lib/Target/DirectX/MemAccessLowerHelper.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/DirectX/MemAccessLowerHelper.cpp
@@ -0,0 +1,75 @@
+//===- MemAccessLowerHelper.cpp - Mem access helper -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// Utils to help lowering memory access for resources.
+//===----------------------------------------------------------------------===//
+
+#include "MemAccessLowerHelper.h"
+#include "llvm/Analysis/Utils/Local.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+
+using namespace llvm;
+using namespace llvm::dxil::MemAccessLowerHelper;
+
+/// Return the offset described by \p GEP. Only single-index GEPs are
+/// supported; that index becomes the element index, and any index already
+/// held by this offset is not consulted.
+TypedBufMemOffset
+llvm::dxil::MemAccessLowerHelper::TypedBufMemOffset::appendGEPOffset(
+    GEPOperator *GEP, const DataLayout &DL) {
+  assert(GEP->getNumIndices() == 1 && "expected a single-index GEP");
+  Value *Idx = *GEP->idx_begin();
+  return {Idx};
+}
+
+/// Visit \p U, a user of a buffer pointer whose offset is \p Offset, and
+/// append every load/store reached through it to \p AccessList along with
+/// the offset that applies to it.
+template <typename T>
+static void collectUserMemAccess(llvm::User *U, T &Offset,
+                                 std::vector<MemAccess<T>> &AccessList,
+                                 const llvm::DataLayout &DL) {
+  if (auto *GEP = dyn_cast<GEPOperator>(U)) {
+    // Calculate new Offset.
+    T NewOffset = Offset.appendGEPOffset(GEP, DL);
+
+    for (llvm::User *GEPU : GEP->users())
+      collectUserMemAccess(GEPU, NewOffset, AccessList, DL);
+  } else if (isa<BitCastOperator>(U)) {
+    // NOTE(review): the cast kind tested here was lost from the patch text;
+    // BitCastOperator is assumed because the offset is forwarded unchanged.
+    for (User *AU : U->users())
+      collectUserMemAccess(AU, Offset, AccessList, DL);
+  } else if (auto *LI = dyn_cast<LoadInst>(U)) {
+    AccessList.push_back({Offset, LI});
+  } else if (auto *SI = dyn_cast<StoreInst>(U)) {
+    AccessList.push_back({Offset, SI});
+  } else
+    llvm_unreachable("unsupported user");
+}
+
+/// Collect all memory access for non-legacy cbuffer/ typed buffer global
+/// variable \c Ptr.
+template <typename T>
+static void collectMemAccess(llvm::Value *Ptr,
+                             std::vector<MemAccess<T>> &AccessList,
+                             const llvm::DataLayout &DL) {
+  for (llvm::User *U : Ptr->users()) {
+    T Offset;
+    ::collectUserMemAccess(U, Offset, AccessList, DL);
+  }
+}
+
+void llvm::dxil::MemAccessLowerHelper::collectMemAccess(
+    llvm::Value *Ptr, std::vector<TypedBufMemAccess> &AccessList,
+    const llvm::DataLayout &DL) {
+  return ::collectMemAccess<TypedBufMemOffset>(Ptr, AccessList, DL);
+}
diff --git a/llvm/test/CodeGen/DirectX/typed_buf_ld_st.ll b/llvm/test/CodeGen/DirectX/typed_buf_ld_st.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/typed_buf_ld_st.ll
@@ -0,0 +1,54 @@
+; RUN: opt -S -dxil-typedbuf-lower < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-unknown-shadermodel6.7-compute"
+
+; Make sure the createHandle calls are generated.
+; CHECK-DAG:%[[HDL_IN:.+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 2, i1 false)
+; CHECK-DAG:%[[HDL_OUT:.+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 3, i1 false)
+
+; Make sure Out[TID] = In[TID] is generated.
+; CHECK:%[[TID:.+]] = tail call i32 @llvm.dx.flattened.thread.id.in.group()
+; CHECK:%[[LD:.+]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %[[HDL_IN]], i32 %[[TID]], i32 poison)
+; CHECK:%[[LD_ELT:.+]] = extractvalue %dx.types.ResRet.f32 %[[LD]], 0
+; CHECK:call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle %[[HDL_OUT]], i32 %[[TID]], i32 poison, float %[[LD_ELT]], float poison, float poison, float poison, i8 1)
+
+%"class.hlsl::RWBuffer" = type { ptr }
+
+@In = internal global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+@"?Out@@3V?$RWBuffer@M@hlsl@@A" = local_unnamed_addr global %"class.hlsl::RWBuffer" zeroinitializer, align 4
+
+; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
+declare ptr @llvm.invariant.start.p0(i64 immarg %0, ptr nocapture %1) #0
+
+; Function Attrs: mustprogress norecurse nounwind willreturn
+define void @main() local_unnamed_addr #1 {
+entry:
+  %0 = tail call ptr @llvm.dx.create.handle(i8 1)
+  store ptr %0, ptr @In, align 4
+  %1 = tail call ptr @llvm.invariant.start.p0(i64 4, ptr nonnull @In)
+  %2 = tail call ptr @llvm.dx.create.handle(i8 1)
+  store ptr %2, ptr @"?Out@@3V?$RWBuffer@M@hlsl@@A", align 4
+  %3 = tail call i32 @llvm.dx.flattened.thread.id.in.group()
+  %4 = load ptr, ptr @In, align 4
+  %arrayidx.i.i = getelementptr inbounds float, ptr %4, i32 %3
+  %5 = load float, ptr %arrayidx.i.i, align 4
+  %arrayidx.i3.i = getelementptr inbounds float, ptr %2, i32 %3
+  store float %5, ptr %arrayidx.i3.i, align 4
+  ret void
+}
+
+; Function Attrs: mustprogress nofree nosync nounwind readnone willreturn
+declare i32 @llvm.dx.flattened.thread.id.in.group() #2
+
+; Function Attrs: mustprogress nounwind willreturn
+declare ptr @llvm.dx.create.handle(i8 %0) #3
+
+attributes #0 = { argmemonly mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #1 = { mustprogress norecurse nounwind willreturn "frame-pointer"="all" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { mustprogress nofree nosync nounwind readnone willreturn }
+attributes #3 = { mustprogress nounwind willreturn }
+
+!hlsl.uavs = !{!0, !1}
+
+!0 = !{ptr @In, !"RWBuffer", i32 0, i32 10, i32 2, i32 0}
+!1 = !{ptr @"?Out@@3V?$RWBuffer@M@hlsl@@A", !"RWBuffer", i32 1, i32 10, i32 3, i32 0}