diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -27,6 +27,7 @@
 def ThreadIdClass : dxil_class<"ThreadId">;
 def GroupIdClass : dxil_class<"GroupId">;
 def CBufferLoadClass : dxil_class<"CBufferLoad">;
+def CBufferLoadLegacyClass : dxil_class<"CBufferLoadLegacy">;
 def CreateHandleClass : dxil_class<"CreateHandle">;
 
 def binary_uint : dxil_category<"Binary uint">;
@@ -155,6 +156,14 @@
       dxil_param<4, "i32", "alignment", "load access alignment", 1>
     ]>;
 
+def CBufferLoadLegacy : dxil_op<"CBufferLoadLegacy", 59, CBufferLoadLegacyClass, Resources,
+  "loads a value from a constant buffer resource", "half;float;double;i16;i32;i64;", "ro",
+  [
+    dxil_param<0, "dx.types.CBufRet", "", "the value for the constant buffer variable">,
+    dxil_param<1, "i32", "opcode", "DXIL opcode">,
+    dxil_param<2, "dx.types.Handle", "srv", "cbuffer handle">,
+    dxil_param<3, "i32", "regIndex", "0-based index into cbuffer instance">
+  ]>;
+
 def CreateHandle : dxil_op< "CreateHandle", 57, CreateHandleClass, Resources,
   "creates the handle to a resource",
   "void;", "ro",
   [
diff --git a/llvm/lib/Target/DirectX/DXILCBufferLowering.cpp b/llvm/lib/Target/DirectX/DXILCBufferLowering.cpp
--- a/llvm/lib/Target/DirectX/DXILCBufferLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILCBufferLowering.cpp
@@ -35,14 +35,304 @@
 namespace {
 struct BufAccess {
   Value *Index; // Address used when accessing the cbuffer.
+  unsigned Channel; // Channel within the row for the legacy cbuffer layout.
   Instruction *User; // The instruction which accesses the cbuffer.
 };
 
+class LegacyCBufferLayout {
+  struct LegacyStructLayout {
+    StructType *ST;
+    SmallVector<unsigned> Offsets;
+    unsigned Size;
+    std::pair<unsigned, unsigned> getElementLegacyOffset(unsigned Idx) const {
+      assert(Idx < Offsets.size() && "Invalid element idx!");
+      unsigned Offset = Offsets[Idx];
+      unsigned Ch = Offset & (RowAlign - 1);
+      return std::make_pair((Offset - Ch) / RowAlign, Ch);
+    }
+  };
+
+public:
+  LegacyCBufferLayout(const DataLayout &DL) : DL(DL) {}
+  int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef<Value *> Indices,
+                                 unsigned &Ch);
+  Value *emitGEPOffset(IRBuilder<> *Builder, GEPOperator *GEPOp, unsigned &Ch);
+
+private:
+  unsigned applyRowAlign(unsigned Offset, Type *EltTy);
+  unsigned getTypeAllocSize(Type *Ty);
+  LegacyStructLayout &getStructLayout(StructType *ST);
+  const DataLayout &DL;
+  SmallDenseMap<StructType *, LegacyStructLayout> StructLayouts;
+  // Legacy rows are 4 dwords (16 bytes):
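+  // scalars pack at their natural alignment, a vector moves to the next row
+  // if it would otherwise straddle one, and structs and array elements always
+  // start on a new row.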
+  static const unsigned RowAlign = 16;
+  static unsigned align(unsigned Offset, unsigned Alignment) {
+    return (Offset + Alignment - 1) / Alignment * Alignment;
+  }
+  static unsigned alignTo4Dwords(unsigned Offset) {
+    return align(Offset, RowAlign);
+  }
+};
+
 } // namespace
 
-static void collectBufUserAccess(User *U, Value *Addr,
+unsigned LegacyCBufferLayout::applyRowAlign(unsigned Offset, Type *EltTy) {
+  unsigned AlignedOffset = alignTo4Dwords(Offset);
+
+  if (AlignedOffset == Offset)
+    return Offset;
+
+  if (isa<StructType>(EltTy) || isa<ArrayType>(EltTy))
+    return AlignedOffset;
+  unsigned Size = DL.getTypeStoreSize(EltTy);
+  if ((Offset + Size) > AlignedOffset)
+    return AlignedOffset;
+  else
+    return Offset;
+}
+
+unsigned LegacyCBufferLayout::getTypeAllocSize(Type *Ty) {
+  if (auto *ST = dyn_cast<StructType>(Ty)) {
+    LegacyStructLayout &Layout = getStructLayout(ST);
+    return Layout.Size;
+  } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
+    unsigned NumElts = AT->getNumElements();
+    if (NumElts == 0)
+      return 0;
+
+    unsigned EltSize = getTypeAllocSize(AT->getElementType());
+    unsigned AlignedEltSize = alignTo4Dwords(EltSize);
+    // Each new array element starts 4-dword aligned.
+    return AlignedEltSize * (NumElts - 1) + EltSize;
+  } else {
+    return DL.getTypeStoreSize(Ty);
+  }
+}
+
+LegacyCBufferLayout::LegacyStructLayout &
+LegacyCBufferLayout::getStructLayout(StructType *ST) {
+  auto it = StructLayouts.find(ST);
+  if (it != StructLayouts.end())
+    return it->second;
+
+  unsigned Offset = 0;
+  LegacyStructLayout Layout;
+  Layout.ST = ST;
+  for (Type *EltTy : ST->elements()) {
+    unsigned EltSize = getTypeAllocSize(EltTy);
+    if (unsigned ScalarSize = EltTy->getScalarSizeInBits())
+      Offset = align(Offset, ScalarSize >> 3);
+    Offset = applyRowAlign(Offset, EltTy);
+    Layout.Offsets.emplace_back(Offset);
+    Offset += EltSize;
+  }
+  Layout.Size = Offset;
+  StructLayouts[ST] = Layout;
+  return StructLayouts[ST];
+}
+
+int64_t LegacyCBufferLayout::getIndexedOffsetInType(Type *ElemTy,
+                                                    ArrayRef<Value *> Indices,
+                                                    unsigned &Ch) {
+  int64_t Result = 0;
+
+  generic_gep_type_iterator<Value *const *>
+      GTI = gep_type_begin(ElemTy, Indices),
+      GTE = gep_type_end(ElemTy, Indices);
+  // Mark whether the current GEP index operand indexes into an array.
+  // The first index operand is always array-style indexing, as in ptr[idx].
+  bool IsArrayIndexing = true;
+  bool IsVectorIndexing = false;
+  for (; GTI != GTE; ++GTI) {
+    Value *Idx = GTI.getOperand();
+    if (StructType *STy = GTI.getStructTypeOrNull()) {
+      assert(Idx->getType()->isIntegerTy(32) && "Illegal struct idx");
+      unsigned FieldNo = cast<ConstantInt>(Idx)->getZExtValue();
+
+      // Get structure layout information...
+      const auto &Layout = getStructLayout(STy);
+
+      // Add in the offset, as calculated by the structure layout info...
+      auto [Offset, Channel] = Layout.getElementLegacyOffset(FieldNo);
+      Result += Offset;
+      Ch = Channel;
+    } else {
+      // Get the array index and the size of each array element.
+      if (int64_t ArrayIdx = cast<ConstantInt>(Idx)->getSExtValue()) {
+        unsigned EltSize = getTypeAllocSize(GTI.getIndexedType());
+        // Array indexing needs 4-dword alignment.
+        if (IsArrayIndexing) {
+          EltSize = alignTo4Dwords(EltSize);
+          Ch = 0;
+        }
+
+        if (IsVectorIndexing) {
+          // Vector indexing only changes the channel, because a vector never
+          // crosses a row, except for 64-bit element types.
+          if (EltSize > 4) {
+            switch (ArrayIdx) {
+            case 0:
+              break;
+            case 1:
+              Ch += EltSize;
+              break;
+            case 2:
+              Result += 1;
+              break;
+            case 3:
+              Ch += EltSize;
+              Result += 1;
+              break;
+            default:
+              llvm_unreachable("invalid vector size");
+              break;
+            }
+          } else {
+            Ch += ArrayIdx * EltSize;
+          }
+        } else {
+          Result += ArrayIdx * (EltSize / RowAlign);
+        }
+      }
+    }
+    IsArrayIndexing = isa<ArrayType>(GTI.getIndexedType());
+    IsVectorIndexing = isa<VectorType>(GTI.getIndexedType());
+  }
+
+  return Result;
+}
+
+Value *LegacyCBufferLayout::emitGEPOffset(IRBuilder<> *Builder,
+                                          GEPOperator *GEPOp, unsigned &Ch) {
+  Type *IntIdxTy = DL.getIndexType(GEPOp->getType());
+  Value *Result = nullptr;
+
+  // If the GEP is inbounds, we know that none of the addressing operations
+  // will overflow in a signed sense.
+  bool isInBounds = GEPOp->isInBounds();
+
+  // Build a mask for high order bits.
+  unsigned IntPtrWidth = IntIdxTy->getScalarType()->getIntegerBitWidth();
+  uint64_t PtrSizeMask =
+      std::numeric_limits<uint64_t>::max() >> (64 - IntPtrWidth);
+  // Mark whether the current GEP index operand indexes into an array.
+  // The first index operand is always array-style indexing, as in ptr + idx.
+  bool IsArrayIndexing = true;
+  bool IsVectorIndexing = false;
+  gep_type_iterator GTI = gep_type_begin(GEPOp);
+  for (User::op_iterator i = GEPOp->op_begin() + 1, e = GEPOp->op_end(); i != e;
+       ++i, ++GTI) {
+    Value *Op = *i;
+    uint64_t Size = getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask;
+    // Array indexing needs 4-dword alignment.
+    if (IsArrayIndexing) {
+      Size = alignTo4Dwords(Size);
+      Ch = 0;
+    }
+    if (IsVectorIndexing) {
+      // Vector indexing only changes the channel, because a vector never
+      // crosses a row, except for double.
+      // FIXME: handle dynamic indexing into a vector inside a cbuffer.
+      Constant *OpC = cast<Constant>(Op);
+      unsigned ArrayIdx = OpC->getUniqueInteger().getZExtValue();
+      if (Size > 4) {
+        switch (ArrayIdx) {
+        case 0:
+          break;
+        case 1:
+          Ch += Size;
+          break;
+        case 2:
+          // The element lands in the next row.
+          Result = Result ? Builder->CreateAdd(Result,
+                                               ConstantInt::get(IntIdxTy, 1))
+                          : ConstantInt::get(IntIdxTy, 1);
+          break;
+        case 3:
+          Ch += Size;
+          Result = Result ? Builder->CreateAdd(Result,
+                                               ConstantInt::get(IntIdxTy, 1))
+                          : ConstantInt::get(IntIdxTy, 1);
+          break;
+        default:
+          llvm_unreachable("invalid vector size");
+          break;
+        }
+      } else {
+        Ch += ArrayIdx * Size;
+      }
+      continue;
+    }
+
+    Value *Offset;
+    if (Constant *OpC = dyn_cast<Constant>(Op)) {
+      if (OpC->isZeroValue())
+        continue;
+
+      // Handle a struct index, which adds its field offset to the pointer.
+      if (StructType *STy = GTI.getStructTypeOrNull()) {
+        Ch = 0;
+        uint64_t OpValue = OpC->getUniqueInteger().getZExtValue();
+        // Get structure layout information...
+        const auto &Layout = getStructLayout(STy);
+        auto [EltOffset, Channel] = Layout.getElementLegacyOffset(OpValue);
+        Ch = Channel;
+        if (!EltOffset)
+          continue;
+
+        Offset = ConstantInt::get(IntIdxTy, EltOffset);
+      } else {
+        // Splat the constant if needed.
+        if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy())
+          OpC = ConstantVector::getSplat(
+              cast<VectorType>(IntIdxTy)->getElementCount(), OpC);
+
+        Constant *Scale = ConstantInt::get(IntIdxTy, Size);
+        // >> 4 because each row is 4 dwords.
+        Scale = ConstantExpr::getLShr(Scale, ConstantInt::get(IntIdxTy, 4));
+        Constant *OC =
+            ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/);
+        Offset =
+            ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/);
+      }
+    } else {
+      // Splat the index if needed.
+      if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy())
+        Op = Builder->CreateVectorSplat(
+            cast<FixedVectorType>(IntIdxTy)->getNumElements(), Op);
+
+      // Convert to the correct type.
+      if (Op->getType() != IntIdxTy)
+        Op = Builder->CreateIntCast(Op, IntIdxTy, true,
+                                    Op->getName().str() + ".c");
+
+      // >> 4 because each row is 4 dwords.
+      Size >>= 4;
+      Constant *Scale = ConstantInt::get(IntIdxTy, Size);
+
+      if (Size != 1) {
+        // We'll let instcombine(mul) convert this to a shl if possible.
+        Op = Builder->CreateMul(Op, Scale, GEPOp->getName().str() + ".idx",
+                                false /*NUW*/, isInBounds /*NSW*/);
+      }
+      Offset = Op;
+    }
+
+    if (Result)
+      Result =
+          Builder->CreateAdd(Result, Offset, GEPOp->getName().str() + ".offs",
+                             false /*NUW*/, isInBounds /*NSW*/);
+    else
+      Result = Offset;
+    IsArrayIndexing = isa<ArrayType>(GTI.getIndexedType());
+    IsVectorIndexing = isa<VectorType>(GTI.getIndexedType());
+  }
+  return Result ? Result : Constant::getNullValue(IntIdxTy);
+}
+
+static void collectBufUserAccess(User *U, Value *Addr, unsigned Channel,
                                  std::vector<BufAccess> &AccessList,
-                                 const DataLayout &DL) {
+                                 LegacyCBufferLayout &LegacyCBLayout) {
+  bool IsZeroAddr = false;
+  if (auto *CAddr = dyn_cast<ConstantInt>(Addr))
+    IsZeroAddr = CAddr->getLimitedValue() == 0;
+
   if (auto *GEP = dyn_cast<GEPOperator>(U)) {
     // Calculate new Addr.
     Value *NewAddr = Addr;
@@ -51,34 +341,42 @@
       B.SetInsertPoint(Inst);
     if (GEP->hasAllConstantIndices()) {
       SmallVector<Value *> IdxList(GEP->idx_begin(), GEP->idx_end());
-      NewAddr = B.CreateAdd(Addr, B.getInt32(DL.getIndexedOffsetInType(
-                                      GEP->getSourceElementType(), IdxList)));
+      unsigned Index = LegacyCBLayout.getIndexedOffsetInType(
+          GEP->getSourceElementType(), IdxList, Channel);
+      if (IsZeroAddr)
+        NewAddr = B.getInt32(Index);
+      else
+        NewAddr = B.CreateAdd(Addr, B.getInt32(Index));
     } else {
-      Value *Offset = EmitGEPOffset(&B, DL, GEP, /*NoAssumptions=*/true);
-      NewAddr = B.CreateAdd(Addr, Offset);
+      Value *Offset = LegacyCBLayout.emitGEPOffset(&B, GEP, Channel);
+      if (IsZeroAddr)
+        NewAddr = Offset;
+      else
+        NewAddr = B.CreateAdd(Addr, Offset);
     }
     for (User *GEPU : GEP->users()) {
-      collectBufUserAccess(GEPU, NewAddr, AccessList, DL);
+      collectBufUserAccess(GEPU, NewAddr, Channel, AccessList, LegacyCBLayout);
     }
   } else if (isa<AddrSpaceCastOperator>(U) || isa<BitCastOperator>(U)) {
     for (User *AU : U->users()) {
-      collectBufUserAccess(AU, Addr, AccessList, DL);
+      collectBufUserAccess(AU, Addr, Channel, AccessList, LegacyCBLayout);
     }
   } else if (auto *LI = dyn_cast<LoadInst>(U)) {
-    BufAccess Access = {Addr, LI};
+    BufAccess Access = {Addr, Channel, LI};
     AccessList.emplace_back(Access);
   } else
     llvm_unreachable("unsupported user");
 }
 
-static std::vector<BufAccess> collectBufAccess(GlobalVariable *GV,
-                                               const DataLayout &DL) {
+static std::vector<BufAccess>
+collectBufAccess(GlobalVariable *GV, LegacyCBufferLayout &LegacyCBLayout) {
   auto &Ctx = GV->getContext();
   Value *OffsetZero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
   std::vector<BufAccess> AccessList;
   for (User *U : GV->users()) {
-    collectBufUserAccess(U, OffsetZero, AccessList, DL);
+    unsigned Channel = 0;
+    collectBufUserAccess(U, OffsetZero, Channel, AccessList, LegacyCBLayout);
   }
   return AccessList;
 }
@@ -90,14 +388,13 @@
   if (!ResTable)
     return false;
   const DataLayout &DL = M.getDataLayout();
+  LegacyCBufferLayout LegacyCBLayout(DL);
   for (auto *Res : ResTable->operands()) {
     assert(Res->getNumOperands() == 5 && "invalid resource metadata");
     auto *GVMD = cast<ConstantAsMetadata>(Res->getOperand(0).get());
     auto *GV = cast<GlobalVariable>(GVMD->getValue());
-    assert(GV->getAddressSpace() == DXIL::CBufferAddrSpace &&
-           "invalid global variable for cbuffer");
-    std::vector<BufAccess> AccessList = collectBufAccess(GV, DL);
+    std::vector<BufAccess> AccessList = collectBufAccess(GV, LegacyCBLayout);
 
     SmallDenseMap<Function *, CallInst *> HandleMap;
     uint64_t RangeID =
@@ -127,30 +424,45 @@
       IRBuilder<> B(LI);
       DXILOpBuilder DXILB(M, B);
+      unsigned Ch = Access.Channel;
+      unsigned EltSizeInBytes = Ty->getScalarSizeInBits() >> 3;
+      Ch /= EltSizeInBytes;
       if (Ty->isIntegerTy() || Ty->isFloatingPointTy()) {
-        CBLd = DXILB.createCBufferLoad(Ty, Hdl, Index,
-                                       DL.getPrefTypeAlign(Ty).value());
+        CBLd = DXILB.createCBufferLoadLegacy(Ty, Hdl, Index);
+        CBLd = B.CreateExtractValue(CBLd, Ch);
       } else if (isa<FixedVectorType>(Ty)) {
         // Only fixed vector types are supported.
         auto *VT = cast<FixedVectorType>(Ty);
-        VT;
         Value *Result = PoisonValue::get(VT);
         Type *EltTy = VT->getElementType();
-        uint64_t Align = DL.getPrefTypeAlign(EltTy).value();
-        for (unsigned i = 0; i < VT->getNumElements(); ++i) {
-          Value *Offset =
-              B.CreateAdd(Index, B.getInt32(i * DL.getTypeAllocSize(EltTy)));
-          Value *Elt = DXILB.createCBufferLoad(EltTy, Hdl, Offset, Align);
-          Result = B.CreateInsertElement(Result, Elt, i);
+
+        CBLd = DXILB.createCBufferLoadLegacy(EltTy, Hdl, Index);
+        unsigned EltSize = DL.getTypeAllocSize(EltTy).getFixedSize();
+        // The vector crosses a 4-dword row.
+        if (EltSize > 4 && VT->getNumElements() > 2) {
+          for (unsigned i = 0; i < 2; ++i) {
+            Value *Elt = B.CreateExtractValue(CBLd, (i + Ch));
+            Result = B.CreateInsertElement(Result, Elt, i);
+          }
+          Index = B.CreateAdd(Index, ConstantInt::get(Index->getType(), 1));
+          CBLd = DXILB.createCBufferLoadLegacy(EltTy, Hdl, Index);
+          for (unsigned i = 0; i < VT->getNumElements() - 2; ++i) {
+            Value *Elt = B.CreateExtractValue(CBLd, (i + Ch));
+            Result = B.CreateInsertElement(Result, Elt, i + 2);
+          }
+        } else {
+          for (unsigned i = 0; i < VT->getNumElements(); ++i) {
+            Value *Elt = B.CreateExtractValue(CBLd, (i + Ch));
+            Result = B.CreateInsertElement(Result, Elt, i);
+          }
         }
         CBLd = Result;
       } else {
+        llvm_unreachable("failed to SROA");
       }
 
       LI->replaceAllUsesWith(CBLd);
       LI->eraseFromParent();
     }
-
-    return false;
   }
   return true;
 }
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -39,6 +39,8 @@
                            bool NonUniformIndex);
   CallInst *createCBufferLoad(Type *OverloadTy, Value *Hdl, Value *ByteOffset,
                               uint32_t Alignment);
+  CallInst *createCBufferLoadLegacy(Type *OverloadTy, Value *Hdl,
+                                    Value *RegIndex);
 
 private:
   Module &M;
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -182,6 +182,40 @@
   return getOrCreateStructType(TypeName, FieldTypes, Ctx);
 }
 
+static StructType *getCBufRetType(Type *OverloadTy, LLVMContext &Ctx) {
+  OverloadKind Kind = getOverloadKind(OverloadTy);
+  std::string TypeName = constructOverloadTypeName(Kind, "dx.types.CBufRet.");
+  unsigned OverloadSize = 0;
+  if (OverloadTy->isFloatTy())
+    OverloadSize = 32;
+  else if (OverloadTy->isDoubleTy())
+    OverloadSize = 64;
+  else if (OverloadTy->isHalfTy())
+    OverloadSize = 16;
+  else
+    OverloadSize = cast<IntegerType>(OverloadTy)->getBitWidth();
+
+  switch (OverloadSize) {
+  case 32: {
+    Type *FieldTypes[4] = {OverloadTy, OverloadTy, OverloadTy, OverloadTy};
+    return getOrCreateStructType(TypeName, FieldTypes, Ctx);
+  }
+  case 64: {
+    Type *FieldTypes[2] = {OverloadTy, OverloadTy};
+    return getOrCreateStructType(TypeName, FieldTypes, Ctx);
+  }
+  case 16: {
+    TypeName += ".8"; // dx.types.CBufRet.f16.8 holds 8 16-bit elements.
+    Type *FieldTypes[8] = {OverloadTy, OverloadTy, OverloadTy, OverloadTy,
+                           OverloadTy, OverloadTy, OverloadTy, OverloadTy};
+    return getOrCreateStructType(TypeName, FieldTypes, Ctx);
+  }
+  default:
llvm_unreachable("invalid Overload type"); + return nullptr; + } +} + static StructType *getHandleType(LLVMContext &Ctx) { return getOrCreateStructType("dx.types.Handle", Type::getInt8PtrTy(Ctx), Ctx); } @@ -213,6 +247,8 @@ return getResRetType(OverloadTy, Ctx); case ParameterKind::DXIL_HANDLE: return getHandleType(Ctx); + case ParameterKind::CBUFFER_RET: + return getCBufRetType(OverloadTy, Ctx); default: break; } @@ -339,5 +375,13 @@ ByteOffset, B.getInt32(Alignment)}); } +CallInst *DXILOpBuilder::createCBufferLoadLegacy(Type *OverloadTy, Value *Hdl, + Value *RegIndex) { + auto Fn = + getOrCreateDXILOpFunction(DXIL::OpCode::CBufferLoadLegacy, OverloadTy, M); + return B.CreateCall(Fn, {B.getInt32((int32_t)DXIL::OpCode::CBufferLoadLegacy), + Hdl, RegIndex}); +} + } // namespace DXIL } // namespace llvm diff --git a/llvm/test/CodeGen/DirectX/cbuf.ll b/llvm/test/CodeGen/DirectX/cbuf.ll deleted file mode 100644 --- a/llvm/test/CodeGen/DirectX/cbuf.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: opt -S -dxil-cbuf-lower < %s | FileCheck %s -target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" -target triple = "dxil-unknown-shadermodel6.7-library" - -; Make sure generate create handle. -; CHECK:%[[HDL:.+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 2, i1 false) - -; Make sure load at offset 0/8/20 for @A.cb.float/double/<2 x i32>.y -; CHECK:call float @dx.op.cbufferLoad.f32(i32 58, %dx.types.Handle %[[HDL]], i32 0, i32 4) -; CHECK:call double @dx.op.cbufferLoad.f64(i32 58, %dx.types.Handle %[[HDL]], i32 8, i32 8) -; CHECK:call i32 @dx.op.cbufferLoad.i32(i32 58, %dx.types.Handle %[[HDL]], i32 20, i32 4) -@A.cb. = external addrspace(4) constant { float, i32, double, <2 x i32> } - -; Function Attrs: noinline nounwind optnone -define noundef float @"?foo@@YAMXZ"() #0 { -entry: - %0 = load float, ptr addrspacecast (ptr addrspace(4) @A.cb. to ptr), align 4 - %conv = fpext float %0 to double - %1 = load double, ptr addrspacecast (ptr addrspace(4) getelementptr inbounds ({ float, i32, double, <2 x i32> }, ptr addrspace(4) @A.cb., i32 0, i32 2) to ptr), align 8 - %2 = load <2 x i32>, ptr addrspacecast (ptr addrspace(4) getelementptr inbounds ({ float, i32, double, <2 x i32> }, ptr addrspace(4) @A.cb., i32 0, i32 3) to ptr), align 8 - %3 = extractelement <2 x i32> %2, i32 1 - %conv1 = sitofp i32 %3 to double - %4 = call double @llvm.fmuladd.f64(double %1, double %conv1, double %conv) - %conv2 = fptrunc double %4 to float - ret float %conv2 -} - -; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn -declare double @llvm.fmuladd.f64(double, double, double) #1 - -attributes #0 = { noinline nounwind } -attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } - -!hlsl.cbufs = !{!1} - -!1 = !{ptr addrspace(4) @A.cb., !"A.cb.ty", i32 0, i32 2, i32 1} diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll @@ -0,0 +1,105 @@ +; RUN: opt -S -dxil-cbuf-lower < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +@A.cb. = external local_unnamed_addr constant { float, double, float, half, i16, i64, i32 } + +; Make sure first float is load from cb[0].x. 
+; CHECK:float @fooA0() +; CHECK-NEXT:entry +; CHECK-NEXT:%0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT:%1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 0) +; CHECK-NEXT:%2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT:ret float %2 +define noundef float @fooA0() local_unnamed_addr { +entry: + %0 = load float, ptr @A.cb., align 8 + ret float %0 +} + +; Make sure double is load from cb[0].zw. +; CHECK:double @fooA1() +; CHECK-NEXT:entry +; CHECK-NEXT:%0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT:%1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 0) +; CHECK-NEXT:%2 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT:ret double %2 +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define noundef double @fooA1() local_unnamed_addr { +entry: + %0 = load double, ptr getelementptr inbounds ({ float, double, float, half, i16, i64, i32 }, ptr @A.cb., i32 0, i32 1), align 8 + ret double %0 +} + +; Make sure second float is load from cb[1].x. +; CHECK:float @fooA2() +; CHECK-NEXT:entry +; CHECK-NEXT:%0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT:%1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 1) +; CHECK-NEXT:%2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT:ret float %2 +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define noundef float @fooA2() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ float, double, float, half, i16, i64, i32 }, ptr @A.cb., i32 0, i32 2), align 8 + ret float %0 +} + +; Make sure half is load from low16bit of cb[1].y. +; CHECK:half @fooA3() +; CHECK-NEXT:entry +; CHECK-NEXT:%0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT:%1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 1) +; CHECK-NEXT:%2 = extractvalue %dx.types.CBufRet.f16.8 %1, 2 +; CHECK-NEXT:ret half %2 +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define noundef half @fooA3() local_unnamed_addr { +entry: + %0 = load half, ptr getelementptr inbounds ({ float, double, float, half, i16, i64, i32 }, ptr @A.cb., i32 0, i32 3), align 4 + ret half %0 +} + +; Make sure i16 is load from high16bit of cb[1].y. +; CHECK:signext i16 @fooA4() +; CHECK-NEXT:entry +; CHECK-NEXT:%0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT:%1 = call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %0, i32 1) +; CHECK-NEXT:%2 = extractvalue %dx.types.CBufRet.i16.8 %1, 3 +; CHECK-NEXT:ret i16 %2 +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define noundef signext i16 @fooA4() local_unnamed_addr { +entry: + %0 = load i16, ptr getelementptr inbounds ({ float, double, float, half, i16, i64, i32 }, ptr @A.cb., i32 0, i32 4), align 2 + ret i16 %0 +} + +; Make sure i64 is load from cb[1].zw. 
+; CHECK:i64 @fooA5() +; CHECK-NEXT:entry +; CHECK-NEXT:%0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT:%1 = call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %0, i32 1) +; CHECK-NEXT:%2 = extractvalue %dx.types.CBufRet.i64 %1, 1 +; CHECK-NEXT:ret i64 %2 +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define noundef i64 @fooA5() local_unnamed_addr { +entry: + %0 = load i64, ptr getelementptr inbounds ({ float, double, float, half, i16, i64, i32 }, ptr @A.cb., i32 0, i32 5), align 8 + ret i64 %0 +} + +; Make sure i32 is load from cb[2].x. +; CHECK:i32 @fooA6() +; CHECK-NEXT:entry +; CHECK-NEXT:%0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT:%1 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %0, i32 2) +; CHECK-NEXT:%2 = extractvalue %dx.types.CBufRet.i32 %1, 0 +; CHECK-NEXT:ret i32 %2 +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define noundef i32 @fooA6() local_unnamed_addr { +entry: + %0 = load i32, ptr getelementptr inbounds ({ float, double, float, half, i16, i64, i32 }, ptr @A.cb., i32 0, i32 6), align 8 + ret i32 %0 +} + +!hlsl.cbufs = !{!0} +!0 = !{ptr @A.cb., !"A.cb.ty", i32 0, i32 -1, i32 0} diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll @@ -0,0 +1,150 @@ +; RUN: opt -S -dxil-cbuf-lower < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +@B.cb. 
= external local_unnamed_addr constant { double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> } + +; Make sure first double load from C[0].xy +; CHECK:double @fooB0() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 0) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: ret double %2 +define noundef double @fooB0() local_unnamed_addr { +entry: + %0 = load double, ptr @B.cb., align 32 + ret double %0 +} + +; Make sure <3 x float> load from C[1].xyz +; CHECK:<3 x float> @fooB1() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 1) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT: %3 = insertelement <3 x float> poison, float %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f32 %1, 1 +; CHECK-NEXT: %5 = insertelement <3 x float> %3, float %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f32 %1, 2 +; CHECK-NEXT: %7 = insertelement <3 x float> %5, float %6, i64 2 +; CHECK-NEXT: ret <3 x float> %7 +define noundef <3 x float> @fooB1() local_unnamed_addr { +entry: + %0 = load <3 x float>, ptr getelementptr inbounds ({ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }, ptr @B.cb., i32 0, i32 1), align 16 + ret <3 x float> %0 +} + +; Make sure first float load from C[1].w +; CHECK: float @fooB2() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 1) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 3 +; CHECK-NEXT: ret float %2 +define noundef float @fooB2() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }, ptr @B.cb., i32 0, i32 2), align 32 + ret float %0 +} + +; Make sure <3 x double> load from C[2].xyzw and C[3].xy +; CHECK: <3 x double> @fooB3() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 2) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: %3 = insertelement <3 x double> poison, double %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT: %5 = insertelement <3 x double> %3, double %4, i64 1 +; CHECK-NEXT: %6 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 3) +; CHECK-NEXT: %7 = extractvalue %dx.types.CBufRet.f64 %6, 0 +; CHECK-NEXT: %8 = insertelement <3 x double> %5, double %7, i64 2 +; CHECK-NEXT: ret <3 x double> %8 +define noundef <3 x double> @fooB3() local_unnamed_addr { +entry: + %0 = load <3 x double>, ptr getelementptr inbounds ({ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }, ptr @B.cb., i32 0, i32 3), align 32 + ret <3 x double> %0 +} + +; Make sure half load from low16bit of C[3].z +;CHECK: half @fooB4() +; CHECK-NEXT: entry: +; 
CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 3) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 4 +; CHECK-NEXT: ret half %2 +define noundef half @fooB4() local_unnamed_addr { +entry: + %0 = load half, ptr getelementptr inbounds ({ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }, ptr @B.cb., i32 0, i32 4), align 32 + ret half %0 +} + +; Make sure <2 x double> load from C[4].xyzw +; CHECK: <2 x double> @fooB5() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 4) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: %3 = insertelement <2 x double> poison, double %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT: %5 = insertelement <2 x double> %3, double %4, i64 1 +; CHECK-NEXT: ret <2 x double> %5 +define noundef <2 x double> @fooB5() local_unnamed_addr { +entry: + %0 = load <2 x double>, ptr getelementptr inbounds ({ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }, ptr @B.cb., i32 0, i32 5), align 16 + ret <2 x double> %0 +} + +; Make sure second float load from C[5].x +; CHECK:float @fooB6() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 5) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT: ret float %2 +define noundef float @fooB6() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }, ptr @B.cb., i32 0, i32 6), align 32 + ret float %0 +} + +; Make sure first <3 x half> load from C[5].y and low 16bit of C[5].z +;CHECK: <3 x half> @fooB7() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 5) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 2 +; CHECK-NEXT: %3 = insertelement <3 x half> poison, half %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 3 +; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 4 +; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2 +; CHECK-NEXT: ret <3 x half> %7 +define noundef <3 x half> @fooB7() local_unnamed_addr { +entry: + %0 = load <3 x half>, ptr getelementptr inbounds ({ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }, ptr @B.cb., i32 0, i32 7), align 8 + ret <3 x half> %0 +} + +; Make sure second <3 x half> load from high 16bit of C[5].z and C[5].w +;CHECK: <3 x half> @fooB8() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 5) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 5 +; CHECK-NEXT: %3 = 
insertelement <3 x half> poison, half %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 6 +; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 7 +; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2 +; CHECK-NEXT: ret <3 x half> %7 +define noundef <3 x half> @fooB8() local_unnamed_addr { +entry: + %0 = load <3 x half>, ptr getelementptr inbounds ({ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }, ptr @B.cb., i32 0, i32 8), align 16 + ret <3 x half> %0 +} + +!hlsl.cbufs = !{!0} + +!0 = !{ptr @B.cb., !"B.cb.ty", i32 0, i32 -1, i32 0} diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll @@ -0,0 +1,203 @@ +; RUN: opt -S -dxil-cbuf-lower < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +@B.cb. = external local_unnamed_addr constant { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> } +@B.cb..1 = external local_unnamed_addr constant { [3 x <3 x double>], <3 x half> } + +; Make sure indexing [2 x double] from C[0].xy +; CHECK: double @fooB0(i32 noundef %i) +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %arrayidx = getelementptr inbounds [2 x double], ptr @B.cb., i32 0, i32 %i +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %i) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: ret double %2 +define noundef double @fooB0(i32 noundef %i) local_unnamed_addr { +entry: + %arrayidx = getelementptr inbounds [2 x double], ptr @B.cb., i32 0, i32 %i + %0 = load double, ptr %arrayidx, align 8 + ret double %0 +} + +; Make sure indexing [3 x <3 x float>] from C[2].xyz +; CHECK: <3 x float> @fooB1(i32 noundef %i) +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %arrayidx.offs = add nsw i32 2, %i +; CHECK-NEXT: %arrayidx = getelementptr inbounds { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 1, i32 %i +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 %arrayidx.offs) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT: %3 = insertelement <3 x float> poison, float %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f32 %1, 1 +; CHECK-NEXT: %5 = insertelement <3 x float> %3, float %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f32 %1, 2 +; CHECK-NEXT: %7 = insertelement <3 x float> %5, float %6, i64 2 +; CHECK-NEXT: ret <3 x float> %7 +define noundef <3 x float> @fooB1(i32 noundef %i) local_unnamed_addr { +entry: + %arrayidx = getelementptr inbounds { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 1, i32 %i + %0 = load <3 x float>, ptr %arrayidx, align 16 + ret <3 x float> %0 +} + +; Make sure load first float from C[4].z +; CHECK: float @fooB2() +; CHECK-NEXT: entry: +; 
CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 4) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 3 +; CHECK-NEXT: ret float %2 +define noundef float @fooB2() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 2), align 16 + ret float %0 +} + +; Make sure indexing [3 x double] from C[5].xy. +; CHECK: double @fooB3(i32 noundef %i) +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %arrayidx.offs = add nsw i32 5, %i +; CHECK-NEXT: %arrayidx = getelementptr inbounds { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 3, i32 %i +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %arrayidx.offs) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: ret double %2 +define noundef double @fooB3(i32 noundef %i) local_unnamed_addr { +entry: + %arrayidx = getelementptr inbounds { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 3, i32 %i + %0 = load double, ptr %arrayidx, align 8 + ret double %0 +} + +; Make sure load half from low16bit of C[7].z +; CHECK:half @fooB4() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 7) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 4 +; CHECK-NEXT: ret half %2 +define noundef half @fooB4() local_unnamed_addr { +entry: + %0 = load half, ptr getelementptr inbounds ({ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 4), align 16 + ret half %0 +} + +; Make sure indexing [1 x <2 x double>] from C[8].xy +; CHECK: <2 x double> @fooB5(i32 noundef %i) +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %arrayidx.offs = add nsw i32 8, %i +; CHECK-NEXT: %arrayidx = getelementptr inbounds { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 5, i32 %i +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %arrayidx.offs) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: %3 = insertelement <2 x double> poison, double %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT: %5 = insertelement <2 x double> %3, double %4, i64 1 +; CHECK-NEXT: ret <2 x double> %5 +define noundef <2 x double> @fooB5(i32 noundef %i) local_unnamed_addr { +entry: + %arrayidx = getelementptr inbounds { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 5, i32 %i + %0 = load <2 x double>, ptr %arrayidx, align 16 + ret <2 x double> %0 +} + +; Make sure second fload load 
from C[9].x +; CHECK: float @fooB6() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 9) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT: ret float %2 +define noundef float @fooB6() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 6), align 16 + ret float %0 +} + +; Make sure indexing [2 x <3 x half>] from C[10].x and low 16bit of C[10].y +; CHECK: <3 x half> @fooB7(i32 noundef %i) +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %arrayidx.offs = add nsw i32 10, %i +; CHECK-NEXT: %arrayidx = getelementptr inbounds { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 7, i32 %i +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 %arrayidx.offs) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 0 +; CHECK-NEXT: %3 = insertelement <3 x half> poison, half %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 1 +; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 2 +; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2 +; CHECK-NEXT: ret <3 x half> %7 +define noundef <3 x half> @fooB7(i32 noundef %i) local_unnamed_addr { +entry: + %arrayidx = getelementptr inbounds { [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 7, i32 %i + %0 = load <3 x half>, ptr %arrayidx, align 8 + ret <3 x half> %0 +} + +; Make sure load half3 from high 16bit of C[11].y and C[11].z +; CHECK: <3 x half> @fooB8() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 11) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 3 +; CHECK-NEXT: %3 = insertelement <3 x half> poison, half %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 4 +; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 5 +; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2 +; CHECK-NEXT: ret <3 x half> %7 +define noundef <3 x half> @fooB8() local_unnamed_addr { +entry: + %0 = load <3 x half>, ptr getelementptr inbounds ({ [2 x double], [3 x <3 x float>], float, [3 x double], half, [1 x <2 x double>], float, [2 x <3 x half>], <3 x half> }, ptr @B.cb., i32 0, i32 8), align 8 + ret <3 x half> %0 +} + +; Make sure indexing [3 x <3 x double>] from C1[0]. +; CHECK: <3 x double> @fooB9(i32 noundef %i) +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 1, i32 -1, i1 false) +; Make sure each row is 2x16dwords. 
+; CHECK-NEXT: %arrayidx.idx = mul nsw i32 %i, 2 +; CHECK-NEXT: %arrayidx = getelementptr inbounds [3 x <3 x double>], ptr @B.cb..1, i32 0, i32 %i +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %arrayidx.idx) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: %3 = insertelement <3 x double> poison, double %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT: %5 = insertelement <3 x double> %3, double %4, i64 1 +; Next row for z of <3 x double> +; CHECK-NEXT: %6 = add i32 %arrayidx.idx, 1 +; CHECK-NEXT: %7 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %6) +; CHECK-NEXT: %8 = extractvalue %dx.types.CBufRet.f64 %7, 0 +; CHECK-NEXT: %9 = insertelement <3 x double> %5, double %8, i64 2 +; CHECK-NEXT: ret <3 x double> %9 +define noundef <3 x double> @fooB9(i32 noundef %i) local_unnamed_addr { +entry: + %arrayidx = getelementptr inbounds [3 x <3 x double>], ptr @B.cb..1, i32 0, i32 %i + %loadVec3 = load <3 x double>, ptr %arrayidx, align 32 + ret <3 x double> %loadVec3 +} + +; Make sure load half3 from C1[5].z and low 16bit of C1[5].w +; CHECK: <3 x half> @fooB10() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 1, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 5) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 4 +; CHECK-NEXT: %3 = insertelement <3 x half> poison, half %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 5 +; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 6 +; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2 +; CHECK-NEXT: ret <3 x half> %7 +define noundef <3 x half> @fooB10() local_unnamed_addr { +entry: + %loadVec3 = load <3 x half>, ptr getelementptr inbounds ({ [3 x <3 x double>], <3 x half> }, ptr @B.cb..1, i32 0, i32 1), align 32 + ret <3 x half> %loadVec3 +} + +!hlsl.cbufs = !{!0, !1} + +!0 = !{ptr @B.cb., !"B.cb.ty", i32 0, i32 -1, i32 0} +!1 = !{ptr @B.cb..1, !"B.cb.ty", i32 1, i32 -1, i32 0} diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll @@ -0,0 +1,554 @@ +; RUN: opt -S -dxil-cbuf-lower < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-unknown-shadermodel6.7-library" + +; struct A { +; float A0; // Offset 0 +; double A1; // Offset 0, ch zw +; float A2; // Offest 1 +; half A3; // Offset 1, ch low16 y +; int16_t A4; // Offset 1, ch high16 y +; int64_t A5; // Offset 1, ch zw +; int A6; // Offset 2, +; }; // Next offset at 2.y +; +; struct B { +; double B0; // Offset 0 +; float3 B1; // Offset 1 +; float B2; // Offset 1, ch 3 +; double3 B3; // Offset 2 +; half B4; // Offset 3.z low16 +; double2 B5; // Offset 4 +; float B6; // Offset 5 +; half3 B7; // Offset 5, ch y and low16 of z +; half3 B8; // Offset 5, ch high16 z and w +; }; // Next offset 6 +; +; struct C { +; A C0; // Offest 0, size 3 +; float C1[1]; // Offset 3, +; B C2[2]; // Offset 4, size 6 * 2 +; half C3; // Offset 16 +; }; // Next offset 16, high 16 of x +; +; cbuffer D { +; int D0; // Offset 0 +; B D1; // Offest 1, Size 6 +; half D2; // Offset 7 +; C D3; // 
Offset 8, Size 16.high 16x +; double D4; // Offset 24, ch zw +; } + +%struct.B = type <{ double, <3 x float>, float, <3 x double>, half, <2 x double>, float, <3 x half>, <3 x half> }> +%struct.C = type <{ %struct.A, [1 x float], [2 x %struct.B], half }> +%struct.A = type <{ float, double, float, half, i16, i64, i32 }> + +@D.cb. = external local_unnamed_addr constant { i32, %struct.B, half, %struct.C, double } + +; Make sure D0 load from C[0].x +; CHECK: i32 @fooD0() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %0, i32 0) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.i32 %1, 0 +; CHECK-NEXT: ret i32 %2 +define noundef i32 @fooD0() local_unnamed_addr { +entry: + %0 = load i32, ptr @D.cb., align 8 + ret i32 %0 +} + +; Make sure D2 load from low 16bit of C[7].x +; CHECK: half @fooD2() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 7) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 0 +; CHECK-NEXT: ret half %2 +define noundef half @fooD2() local_unnamed_addr { +entry: + %0 = load half, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 2), align 2 + ret half %0 +} + +; Mae sure D4 load from C[24].zw +; CHECK: double @fooD4() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 24) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT: ret double %2 +define noundef double @fooD4() local_unnamed_addr { +entry: + %0 = load double, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 4), align 8 + ret double %0 +} + +; Make sure D1.B0 load from C[1].xy +; CHECK: double @fooD1_B0() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 1) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: ret double %2 +define noundef double @fooD1_B0() local_unnamed_addr { +entry: + %0 = load double, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1), align 4 + ret double %0 +} + +; Make sure D1.B1 load from C[2].xyz +; CHECK: <3 x float> @fooD1_B1() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 2) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT: %3 = insertelement <3 x float> poison, float %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f32 %1, 1 +; CHECK-NEXT: %5 = insertelement <3 x float> %3, float %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f32 %1, 2 +; CHECK-NEXT: %7 = insertelement <3 x float> %5, float %6, i64 2 +; CHECK-NEXT: ret <3 x float> %7 +define noundef <3 x float> @fooD1_B1() local_unnamed_addr { +entry: + %0 = load <3 x float>, ptr getelementptr inbounds 
({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1, i32 1), align 4 + ret <3 x float> %0 +} + +; Make sure D1.B2 load from C[2].w +; CHECK: float @fooD1_B2() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 2) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 3 +; CHECK-NEXT: ret float %2 +define noundef float @fooD1_B2() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1, i32 2), align 4 + ret float %0 +} + +; Make sure D1.B3 load from C[3].xyzw and C[4].xy +; CHECK: <3 x double> @fooD1_B3() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 3) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: %3 = insertelement <3 x double> poison, double %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT: %5 = insertelement <3 x double> %3, double %4, i64 1 +; CHECK-NEXT: %6 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 4) +; CHECK-NEXT: %7 = extractvalue %dx.types.CBufRet.f64 %6, 0 +; CHECK-NEXT: %8 = insertelement <3 x double> %5, double %7, i64 2 +; CHECK-NEXT: ret <3 x double> %8 +define noundef <3 x double> @fooD1_B3() local_unnamed_addr { +entry: + %0 = load <3 x double>, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1, i32 3), align 8 + ret <3 x double> %0 +} + +; Make sure D1.B4 load from low 16bit of C[4].z +; CHECK: half @fooD1_B4() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 4) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 4 +; CHECK-NEXT: ret half %2 +; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn +define noundef half @fooD1_B4() local_unnamed_addr { +entry: + %0 = load half, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1, i32 4), align 8 + ret half %0 +} + +; Make sure D1.B5 load from C[5].xyzw +; CHECK: <2 x double> @fooD1_B5() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 5) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0 +; CHECK-NEXT: %3 = insertelement <2 x double> poison, double %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT: %5 = insertelement <2 x double> %3, double %4, i64 1 +; CHECK-NEXT: ret <2 x double> %5 +define noundef <2 x double> @fooD1_B5() local_unnamed_addr { +entry: + %0 = load <2 x double>, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1, i32 5), align 2 + ret <2 x double> %0 +} + +; Make sure D1.B6 load from C[6].x +; CHECK: float @fooD1_B6() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; 
CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 6) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT: ret float %2 +define noundef float @fooD1_B6() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1, i32 6), align 2 + ret float %0 +} + +; Make sure D1.B7 load from low 16bit of C[6].z and C[6].y +; CHECK: <3 x half> @fooD1_B7() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 6) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 2 +; CHECK-NEXT: %3 = insertelement <3 x half> poison, half %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 3 +; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 4 +; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2 +; CHECK-NEXT: ret <3 x half> %7 +define noundef <3 x half> @fooD1_B7() local_unnamed_addr { +entry: + %0 = load <3 x half>, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1, i32 7), align 2 + ret <3 x half> %0 +} + +; Make sure D1.B8 load from high 16bit of C[6].z and C[6].w +; CHECK: <3 x half> @fooD1_B8() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 6) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 5 +; CHECK-NEXT: %3 = insertelement <3 x half> poison, half %2, i64 0 +; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 6 +; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1 +; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 7 +; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2 +; CHECK-NEXT: ret <3 x half> %7 +define noundef <3 x half> @fooD1_B8() local_unnamed_addr { +entry: + %0 = load <3 x half>, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 1, i32 8), align 2 + ret <3 x half> %0 +} + +; Make sure D3.C0.A0 load from C[8].x +; CHECK: float @fooD3_C0_A0() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 8) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT: ret float %2 +define noundef float @fooD3_C0_A0() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3), align 8 + ret float %0 +} + +; Make sure D3.C0.A1 load from C[8].zw +; CHECK: double @fooD3_C0_A1() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 8) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 1 +; CHECK-NEXT: ret double %2 +define noundef double @fooD3_C0_A1() local_unnamed_addr { +entry: + %0 = load double, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., 
i32 0, i32 3, i32 0, i32 1), align 4 + ret double %0 +} + +; Make sure D3.C0.A2 load from C[9].x +; CHECK: float @fooD3_C0_A2() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 9) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0 +; CHECK-NEXT: ret float %2 +define noundef float @fooD3_C0_A2() local_unnamed_addr { +entry: + %0 = load float, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 0, i32 2), align 4 + ret float %0 +} + +; Make sure D3.C0.A3 load from low 16bit of C[9].y +; CHECK: half @fooD3_C0_A3() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 9) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 2 +; CHECK-NEXT: ret half %2 +define noundef half @fooD3_C0_A3() local_unnamed_addr { +entry: + %0 = load half, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 0, i32 3), align 8 + ret half %0 +} + +; Make sure D3.C0.A4 load from high 16bit of C[9].y +; CHECK: i16 @fooD4_C0_A4() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.i16.8 @dx.op.cbufferLoadLegacy.i16(i32 59, %dx.types.Handle %0, i32 9) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.i16.8 %1, 3 +; CHECK-NEXT: ret i16 %2 +define noundef signext i16 @fooD4_C0_A4() local_unnamed_addr { +entry: + %0 = load i16, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 0, i32 4), align 2 + ret i16 %0 +} + +; Make sure D3.C0.A5 load from C[9].zw +; CHECK: i64 @fooD3_C0_A5() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.i64 @dx.op.cbufferLoadLegacy.i64(i32 59, %dx.types.Handle %0, i32 9) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.i64 %1, 1 +; CHECK-NEXT: ret i64 %2 +define noundef i64 @fooD3_C0_A5() local_unnamed_addr { +entry: + %0 = load i64, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 0, i32 5), align 4 + ret i64 %0 +} + +; Make sure D3.C0.A6 load from C[10].x +; CHECK: i32 @fooD3_C0_A6() +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %1 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 59, %dx.types.Handle %0, i32 10) +; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.i32 %1, 0 +; CHECK-NEXT: ret i32 %2 +define noundef i32 @fooD3_C0_A6() local_unnamed_addr { +entry: + %0 = load i32, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 0, i32 6), align 4 + ret i32 %0 +} + +; Make sure indexing D3.C1 from C[11].x +; CHECK: float @fooD3_C1(i32 noundef %i) +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false) +; CHECK-NEXT: %arrayidx.offs = add nsw i32 11, %i +; CHECK-NEXT: %arrayidx = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, 
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 %arrayidx.offs)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0
+; CHECK-NEXT: ret float %2
+define noundef float @fooD3_C1(i32 noundef %i) local_unnamed_addr {
+entry:
+ %arrayidx = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 1, i32 %i
+ %0 = load float, ptr %arrayidx, align 4
+ ret float %0
+}
+
+; Make sure load D3.C3 from low 16bit of C[24].x
+; CHECK: half @fooD3_C3()
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 24)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 0
+; CHECK-NEXT: ret half %2
+define noundef half @fooD3_C3() local_unnamed_addr {
+entry:
+ %0 = load half, ptr getelementptr inbounds ({ i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 3), align 8
+ ret half %0
+}
+
+; Make sure indexing D3.C2.B0 from C[12].xy
+; CHECK: double @fooD3_C2_B0(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; Make sure struct size is 6 x 4dwords.
+; CHECK-NEXT: %arrayidx.idx = mul nsw i32 %i, 6
+; Make sure base is C[12].
+; CHECK-NEXT: %arrayidx.offs = add nsw i32 12, %arrayidx.idx
+; CHECK-NEXT: %arrayidx = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %arrayidx.offs)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0
+; CHECK-NEXT: ret double %2
+define noundef double @fooD3_C2_B0(i32 noundef %i) local_unnamed_addr {
+entry:
+ %arrayidx = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i
+ %0 = load double, ptr %arrayidx, align 2
+ ret double %0
+}
+
+; Make sure indexing D3.C2.B1 from C[13].xyz
+; CHECK: <3 x float> @fooD3_C2_B1(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; Make sure struct size is 6 x 4dwords.
+; CHECK-NEXT: %B1.idx = mul nsw i32 %i, 6
+; Make sure base is C[12+1]
+; CHECK-NEXT: %B1.offs = add nsw i32 12, %B1.idx
+; CHECK-NEXT: %B1.offs1 = add nsw i32 %B1.offs, 1
+; CHECK-NEXT: %B1 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 1
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 %B1.offs1)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0
+; CHECK-NEXT: %3 = insertelement <3 x float> poison, float %2, i64 0
+; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f32 %1, 1
+; CHECK-NEXT: %5 = insertelement <3 x float> %3, float %4, i64 1
+; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f32 %1, 2
+; CHECK-NEXT: %7 = insertelement <3 x float> %5, float %6, i64 2
+; CHECK-NEXT: ret <3 x float> %7
+define noundef <3 x float> @fooD3_C2_B1(i32 noundef %i) local_unnamed_addr {
+entry:
+ %B1 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 1
+ %0 = load <3 x float>, ptr %B1, align 2
+ ret <3 x float> %0
+}
+
+; Make sure indexing D3.C2.B2 from C[13].w
+; CHECK: float @fooD3_C2_B2(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; Make sure struct size is 6 x 4dwords.
+; CHECK-NEXT: %B2.idx = mul nsw i32 %i, 6
+; Make sure base is C[12+1]
+; CHECK-NEXT: %B2.offs = add nsw i32 12, %B2.idx
+; CHECK-NEXT: %B2.offs1 = add nsw i32 %B2.offs, 1
+; CHECK-NEXT: %B2 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 2
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 %B2.offs1)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 3
+; CHECK-NEXT: ret float %2
+define noundef float @fooD3_C2_B2(i32 noundef %i) local_unnamed_addr {
+entry:
+ %B2 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 2
+ %0 = load float, ptr %B2, align 2
+ ret float %0
+}
+
+; Make sure indexing D3.C2.B3 from C[14].xyzw + C[15].xy
+; CHECK: <3 x double> @fooD3_C2_B3(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; Make sure struct size is 6 x 4dwords.
+; CHECK-NEXT: %B3.idx = mul nsw i32 %i, 6
+; Make sure base is C[12 + 2]
+; CHECK-NEXT: %B3.offs = add nsw i32 12, %B3.idx
+; CHECK-NEXT: %B3.offs1 = add nsw i32 %B3.offs, 2
+; CHECK-NEXT: %B3 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 3
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %B3.offs1)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0
+; CHECK-NEXT: %3 = insertelement <3 x double> poison, double %2, i64 0
+; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f64 %1, 1
+; CHECK-NEXT: %5 = insertelement <3 x double> %3, double %4, i64 1
+; Access C[15].xy.
+; CHECK-NEXT: %6 = add i32 %B3.offs1, 1
+; CHECK-NEXT: %7 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %6)
+; CHECK-NEXT: %8 = extractvalue %dx.types.CBufRet.f64 %7, 0
+; CHECK-NEXT: %9 = insertelement <3 x double> %5, double %8, i64 2
+; CHECK-NEXT: ret <3 x double> %9
+define noundef <3 x double> @fooD3_C2_B3(i32 noundef %i) local_unnamed_addr {
+entry:
+ %B3 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 3
+ %0 = load <3 x double>, ptr %B3, align 2
+ ret <3 x double> %0
+}
+
+; Make sure indexing D3.C2.B4 from low 16bit of C[15].z
+; CHECK: half @fooD3_C2_B4(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; Make sure struct size is 6 x 4dwords.
+; CHECK-NEXT: %B4.idx = mul nsw i32 %i, 6
+; Make sure base is C[12+3]
+; CHECK-NEXT: %B4.offs = add nsw i32 12, %B4.idx
+; CHECK-NEXT: %B4.offs1 = add nsw i32 %B4.offs, 3
+; CHECK-NEXT: %B4 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 4
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 %B4.offs1)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 4
+; CHECK-NEXT: ret half %2
+define noundef half @fooD3_C2_B4(i32 noundef %i) local_unnamed_addr {
+entry:
+ %B4 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 4
+ %0 = load half, ptr %B4, align 2
+ ret half %0
+}
+
+; Make sure D3.C2.B5 indexing from C[16].xyzw
+; CHECK: <2 x double> @fooD3_C2_B5(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; Make sure struct size is 6 x 4dwords.
+; CHECK-NEXT: %B5.idx = mul nsw i32 %i, 6
+; Make sure base is C[12+4]
+; CHECK-NEXT: %B5.offs = add nsw i32 12, %B5.idx
+; CHECK-NEXT: %B5.offs1 = add nsw i32 %B5.offs, 4
+; CHECK-NEXT: %B5 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 5
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f64 @dx.op.cbufferLoadLegacy.f64(i32 59, %dx.types.Handle %0, i32 %B5.offs1)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f64 %1, 0
+; CHECK-NEXT: %3 = insertelement <2 x double> poison, double %2, i64 0
+; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f64 %1, 1
+; CHECK-NEXT: %5 = insertelement <2 x double> %3, double %4, i64 1
+; CHECK-NEXT: ret <2 x double> %5
+define noundef <2 x double> @fooD3_C2_B5(i32 noundef %i) local_unnamed_addr {
+entry:
+ %B5 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 5
+ %0 = load <2 x double>, ptr %B5, align 2
+ ret <2 x double> %0
+}
+
+; Make sure D3.C2.B6 indexing from C[17].x
+; CHECK: float @fooD3_C2_B6(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; Make sure struct size is 6 x 4dwords.
+; CHECK-NEXT: %B6.idx = mul nsw i32 %i, 6
+; Make sure base is C[12 + 5]
+; CHECK-NEXT: %B6.offs = add nsw i32 12, %B6.idx
+; CHECK-NEXT: %B6.offs1 = add nsw i32 %B6.offs, 5
+; CHECK-NEXT: %B6 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 6
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %0, i32 %B6.offs1)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f32 %1, 0
+; CHECK-NEXT: ret float %2
+define noundef float @fooD3_C2_B6(i32 noundef %i) local_unnamed_addr {
+entry:
+ %B6 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 6
+ %0 = load float, ptr %B6, align 2
+ ret float %0
+}
+
+; Make sure D3.C2.B7 indexing from C[17].y and low 16bit of C[17].z
+; CHECK: <3 x half> @fooD3_C2_B7(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; Make sure struct size is 6 x 4dwords.
+; CHECK-NEXT: %B7.idx = mul nsw i32 %i, 6
+; Make sure base is C[12+5]
+; CHECK-NEXT: %B7.offs = add nsw i32 12, %B7.idx
+; CHECK-NEXT: %B7.offs1 = add nsw i32 %B7.offs, 5
+; CHECK-NEXT: %B7 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 7
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 %B7.offs1)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 2
+; CHECK-NEXT: %3 = insertelement <3 x half> poison, half %2, i64 0
+; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 3
+; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1
+; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 4
+; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2
+; CHECK-NEXT: ret <3 x half> %7
+define noundef <3 x half> @fooD3_C2_B7(i32 noundef %i) local_unnamed_addr {
+entry:
+ %B7 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 7
+ %0 = load <3 x half>, ptr %B7, align 2
+ ret <3 x half> %0
+}
+
+; Make sure D3.C2.B8 indexing from high 16bit of C[17].z and C[17].w
+; CHECK: <3 x half> @fooD3_C2_B8(i32 noundef %i)
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 2, i32 0, i32 -1, i1 false)
+; CHECK-NEXT: %B8.idx = mul nsw i32 %i, 6
+; CHECK-NEXT: %B8.offs = add nsw i32 12, %B8.idx
+; CHECK-NEXT: %B8.offs1 = add nsw i32 %B8.offs, 5
+; CHECK-NEXT: %B8 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 8
+; CHECK-NEXT: %1 = call %dx.types.CBufRet.f16.8 @dx.op.cbufferLoadLegacy.f16(i32 59, %dx.types.Handle %0, i32 %B8.offs1)
+; CHECK-NEXT: %2 = extractvalue %dx.types.CBufRet.f16.8 %1, 5
+; CHECK-NEXT: %3 = insertelement <3 x half> poison, half %2, i64 0
+; CHECK-NEXT: %4 = extractvalue %dx.types.CBufRet.f16.8 %1, 6
+; CHECK-NEXT: %5 = insertelement <3 x half> %3, half %4, i64 1
+; CHECK-NEXT: %6 = extractvalue %dx.types.CBufRet.f16.8 %1, 7
+; CHECK-NEXT: %7 = insertelement <3 x half> %5, half %6, i64 2
+; CHECK-NEXT: ret <3 x half> %7
+define noundef <3 x half> @fooD3_C2_B8(i32 noundef %i) local_unnamed_addr {
+entry:
+ %B8 = getelementptr inbounds { i32, %struct.B, half, %struct.C, double }, ptr @D.cb., i32 0, i32 3, i32 2, i32 %i, i32 8
+ %0 = load <3 x half>, ptr %B8, align 2
+ ret <3 x half> %0
+}
+
+
+!hlsl.cbufs = !{!0}
+!0 = !{ptr @D.cb., !"D.cb.ty", i32 0, i32 -1, i32 0}
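
The tests above all exercise the same legacy-layout arithmetic: each constant register C[n] is a 16-byte (4-dword) row, cbufferLoadLegacy takes the row index, and the element's channel selects the component within that row. A minimal standalone sketch of that mapping follows; the helper name rowAndChannel and the example offset are illustrative assumptions, not part of the patch or of the pass's actual API.

// Hypothetical sketch: map a byte offset in a legacy cbuffer layout to the
// (row, channel) pair used by cbufferLoadLegacy/extractvalue, assuming the
// component does not straddle a 16-byte row.
#include <cassert>
#include <cstdio>
#include <utility>

static std::pair<unsigned, unsigned> rowAndChannel(unsigned ByteOffset,
                                                   unsigned ElemBytes) {
  const unsigned RowBytes = 16; // one constant register C[n]
  assert(ByteOffset % RowBytes + ElemBytes <= RowBytes &&
         "component straddles a row");
  return {ByteOffset / RowBytes, (ByteOffset % RowBytes) / ElemBytes};
}

int main() {
  // e.g. a 4-byte float at byte offset 200 is read as C[12], channel 2 (.z).
  auto [Row, Ch] = rowAndChannel(200, /*ElemBytes=*/4);
  std::printf("C[%u] channel %u\n", Row, Ch);
  return 0;
}

The per-struct checks like "%Bn.idx = mul nsw i32 %i, 6" follow the same idea one level up: an array element that is 6 rows long contributes i * 6 rows, and the field's own row offset is then added on top of the array base.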