diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1859,7 +1859,10 @@
   // The legalizer preprocessed the intrinsic arguments. If we aren't using
   // NSA, these should have been packed into a single value in the first
   // address register
-  const bool UseNSA = NumVAddrRegs != 1 && NumVAddrDwords == NumVAddrRegs;
+  const bool UseNSA =
+      NumVAddrRegs != 1 &&
+      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
+                                   : NumVAddrDwords == NumVAddrRegs);
   if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
     LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");
     return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4997,6 +4997,9 @@
     return false;
   }
 
+  const unsigned NSAMaxSize = ST.getNSAMaxSize();
+  const bool HasPartialNSA = ST.hasPartialNSAEncoding();
+
   if (IsA16 || IsG16) {
     if (Intr->NumVAddrs > 1) {
       SmallVector<Register, 8> PackedRegs;
@@ -5007,9 +5010,19 @@
       // See also below in the non-a16 branch
       const bool UseNSA = ST.hasNSAEncoding() &&
                           PackedRegs.size() >= ST.getNSAThreshold(MF) &&
-                          PackedRegs.size() <= ST.getNSAMaxSize();
-
-      if (!UseNSA && PackedRegs.size() > 1) {
+                          (PackedRegs.size() <= NSAMaxSize || HasPartialNSA);
+      const bool UsePartialNSA =
+          UseNSA && HasPartialNSA && PackedRegs.size() > NSAMaxSize;
+
+      if (UsePartialNSA) {
+        // Pack registers that would go over NSAMaxSize into last VAddr register
+        LLT PackedAddrTy =
+            LLT::fixed_vector(2 * (PackedRegs.size() - NSAMaxSize + 1), 16);
+        auto Concat = B.buildConcatVectors(
+            PackedAddrTy, ArrayRef(PackedRegs).slice(NSAMaxSize - 1));
+        PackedRegs[NSAMaxSize - 1] = Concat.getReg(0);
+        PackedRegs.resize(NSAMaxSize);
+      } else if (!UseNSA && PackedRegs.size() > 1) {
         LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), 16);
         auto Concat = B.buildConcatVectors(PackedAddrTy, PackedRegs);
         PackedRegs[0] = Concat.getReg(0);
@@ -5045,16 +5058,22 @@
     // SIShrinkInstructions will convert NSA encodings to non-NSA after register
     // allocation when possible.
     //
-    // TODO: we can actually allow partial NSA where the final register is a
-    // contiguous set of the remaining addresses.
-    // This could help where there are more addresses than supported.
+    // Partial NSA is allowed on GFX11 where the final register is a contiguous
+    // set of the remaining addresses.
     const bool UseNSA = ST.hasNSAEncoding() &&
                         CorrectedNumVAddrs >= ST.getNSAThreshold(MF) &&
-                        CorrectedNumVAddrs <= ST.getNSAMaxSize();
-
-    if (!UseNSA && Intr->NumVAddrs > 1)
+                        (CorrectedNumVAddrs <= NSAMaxSize || HasPartialNSA);
+    const bool UsePartialNSA =
+        UseNSA && HasPartialNSA && CorrectedNumVAddrs > NSAMaxSize;
+
+    if (UsePartialNSA) {
+      convertImageAddrToPacked(B, MI,
+                               ArgOffset + Intr->VAddrStart + NSAMaxSize - 1,
+                               Intr->NumVAddrs - NSAMaxSize + 1);
+    } else if (!UseNSA && Intr->NumVAddrs > 1) {
       convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart,
                                Intr->NumVAddrs);
+    }
   }
 
   int Flags = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6581,15 +6581,24 @@
   // SIShrinkInstructions will convert NSA encodings to non-NSA after register
   // allocation when possible.
   //
-  // TODO: we can actually allow partial NSA where the final register is a
-  // contiguous set of the remaining addresses.
-  // This could help where there are more addresses than supported.
-  bool UseNSA = ST->hasFeature(AMDGPU::FeatureNSAEncoding) &&
-                VAddrs.size() >= (unsigned)ST->getNSAThreshold(MF) &&
-                VAddrs.size() <= (unsigned)ST->getNSAMaxSize();
+  // Partial NSA is allowed on GFX11 where the final register is a contiguous
+  // set of the remaining addresses.
+  const unsigned NSAMaxSize = ST->getNSAMaxSize();
+  const bool HasPartialNSAEncoding = ST->hasPartialNSAEncoding();
+  const bool UseNSA = ST->hasNSAEncoding() &&
+                      VAddrs.size() >= ST->getNSAThreshold(MF) &&
+                      (VAddrs.size() <= NSAMaxSize || HasPartialNSAEncoding);
+  const bool UsePartialNSA =
+      UseNSA && HasPartialNSAEncoding && VAddrs.size() > NSAMaxSize;
+
   SDValue VAddr;
-  if (!UseNSA)
+  if (UsePartialNSA) {
+    VAddr = getBuildDwordsVector(DAG, DL,
+                                 ArrayRef(VAddrs).drop_front(NSAMaxSize - 1));
+  }
+  else if (!UseNSA) {
     VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
+  }
 
   SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
   SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
@@ -6657,7 +6666,11 @@
   SmallVector<SDValue, 26> Ops;
   if (BaseOpcode->Store || BaseOpcode->Atomic)
     Ops.push_back(VData); // vdata
-  if (UseNSA)
+  if (UsePartialNSA) {
+    append_range(Ops, ArrayRef(VAddrs).take_front(NSAMaxSize - 1));
+    Ops.push_back(VAddr);
+  }
+  else if (UseNSA)
     append_range(Ops, VAddrs);
   else
     Ops.push_back(VAddr);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4636,9 +4636,12 @@
     unsigned VAddrWords;
     if (IsNSA) {
       VAddrWords = SRsrcIdx - VAddr0Idx;
+      if (ST.hasPartialNSAEncoding() && AddrWords > ST.getNSAMaxSize()) {
+        unsigned LastVAddrIdx = SRsrcIdx - 1;
+        VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
+      }
     } else {
-      const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);
-      VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
+      VAddrWords = getOpSize(MI, VAddr0Idx) / 4;
       if (AddrWords > 12)
         AddrWords = 16;
     }
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -308,7 +308,10 @@
   unsigned NextVgpr = 0;
   bool IsUndef = true;
   bool IsKill = NewAddrDwords == Info->VAddrDwords;
-  for (unsigned Idx = 0; Idx < Info->VAddrOperands; ++Idx) {
+  const unsigned NSAMaxSize = ST->getNSAMaxSize();
+  const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
+  const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
+  for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
     const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
     unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
     unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
@@ -361,13 +364,13 @@
   MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
   MI.getOperand(VAddr0Idx).setIsKill(IsKill);
 
-  for (int i = 1; i < Info->VAddrOperands; ++i)
+  for (unsigned i = 1; i < EndVAddr; ++i)
     MI.removeOperand(VAddr0Idx + 1);
 
   if (ToUntie >= 0) {
     MI.tieOperands(
         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
-        ToUntie - (EndVAddr - 1));
+        ToUntie - (EndVAddr - 1));
   }
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX11 %s
 
 define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
   ; GFX9-LABEL: name: sample_1d
@@ -58,6 +58,33 @@
   ; GFX10-NEXT:   $vgpr2 = COPY [[UV2]](s32)
   ; GFX10-NEXT:   $vgpr3 = COPY [[UV3]](s32)
   ; GFX10-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+  ; GFX11-LABEL: name: sample_1d
+  ; GFX11: bb.1.main_body:
+  ; GFX11-NEXT:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
+  ; GFX11-NEXT: {{  $}}
+  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
+  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
+  ; GFX11-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
+  ; GFX11-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
+  ; GFX11-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
+  ; GFX11-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
+  ; GFX11-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
+  ; GFX11-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; GFX11-NEXT:   [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10
+  ; GFX11-NEXT:   [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11
+  ; GFX11-NEXT:   [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12
+  ; GFX11-NEXT:   [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13
+  ; GFX11-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+  ; GFX11-NEXT:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; GFX11-NEXT:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32)
+  ; GFX11-NEXT:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x
s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -124,6 +151,36 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -200,6 +257,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, 
implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_3d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -276,6 +368,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cube + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -342,6 +469,36 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_1darray + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1darray), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -418,6 +575,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_2darray + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; 
GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -488,6 +680,38 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 
1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -560,6 +784,39 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -626,6 +883,36 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -702,6 +989,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cl.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -774,6 +1096,39 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; 
GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -853,6 +1208,42 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; 
GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -925,6 +1316,39 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_b_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1001,6 +1425,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_b_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 
7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1076,6 +1535,40 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_b_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1155,6 +1648,42 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_b_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1231,6 +1760,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_b_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1312,6 +1876,43 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_b_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 
+ ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1391,6 +1992,42 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_b_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; 
GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1476,6 +2113,45 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_b_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; 
GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.b.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1553,6 +2229,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1640,6 +2351,46 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1748,6 +2499,57 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_3d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1829,6 +2631,43 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_d_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -1920,6 +2759,48 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_d_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; 
GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2001,6 +2882,43 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2096,6 +3014,50 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_d_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2181,6 +3143,45 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_d_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, 
$sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2280,6 +3281,52 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_d_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, 
$vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half 
%dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2357,6 +3404,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cd_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2444,6 +3526,46 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cd_2d + ; 
GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2525,6 +3647,43 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: 
sample_c_cd_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2616,6 +3775,48 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cd_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, 
$vgpr4, $vgpr5, $vgpr6 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2697,6 +3898,43 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: 
name: sample_cd_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2792,6 +4030,50 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_cd_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 
+ ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.cd.cl.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2877,6 +4159,45 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY 
[[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cd_cl_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.1d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -2976,6 +4297,52 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_cd_cl_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = 
COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3042,6 +4409,36 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_l_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.1d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3118,6 +4515,41 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_l_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; 
GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.l.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3190,6 +4622,39 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_l_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; 
GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3269,6 +4734,42 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_l_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.l.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3329,6 +4830,33 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_lz_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.1d), 15, [[TRUNC]](s16), 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3395,6 +4923,36 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_lz_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.lz.2d), 15, [[BUILD_VECTOR2]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3465,6 +5023,38 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_lz_1d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.1d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3537,6 +5127,39 @@ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX11-LABEL: name: sample_c_lz_2d + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY 
$sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY13]](s32) + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.lz.2d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 1 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -3633,6 +5256,51 @@ ; GFX10-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) ; GFX10-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX11-LABEL: name: sample_c_d_o_2darray_V1 + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (s32), addrspace 7) + ; GFX11-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret float %v @@ -3733,6 +5401,53 @@ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GFX11-LABEL: name: sample_c_d_o_2darray_V2 + ; GFX11: bb.1.main_body: + ; GFX11-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; 
GFX11-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX11-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX11-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX11-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX11-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX11-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX11-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $sgpr10 + ; GFX11-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $sgpr11 + ; GFX11-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $sgpr12 + ; GFX11-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $sgpr13 + ; GFX11-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX11-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX11-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX11-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX11-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY14]](s32) + ; GFX11-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX11-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) + ; GFX11-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX11-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY16]](s32) + ; GFX11-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX11-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY17]](s32) + ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX11-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY18]](s32) + ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX11-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY19]](s32) + ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX11-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY20]](s32) + ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY12]](s32) + ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY13]](s32) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16) + ; GFX11-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC4]](s16), [[TRUNC5]](s16) + ; GFX11-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[DEF]](s16) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<2 x s32>), addrspace 7) + ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half 
%dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <2 x float> %v diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll @@ -65,8 +65,8 @@ ; GFX11-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr6 ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<9 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<9 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<5 x s32>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -143,8 +143,8 @@ ; GFX11-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<10 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[BUILD_VECTOR2]](<10 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<6 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; 
GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -223,8 +223,8 @@ ; GFX11-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<11 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[BUILD_VECTOR2]](<11 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<7 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -305,8 +305,8 @@ ; GFX11-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr9 ; GFX11-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX11-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[BUILD_VECTOR2]](<12 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.o.3d), 15, [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[BUILD_VECTOR2]](<8 x s32>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll @@ -259,8 +259,8 @@ ; GFX11-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC5]](s16), [[DEF]](s16) ; GFX11-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC6]](s16), [[TRUNC7]](s16) ; GFX11-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC8]](s16), [[DEF]](s16) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR6]](<2 x s16>), [[BUILD_VECTOR7]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 3 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll @@ -251,8 +251,8 @@ ; GFX11-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.d.3d), 15, [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BUILD_VECTOR4]](<2 x s16>), [[BUILD_VECTOR5]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -759,8 +759,8 @@ ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -1425,8 +1425,8 @@ ; GFX11-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY17]](s32) ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[CONCAT_VECTORS]](<12 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.cd.cl.2d), 15, [[BITCAST]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<4 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GFX11-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -1519,8 +1519,8 @@ ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), addrspace 7) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 4, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (s32), addrspace 7) ; GFX11-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32) ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 main_body: @@ -1611,8 +1611,8 @@ ; GFX11-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY18]](s32) ; GFX11-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY19]](s32) ; GFX11-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY20]](s32) - ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) - ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[CONCAT_VECTORS]](<14 x s16>), $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 7) + ; GFX11-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX11-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.c.d.o.2darray), 6, [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>), [[CONCAT_VECTORS]](<6 x s16>), $noreg, $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 2 :: (dereferenceable load (<2 x s32>), addrspace 7) ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GFX11-NEXT: $vgpr0 = COPY 
[[UV]](s32) ; GFX11-NEXT: $vgpr1 = COPY [[UV1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; GFX10-LABEL: sample_d_1d: @@ -8,6 +8,12 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -21,6 +27,14 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 +; GFX11-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -36,6 +50,14 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_3d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: v_perm_b32 v1, v4, v3, 0x5040100 +; GFX11-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v1, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -47,6 +69,12 @@ ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_c_d_g16 v[0:3], 
v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -60,6 +88,14 @@ ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: v_perm_b32 v2, v4, v3, 0x5040100 +; GFX11-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -71,6 +107,12 @@ ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -84,6 +126,14 @@ ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_cl_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 +; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -95,6 +145,12 @@ ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -111,6 +167,14 @@ ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_cl_2d: +; GFX11: ; %bb.0: ; %main_body +; 
GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: v_perm_b32 v2, v4, v3, 0x5040100 +; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -129,6 +193,14 @@ ; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_o_2darray_V1: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: v_perm_b32 v3, v5, v4, 0x5040100 +; GFX11-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret float %v @@ -147,6 +219,14 @@ ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_o_2darray_V2: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: v_perm_b32 v3, v5, v4, 0x5040100 +; GFX11-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <2 x float> %v diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll --- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll +++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll @@ -448,22 +448,20 @@ ; ; GFX11-LABEL: cluster_image_sample: ; GFX11: ; %bb.0: ; %entry -; GFX11-NEXT: v_cvt_f32_i32_e32 v9, v1 -; GFX11-NEXT: v_cvt_f32_i32_e32 v8, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_add_f32 v3, 1.0, v9 -; GFX11-NEXT: v_dual_mov_b32 v10, 1.0 :: v_dual_mov_b32 v7, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_dual_add_f32 v2, 1.0, v8 :: v_dual_mov_b32 v5, v4 -; GFX11-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_add_f32 v9, 2.0, v9 -; GFX11-NEXT: v_dual_add_f32 v8, 2.0, v8 :: v_dual_mov_b32 v11, v10 -; GFX11-NEXT: v_mov_b32_e32 v12, v10 -; GFX11-NEXT: v_mov_b32_e32 v13, v10 +; GFX11-NEXT: v_cvt_f32_i32_e32 v4, v0 +; GFX11-NEXT: v_cvt_f32_i32_e32 v5, v1 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_dual_mov_b32 v6, 1.0 :: v_dual_add_f32 v11, 2.0, v5 +; GFX11-NEXT: v_dual_add_f32 v9, 1.0, v5 :: v_dual_add_f32 v8, 1.0, v4 +; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_add_f32 v10, 2.0, v4 +; GFX11-NEXT: v_mov_b32_e32 v7, v6 ; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: image_sample_d v[2:5], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D -; GFX11-NEXT: image_sample_d v[6:9], v[8:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: image_sample_d v[2:5], [v8, v9, v2, v2, v[2:3]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: image_sample_d v[6:9], [v10, v11, v6, v6, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_dual_add_f32 v4, v4, v8 :: v_dual_add_f32 v5, v5, v9 +; GFX11-NEXT: v_dual_add_f32 v5, v5, v9 :: v_dual_add_f32 v4, v4, v8 ; GFX11-NEXT: v_dual_add_f32 v3, v3, v7 :: v_dual_add_f32 v2, v2, v6 ; GFX11-NEXT: image_store v[2:5], v[0:1], s[12:19] dmask:0xf dim:SQ_RSRC_IMG_2D unorm ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll @@ -34,7 +34,7 @@ ; GCN-LABEL: {{^}}sample_d_3d: ; GFX1010-NSA: image_sample_d v[0:3], v[7:15], ; GFX1030-NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v4, v6, v0, v2, v1], -; GFX11-NSA: image_sample_d v[0:3], v[7:15], +; GFX11-NSA: image_sample_d v[0:3], [v3, v8, v7, v5, v[9:13]], define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %r, float %t, float %dsdh, float %dtdv, float %dsdv, float %drdv, float %drdh, float %dtdh) { main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; GFX9-LABEL: sample_1d: @@ -21,6 +21,15 @@ ; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, 
half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -47,6 +56,16 @@ ; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -73,6 +92,16 @@ ; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_3d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -99,6 +128,16 @@ ; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_cube: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half %s, half %t, half %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -125,6 +164,16 @@ ; GFX10-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_1darray: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half %s, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -151,6 +200,16 @@ ; GFX10-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_2darray: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; 
GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -174,6 +233,15 @@ ; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -200,6 +268,16 @@ ; GFX10-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -226,6 +304,16 @@ ; GFX10-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_cl v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -252,6 +340,16 @@ ; GFX10-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_cl_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_cl v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 
ret <4 x float> %v @@ -278,6 +376,16 @@ ; GFX10-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_c_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -306,6 +414,16 @@ ; GFX10-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_cl_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_c_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -329,6 +447,15 @@ ; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_b_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half %bias, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -355,6 +482,16 @@ ; GFX10-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_b_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -378,6 +515,15 @@ ; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_b_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; 
GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -404,6 +550,16 @@ ; GFX10-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_b_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -430,6 +586,16 @@ ; GFX10-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_b_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_b_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -458,6 +624,16 @@ ; GFX10-NEXT: image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_b_cl_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_b_cl v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -484,6 +660,16 @@ ; GFX10-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_b_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_c_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -513,6 +699,16 @@ ; GFX10-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_b_cl_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: s_mov_b32 s12, exec_lo +; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 +; GFX11-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half %bias, float %zcompare, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -530,6 +726,12 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -554,6 +756,15 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 +; GFX11-NEXT: v_perm_b32 v3, v3, v2, 0x5040100 +; GFX11-NEXT: v_perm_b32 v2, v1, v0, 0x5040100 +; GFX11-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -584,6 +795,15 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_3d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: v_perm_b32 v7, v7, v6, 0x5040100 +; GFX11-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[7:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -601,6 +821,12 @@ ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; 
GFX11-LABEL: sample_c_d_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -627,6 +853,15 @@ ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v5, v6, v5, 0x5040100 +; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -647,6 +882,13 @@ ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -671,6 +913,15 @@ ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_cl_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -691,6 +942,13 @@ ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 +; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> 
%samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -717,6 +975,15 @@ ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_cl_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v5, v6, v5, 0x5040100 +; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -737,6 +1004,13 @@ ; GFX10-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_l_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: image_sample_l v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -757,6 +1031,13 @@ ; GFX10-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_l_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v1, v1, v0, 0x5040100 +; GFX11-NEXT: image_sample_l v[0:3], v[1:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -777,6 +1058,13 @@ ; GFX10-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_l_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: image_sample_c_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -799,6 +1087,13 @@ ; GFX10-NEXT: image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_l_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: image_sample_c_l v[0:3], [v0, v1, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 
%lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -816,6 +1111,12 @@ ; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_lz_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -836,6 +1137,13 @@ ; GFX10-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_lz_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -853,6 +1161,12 @@ ; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_lz_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float %zcompare, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -873,6 +1187,13 @@ ; GFX10-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_lz_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -903,6 +1224,15 @@ ; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[8:13], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_o_2darray_V1: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: v_perm_b32 v7, v7, v6, 0x5040100 +; GFX11-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[7:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret float %v @@ -933,6 +1263,15 @@ ; 
GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[8:13], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_o_2darray_V2: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: v_perm_b32 v7, v7, v6, 0x5040100 +; GFX11-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[7:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <2 x float> %v diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll @@ -1631,12 +1631,12 @@ ; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe: ; GFX11: ; %bb.0: ; %main_body ; GFX11-NEXT: v_mov_b32_e32 v11, 0 +; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0 ; GFX11-NEXT: v_mov_b32_e32 v12, v11 -; GFX11-NEXT: v_dual_mov_b32 v9, v11 :: v_dual_mov_b32 v10, v12 -; GFX11-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe +; GFX11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 +; GFX11-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v[4:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v0, v9 -; GFX11-NEXT: global_store_b32 v11, v10, s[12:13] +; GFX11-NEXT: global_store_b32 v11, v1, s[12:13] ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: ; return to shader part epilog main_body: @@ -1709,13 +1709,12 @@ ; ; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe: ; GFX11: ; %bb.0: ; %main_body -; GFX11-NEXT: v_mov_b32_e32 v9, 0 -; GFX11-NEXT: v_mov_b32_e32 v10, v9 -; GFX11-NEXT: v_mov_b32_e32 v11, v9 -; GFX11-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe +; GFX11-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0 +; GFX11-NEXT: v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1 +; GFX11-NEXT: v_mov_b32_e32 v1, v0 +; GFX11-NEXT: v_mov_b32_e32 v2, v0 +; GFX11-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v[4:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_mov_b32_e32 v2, v11 -; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10 ; GFX11-NEXT: ; return to shader part epilog main_body: %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll @@ -53,12 +53,9 @@ ; ; GFX11-LABEL: sample_d_3d: ; GFX11: ; %bb.0: ; %main_body -; GFX11-NEXT: v_mov_b32_e32 v9, v3 ; encoding: [0x03,0x03,0x12,0x7e] -; GFX11-NEXT: v_mov_b32_e32 
v3, v2 ; encoding: [0x02,0x03,0x06,0x7e] -; GFX11-NEXT: v_perm_b32 v2, v1, v0, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; encoding: [0x03,0x00,0x87,0xbf] -; GFX11-NEXT: v_perm_b32 v4, v4, v9, 0x5040100 ; encoding: [0x04,0x00,0x44,0xd6,0x04,0x13,0xfe,0x03,0x00,0x01,0x04,0x05] -; GFX11-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x0f,0xe4,0xf0,0x02,0x00,0x00,0x08] +; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 ; encoding: [0x03,0x00,0x44,0xd6,0x04,0x07,0xfe,0x03,0x00,0x01,0x04,0x05] +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 ; encoding: [0x00,0x00,0x44,0xd6,0x01,0x01,0xfe,0x03,0x00,0x01,0x04,0x05] +; GFX11-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x0f,0xe4,0xf0,0x00,0x00,0x00,0x08,0x02,0x03,0x05,0x06] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] ; GFX11-NEXT: ; return to shader part epilog main_body: @@ -172,12 +169,9 @@ ; ; GFX11-LABEL: sample_c_d_cl_2d: ; GFX11: ; %bb.0: ; %main_body -; GFX11-NEXT: v_mov_b32_e32 v8, v2 ; encoding: [0x02,0x03,0x10,0x7e] -; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] -; GFX11-NEXT: v_perm_b32 v4, v4, v3, 0x5040100 ; encoding: [0x04,0x00,0x44,0xd6,0x04,0x07,0xfe,0x03,0x00,0x01,0x04,0x05] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; encoding: [0x03,0x00,0x87,0xbf] -; GFX11-NEXT: v_perm_b32 v3, v8, v1, 0x5040100 ; encoding: [0x03,0x00,0x44,0xd6,0x08,0x03,0xfe,0x03,0x00,0x01,0x04,0x05] -; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x0f,0x50,0xf1,0x02,0x00,0x00,0x08] +; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 ; encoding: [0x03,0x00,0x44,0xd6,0x04,0x07,0xfe,0x03,0x00,0x01,0x04,0x05] +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 ; encoding: [0x01,0x00,0x44,0xd6,0x02,0x03,0xfe,0x03,0x00,0x01,0x04,0x05] +; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x0f,0x50,0xf1,0x00,0x00,0x00,0x08,0x01,0x03,0x05,0x06] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] ; GFX11-NEXT: ; return to shader part epilog main_body: @@ -200,14 +194,9 @@ ; ; GFX11-LABEL: sample_c_d_o_2darray_V1: ; GFX11: ; %bb.0: ; %main_body -; GFX11-NEXT: v_mov_b32_e32 v9, v3 ; encoding: [0x03,0x03,0x12,0x7e] -; GFX11-NEXT: v_mov_b32_e32 v10, v2 ; encoding: [0x02,0x03,0x14,0x7e] -; GFX11-NEXT: v_mov_b32_e32 v3, v1 ; encoding: [0x01,0x03,0x06,0x7e] -; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] -; GFX11-NEXT: v_perm_b32 v5, v5, v4, 0x5040100 ; encoding: [0x05,0x00,0x44,0xd6,0x05,0x09,0xfe,0x03,0x00,0x01,0x04,0x05] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; encoding: [0x04,0x00,0x87,0xbf] -; GFX11-NEXT: v_perm_b32 v4, v9, v10, 0x5040100 ; encoding: [0x04,0x00,0x44,0xd6,0x09,0x15,0xfe,0x03,0x00,0x01,0x04,0x05] -; GFX11-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x14,0x04,0xf0,0xf0,0x02,0x00,0x00,0x08] +; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 ; encoding: [0x04,0x00,0x44,0xd6,0x05,0x09,0xfe,0x03,0x00,0x01,0x04,0x05] +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05] +; GFX11-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY ; 
encoding: [0x15,0x04,0xf0,0xf0,0x00,0x00,0x00,0x08,0x01,0x02,0x04,0x06] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] ; GFX11-NEXT: ; return to shader part epilog main_body: @@ -230,14 +219,9 @@ ; ; GFX11-LABEL: sample_c_d_o_2darray_V2: ; GFX11: ; %bb.0: ; %main_body -; GFX11-NEXT: v_mov_b32_e32 v9, v3 ; encoding: [0x03,0x03,0x12,0x7e] -; GFX11-NEXT: v_mov_b32_e32 v10, v2 ; encoding: [0x02,0x03,0x14,0x7e] -; GFX11-NEXT: v_mov_b32_e32 v3, v1 ; encoding: [0x01,0x03,0x06,0x7e] -; GFX11-NEXT: v_mov_b32_e32 v2, v0 ; encoding: [0x00,0x03,0x04,0x7e] -; GFX11-NEXT: v_perm_b32 v5, v5, v4, 0x5040100 ; encoding: [0x05,0x00,0x44,0xd6,0x05,0x09,0xfe,0x03,0x00,0x01,0x04,0x05] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; encoding: [0x04,0x00,0x87,0xbf] -; GFX11-NEXT: v_perm_b32 v4, v9, v10, 0x5040100 ; encoding: [0x04,0x00,0x44,0xd6,0x09,0x15,0xfe,0x03,0x00,0x01,0x04,0x05] -; GFX11-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x14,0x06,0xf0,0xf0,0x02,0x00,0x00,0x08] +; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 ; encoding: [0x04,0x00,0x44,0xd6,0x05,0x09,0xfe,0x03,0x00,0x01,0x04,0x05] +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 ; encoding: [0x02,0x00,0x44,0xd6,0x03,0x05,0xfe,0x03,0x00,0x01,0x04,0x05] +; GFX11-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x15,0x06,0xf0,0xf0,0x00,0x00,0x00,0x08,0x01,0x02,0x04,0x06] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] ; GFX11-NEXT: ; return to shader part epilog main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s -; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; GFX10-LABEL: sample_d_1d: @@ -8,6 +8,12 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -21,6 +27,14 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: image_sample_d_g16 v[0:3], 
[v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -36,6 +50,14 @@ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_3d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 +; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 +; GFX11-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -47,6 +69,12 @@ ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -60,6 +88,14 @@ ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_c_d_2d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 +; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 +; GFX11-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -71,6 +107,12 @@ ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_cl_1d: +; GFX11: ; %bb.0: ; %main_body +; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: ; return to shader part epilog main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) ret <4 x float> %v @@ -84,6 +126,14 @@ ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: sample_d_cl_2d: +; GFX11: ; %bb.0: ; %main_body 
+; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
@@ -95,6 +145,12 @@
 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: sample_c_d_cl_1d:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
@@ -110,6 +166,14 @@
 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: sample_c_d_cl_2d:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
+; GFX11-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
+; GFX11-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
 main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
@@ -127,6 +191,14 @@
 ; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: sample_c_d_o_2darray_V1:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
+; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX11-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
 main_body:
   %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret float %v
@@ -144,6 +216,14 @@
 ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
 ; GFX10-NEXT: ; return to shader part epilog
+;
+; GFX11-LABEL: sample_c_d_o_2darray_V2:
+; GFX11: ; %bb.0: ; %main_body
+; GFX11-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
+; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
+; GFX11-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: ; return to shader part epilog
 main_body:
   %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <2 x float> %v
diff --git a/llvm/test/CodeGen/AMDGPU/verify-image-partial-nsa.mir b/llvm/test/CodeGen/AMDGPU/verify-image-partial-nsa.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/verify-image-partial-nsa.mir
@@ -0,0 +1,28 @@
+# RUN: not --crash llc -march=amdgcn -mcpu=gfx1100 -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s
+
+---
+name: image_sample_d_v1v9_nsa_partial
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+
+    ; GFX11-ERR: *** Bad machine code: Operand has incorrect register class. ***
+    ; GFX11-ERR: - instruction: renamable $vgpr10 = IMAGE_SAMPLE_D_V1_V9_nsa_gfx11 renamable $vgpr1, renamable $vgpr0, renamable $vgpr2, renamable $vgpr3, renamable $vgpr4_vgpr5_vgpr6_vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+    ; GFX11-ERR: *** Bad machine code: Illegal physical register for instruction ***
+    ; GFX11-ERR: - instruction: renamable $vgpr10 = IMAGE_SAMPLE_D_V1_V9_nsa_gfx11 renamable $vgpr1, renamable $vgpr0, renamable $vgpr2, renamable $vgpr3, renamable $vgpr4_vgpr5_vgpr6_vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+    ; GFX11-ERR: - operand 5: renamable $vgpr4_vgpr5_vgpr6_vgpr7
+    ; GFX11-ERR: $vgpr4_vgpr5_vgpr6_vgpr7 is not a VReg_160 register.
+
+    renamable $vgpr10 = IMAGE_SAMPLE_D_V1_V9_nsa_gfx11 renamable $vgpr1, renamable $vgpr0, renamable $vgpr2, renamable $vgpr3, renamable $vgpr4_vgpr5_vgpr6_vgpr7, renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+
+    ; GFX11-ERR: *** Bad machine code: Operand has incorrect register class. ***
+    ; GFX11-ERR: - instruction: renamable $vgpr11 = IMAGE_SAMPLE_D_V1_V9_nsa_gfx11 renamable $vgpr1, renamable $vgpr0, renamable $vgpr2, renamable $vgpr3, renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9, renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+    ; GFX11-ERR: *** Bad machine code: Illegal physical register for instruction ***
+    ; GFX11-ERR: - instruction: renamable $vgpr11 = IMAGE_SAMPLE_D_V1_V9_nsa_gfx11 renamable $vgpr1, renamable $vgpr0, renamable $vgpr2, renamable $vgpr3, renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9, renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+    ; GFX11-ERR: - operand 5: renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+    ; GFX11-ERR: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 is not a VReg_160 register.
+
+    renamable $vgpr11 = IMAGE_SAMPLE_D_V1_V9_nsa_gfx11 renamable $vgpr1, renamable $vgpr0, renamable $vgpr2, renamable $vgpr3, renamable $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9, renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
+...
+
+# GFX11-ERR-NOT: *** Bad machine code
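
A minimal sketch of the operand split the checks above encode, assuming the GFX11 NSA limit of 5 VAddr operands; splitPartialNSA and the main harness below are illustrative names, not LLVM APIs or code from this patch:

#include <cassert>
#include <utility>

// Illustrative helper (hypothetical, not an LLVM API): how an image-sample
// address payload divides under partial NSA. Returns {count of single-dword
// VAddr operands, dwords covered by the final operand}; a tail of 0 means
// plain NSA fits and every address dword gets its own VGPR.
static std::pair<unsigned, unsigned> splitPartialNSA(unsigned AddrDwords,
                                                     unsigned NSAMaxSize) {
  if (AddrDwords <= NSAMaxSize)
    return {AddrDwords, 0};
  // Partial NSA: the final operand absorbs every dword past NSAMaxSize - 1
  // and must be one contiguous VGPR tuple of exactly that size.
  return {NSAMaxSize - 1, AddrDwords - (NSAMaxSize - 1)};
}

int main() {
  // sample_d_3d (g16): 7 address dwords -> [v0, v2, v3, v5, v[6:8]],
  // i.e. four single registers plus a 3-dword tail.
  assert(splitPartialNSA(7, 5) == std::make_pair(4u, 3u));
  // verify-image-partial-nsa.mir: 9 address dwords -> the fifth operand must
  // span 5 dwords (VReg_160); the 4- and 6-dword tuples fed to the verifier
  // are deliberately wrong sizes and trigger the errors checked above.
  assert(splitPartialNSA(9, 5) == std::make_pair(4u, 5u));
  return 0;
}

Compiled standalone, both assertions pass; the figures come straight from the check lines in the tests above.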