Index: lib/Target/AMDGPU/SILoadStoreOptimizer.cpp =================================================================== --- lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -97,6 +97,7 @@ S_BUFFER_LOAD_IMM, BUFFER_LOAD, BUFFER_STORE, + MIMG, }; enum RegisterEnum { @@ -105,6 +106,7 @@ SOFFSET = 0x4, VADDR = 0x8, ADDR = 0x10, + SSAMP = 0x20, }; class SILoadStoreOptimizer : public MachineFunctionPass { @@ -117,6 +119,8 @@ unsigned Width0; unsigned Width1; unsigned BaseOff; + unsigned DMask0; + unsigned DMask1; InstClassEnum InstClass; bool GLC0; bool GLC1; @@ -205,6 +209,7 @@ AliasAnalysis *AA = nullptr; bool OptimizeAgain; + static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII); static bool offsetsCanBeCombined(CombineInfo &CI); static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI); static unsigned getNewOpcode(const CombineInfo &CI); @@ -220,6 +225,7 @@ unsigned write2Opcode(unsigned EltSize) const; unsigned write2ST64Opcode(unsigned EltSize) const; MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI); + MachineBasicBlock::iterator mergeImagePair(CombineInfo &CI); MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI); @@ -273,6 +279,11 @@ // FIXME: Handle d16 correctly return AMDGPU::getMUBUFElements(Opc); } + if (TII.isMIMG(MI)) { + uint64_t DMaskImm = + TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm(); + return countPopulation(DMaskImm); + } switch (Opc) { case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: @@ -306,6 +317,14 @@ return BUFFER_STORE; } } + if (TII.isMIMG(Opc)) { + // Ignore instructions encoded without vaddr. + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1) + return UNKNOWN; + if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc)) + return UNKNOWN; + return MIMG; + } return UNKNOWN; case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: @@ -331,6 +350,11 @@ default: if (TII.isMUBUF(Opc)) return AMDGPU::getMUBUFBaseOpcode(Opc); + if (TII.isMIMG(Opc)) { + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + assert(Info); + return Info->BaseOpcode; + } return -1; case AMDGPU::DS_READ_B32: case AMDGPU::DS_READ_B32_gfx9: @@ -367,6 +391,14 @@ return result; } + if (TII.isMIMG(Opc)) { + unsigned result = VADDR | SRSRC; + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler) + result |= SSAMP; + return result; + } + switch (Opc) { default: return 0; @@ -416,14 +448,18 @@ break; } - int OffsetIdx = - AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset); - Offset0 = I->getOperand(OffsetIdx).getImm(); + if (InstClass == MIMG) { + DMask0 = TII.getNamedOperand(*I, AMDGPU::OpName::dmask)->getImm(); + } else { + int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset); + Offset0 = I->getOperand(OffsetIdx).getImm(); + } + Width0 = getOpcodeWidth(*I, TII); if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { Offset0 &= 0xffff; - } else { + } else if (InstClass != MIMG) { GLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::glc)->getImm(); if (InstClass != S_BUFFER_LOAD_IMM) { SLC0 = TII.getNamedOperand(*I, AMDGPU::OpName::slc)->getImm(); @@ -455,6 +491,10 @@ AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr; } + if (Regs & SSAMP) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::ssamp; + } + for (unsigned i = 0; i < NumAddresses; i++) { AddrIdx[i] = AMDGPU::getNamedOperandIdx(I->getOpcode(), AddrOpName[i]); AddrReg[i] = &I->getOperand(AddrIdx[i]); @@ -467,13 +507,19 @@ const SIInstrInfo &TII) { Paired = MI; assert(InstClass == getInstClass(Paired->getOpcode(), TII)); - int OffsetIdx = - AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::offset); - Offset1 = Paired->getOperand(OffsetIdx).getImm(); + + if (InstClass == MIMG) { + DMask1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::dmask)->getImm(); + } else { + int OffsetIdx = + AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::offset); + Offset1 = Paired->getOperand(OffsetIdx).getImm(); + } + Width1 = getOpcodeWidth(*Paired, TII); if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { Offset1 &= 0xffff; - } else { + } else if (InstClass != MIMG) { GLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::glc)->getImm(); if (InstClass != S_BUFFER_LOAD_IMM) { SLC1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::slc)->getImm(); @@ -588,7 +634,44 @@ return MMO; } +bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII) { + assert(CI.InstClass == MIMG); + + // Ignore instructions with tfe/lwe set. + const auto *TFEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::tfe); + const auto *LWEOp = TII.getNamedOperand(*CI.I, AMDGPU::OpName::lwe); + + if ((TFEOp && TFEOp->getImm()) || (LWEOp && LWEOp->getImm())) + return false; + + // Check other optional immediate operands for equality. + unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc, + AMDGPU::OpName::d16, AMDGPU::OpName::unorm, + AMDGPU::OpName::da, AMDGPU::OpName::r128}; + + for (auto op : OperandsToMatch) { + int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op); + if (AMDGPU::getNamedOperandIdx(CI.Paired->getOpcode(), op) != Idx) + return false; + if (Idx != -1 && + CI.I->getOperand(Idx).getImm() != CI.Paired->getOperand(Idx).getImm()) + return false; + } + + // Check DMask for overlaps. + unsigned MaxMask = std::max(CI.DMask0, CI.DMask1); + unsigned MinMask = std::min(CI.DMask0, CI.DMask1); + + unsigned AllowedBitsForMin = llvm::countTrailingZeros(MaxMask); + if ((1u << AllowedBitsForMin) <= MinMask) + return false; + + return true; +} + bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) { + assert(CI.InstClass != MIMG); + // XXX - Would the same offset be OK? Is there any reason this would happen or // be useful? if (CI.Offset0 == CI.Offset1) @@ -744,13 +827,18 @@ if (Match) { CI.setPaired(MBBI, *TII); - // Check both offsets fit in the reduced range. + // Check both offsets (or masks for MIMG) can be combined and fit in the + // reduced range. + bool canBeCombined = + CI.InstClass == MIMG + ? dmasksCanBeCombined(CI, *TII) + : widthsFit(*STM, CI) && offsetsCanBeCombined(CI); + // We also need to go through the list of instructions that we plan to // move and make sure they are all safe to move down past the merged // instruction. - if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI)) - if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) - return true; + if (canBeCombined && canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) + return true; } // We've found a load/store that we couldn't merge for some reason. @@ -945,6 +1033,60 @@ return Write2; } +MachineBasicBlock::iterator +SILoadStoreOptimizer::mergeImagePair(CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + const unsigned Opcode = getNewOpcode(CI); + + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); + + Register DestReg = MRI->createVirtualRegister(SuperRC); + unsigned MergedDMask = CI.DMask0 | CI.DMask1; + unsigned DMaskIdx = + AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask); + + auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg); + for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) { + if (I == DMaskIdx) + MIB.addImm(MergedDMask); + else + MIB.add((*CI.I).getOperand(I)); + } + + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. + assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = MIB.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + + std::pair SubRegIdx = getSubRegIdxs(CI); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the old destination registers. + const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); + const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata); + + BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest0) // Copy to same destination including flags and sub reg. + .addReg(DestReg, 0, SubRegIdx0); + MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + moveInstsAfter(Copy1, CI.InstsToMove); + + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); + return New; +} + MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); @@ -1077,6 +1219,9 @@ case 4: return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM; } + case MIMG: + assert("No overlaps" && (countPopulation(CI.DMask0 | CI.DMask1) == Width)); + return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width); } } @@ -1086,7 +1231,13 @@ if (CI.Width0 == 0 || CI.Width0 == 0 || CI.Width0 + CI.Width1 > 4) return std::make_pair(0, 0); - bool ReverseOrder = CI.Offset0 > CI.Offset1; + bool ReverseOrder; + if (CI.InstClass == MIMG) { + assert((countPopulation(CI.DMask0 | CI.DMask1) == CI.Width0 + CI.Width1) && + "No overlaps"); + ReverseOrder = CI.DMask0 > CI.DMask1; + } else + ReverseOrder = CI.Offset0 > CI.Offset1; static const unsigned Idxs[4][4] = { {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3}, @@ -1655,6 +1806,15 @@ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; } break; + case MIMG: + if (findMatchingInst(CI)) { + Modified = true; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeImagePair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; + } + break; } // Clear the InstsToMove after we have finished searching so we don't have // stale values left over if we search for this CI again in another pass Index: test/CodeGen/AMDGPU/merge-image-load.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/merge-image-load.mir @@ -0,0 +1,471 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s + +# GFX9-LABEL: name: image_load_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- +# GFX9-LABEL: name: image_load_merged_v1v3_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 + +name: image_load_merged_v1v3_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_merged_v2v2 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 + +name: image_load_merged_v2v2 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_merged_v2v2_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 + +name: image_load_merged_v2v2_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_merged_v3v1 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 + +name: image_load_merged_v3v1 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_merged_v3v1_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 + +name: image_load_merged_v3v1_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_divided_merged +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) + +name: image_load_divided_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %9:vreg_96 = IMAGE_LOAD_V3_V4 %7:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %11:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_divided_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_divided_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vreg_128 = COPY %2 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sreg_256, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_dmask_overlapped_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_dmask_overlapped_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_dmask_not_disjoint_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_dmask_not_disjoint_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_0 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_0 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_1 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_1 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %5:vgpr_32 = COPY %2.sub3 + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_10 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_10 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_3 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_4 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_4 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_5 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_5 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_6 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_6 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_7 +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_7 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_8 +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_8 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_not_merged_9 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_load_not_merged_9 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_mip_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_mip_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + + +# GFX9-LABEL: name: image_load_mip_pck_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_mip_pck_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + + +# GFX9-LABEL: name: image_load_mip_pck_sgn_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_mip_pck_sgn_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_pck_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_pck_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_pck_sgn_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_pck_sgn_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 =COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- Index: test/CodeGen/AMDGPU/merge-image-sample.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/merge-image-sample.mir @@ -0,0 +1,1173 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s + +# GFX9-LABEL: name: image_sample_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- +# GFX9-LABEL: name: image_sample_l_merged_v1v3_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 + +name: image_sample_l_merged_v1v3_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_merged_v2v2 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 + +name: image_sample_l_merged_v2v2 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_merged_v2v2_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 + +name: image_sample_l_merged_v2v2_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) + %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_merged_v3v1 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 + +name: image_sample_l_merged_v3v1 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_merged_v3v1_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 + +name: image_sample_l_merged_v3v1_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_divided_merged +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) + +name: image_sample_l_divided_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %9:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %7:vreg_128, %3:sreg_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %11:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_divided_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_divided_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vreg_128 = COPY %2 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sreg_256, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_dmask_overlapped_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_dmask_overlapped_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_dmask_not_disjoint_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_dmask_not_disjoint_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_0 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_0 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_1 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_1 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %5:vgpr_32 = COPY %2.sub3 + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_2 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_2 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sgpr_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 + %4:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %5:vgpr_32 = COPY %2.sub3 + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_3 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_4 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_4 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_5 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_5 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_6 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_6 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_7 +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_7 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_8 +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_8 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_9 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_9 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_10 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + +name: image_sample_l_not_merged_10 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + + + +# GFX9-LABEL: name: image_sample_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_b_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_b_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_b_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_b_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_b_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_b_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_b_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_b_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_cd_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cd_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_cd_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cd_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_cd_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cd_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_cd_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cd_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_b_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_b_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_b_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_b_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_b_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_b_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_b_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_cd_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cd_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_cd_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cd_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cd_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_cd_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cd_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_d_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_d_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_d_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_d_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_d_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_d_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_d_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_lz_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_lz_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_lz_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_lz_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_l_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_l_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_c_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_d_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_d_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_d_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_d_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_d_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_d_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_d_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_d_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_lz_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_lz_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_lz_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_lz_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_l_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_l_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec, implicit $exec :: (dereferenceable load 16, align 4, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sgpr_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +---