Index: lib/Target/AMDGPU/SILoadStoreOptimizer.cpp =================================================================== --- lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -42,10 +42,7 @@ // // Future improvements: // -// - This currently relies on the scheduler to place loads and stores next to -// each other, and then only merges adjacent pairs of instructions. It would -// be good to be more flexible with interleaved instructions, and possibly run -// before scheduling. It currently missing stores of constants because loading +// - This is currently missing stores of constants because loading // the constant into the data register is placed between the stores, although // this is arguably a scheduling problem. // @@ -98,14 +95,9 @@ DS_READ, DS_WRITE, S_BUFFER_LOAD_IMM, - BUFFER_LOAD_OFFEN = AMDGPU::BUFFER_LOAD_DWORD_OFFEN, - BUFFER_LOAD_OFFSET = AMDGPU::BUFFER_LOAD_DWORD_OFFSET, - BUFFER_STORE_OFFEN = AMDGPU::BUFFER_STORE_DWORD_OFFEN, - BUFFER_STORE_OFFSET = AMDGPU::BUFFER_STORE_DWORD_OFFSET, - BUFFER_LOAD_OFFEN_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact, - BUFFER_LOAD_OFFSET_exact = AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact, - BUFFER_STORE_OFFEN_exact = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact, - BUFFER_STORE_OFFSET_exact = AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact, + BUFFER_LOAD, + BUFFER_STORE, + MIMG, }; enum RegisterEnum { @@ -114,6 +106,7 @@ SOFFSET = 0x4, VADDR = 0x8, ADDR = 0x10, + SSAMP = 0x20, }; class SILoadStoreOptimizer : public MachineFunctionPass { @@ -126,6 +119,8 @@ unsigned Width0; unsigned Width1; unsigned BaseOff; + unsigned DMask0; + unsigned DMask1; InstClassEnum InstClass; bool GLC0; bool GLC1; @@ -160,13 +155,20 @@ AliasAnalysis *AA = nullptr; bool OptimizeAgain; + bool dmasksCanBeCombined(CombineInfo &CI); static bool offsetsCanBeCombined(CombineInfo &CI); static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI); static unsigned getNewOpcode(const CombineInfo &CI); static std::pair<unsigned, unsigned> 
getSubRegIdxs(const CombineInfo &CI); const TargetRegisterClass *getTargetRegisterClass(const CombineInfo &CI); unsigned getOpcodeWidth(const MachineInstr &MI); + + /// Determines instruction subclass from opcode. Only instructions + /// of the same subclass can be merged together. + unsigned getInstSubclass(unsigned Opc); + /// Maps instruction opcode to enum InstClassEnum. InstClassEnum getInstClass(unsigned Opc); + unsigned getRegs(unsigned Opc); bool findMatchingInst(CombineInfo &CI); @@ -178,6 +180,7 @@ unsigned write2Opcode(unsigned EltSize) const; unsigned write2ST64Opcode(unsigned EltSize) const; MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI); + MachineBasicBlock::iterator mergeImageSamplePair(CombineInfo &CI); MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI); @@ -307,7 +310,44 @@ return true; } +bool SILoadStoreOptimizer::dmasksCanBeCombined(CombineInfo &CI) { + assert(CI.InstClass == MIMG); + + // Ignore instructions with tfe/lwe set. + const auto *TFEOp = TII->getNamedOperand(*CI.I, AMDGPU::OpName::tfe); + const auto *LWEOp = TII->getNamedOperand(*CI.I, AMDGPU::OpName::lwe); + + if ((TFEOp && TFEOp->getImm()) || (LWEOp && LWEOp->getImm())) + return false; + + // Check other optional immediate operands for equality. + unsigned OperandsToMatch[] = {AMDGPU::OpName::glc, AMDGPU::OpName::slc, + AMDGPU::OpName::d16, AMDGPU::OpName::unorm, + AMDGPU::OpName::da, AMDGPU::OpName::r128}; + + for (auto op : OperandsToMatch) { + int Idx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), op); + if (AMDGPU::getNamedOperandIdx(CI.Paired->getOpcode(), op) != Idx) + return false; + if (Idx != -1 && + CI.I->getOperand(Idx).getImm() != CI.Paired->getOperand(Idx).getImm()) + return false; + } + + // Check DMask for overlaps. 
+ unsigned MaxMask = std::max(CI.DMask0, CI.DMask1); + unsigned MinMask = std::min(CI.DMask0, CI.DMask1); + + unsigned AllowedBitsForMin = llvm::countTrailingZeros(MaxMask); + if ((1u << AllowedBitsForMin) <= MinMask) + return false; + + return true; +} + bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) { + assert(CI.InstClass != MIMG); + // XXX - Would the same offset be OK? Is there any reason this would happen or // be useful? if (CI.Offset0 == CI.Offset1) @@ -391,6 +431,12 @@ return AMDGPU::getMUBUFDwords(Opc); } + if (TII->isMIMG(MI)) { + uint64_t DMaskImm = + TII->getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm(); + return countPopulation(DMaskImm); + } + switch (Opc) { default: return 0; @@ -403,52 +449,72 @@ } } -InstClassEnum SILoadStoreOptimizer::getInstClass(unsigned Opc) { - if (TII->isMUBUF(Opc)) { - const int baseOpcode = AMDGPU::getMUBUFBaseOpcode(Opc); - - // If we couldn't identify the opcode, bail out. - if (baseOpcode == -1) { - return UNKNOWN; - } - - switch (baseOpcode) { - default: - return UNKNOWN; - case AMDGPU::BUFFER_LOAD_DWORD_OFFEN: - return BUFFER_LOAD_OFFEN; - case AMDGPU::BUFFER_LOAD_DWORD_OFFSET: - return BUFFER_LOAD_OFFSET; - case AMDGPU::BUFFER_STORE_DWORD_OFFEN: - return BUFFER_STORE_OFFEN; - case AMDGPU::BUFFER_STORE_DWORD_OFFSET: - return BUFFER_STORE_OFFSET; - case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact: - return BUFFER_LOAD_OFFEN_exact; - case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact: - return BUFFER_LOAD_OFFSET_exact; - case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact: - return BUFFER_STORE_OFFEN_exact; - case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact: - return BUFFER_STORE_OFFSET_exact; +unsigned SILoadStoreOptimizer::getInstSubclass(unsigned Opc) { + switch (Opc) { + default: + if (TII->isMUBUF(Opc)) + return AMDGPU::getMUBUFBaseOpcode(Opc); + if (TII->isMIMG(Opc)) { + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + if (Info) + return Info->BaseOpcode; } + return -1; + case AMDGPU::DS_READ_B32: + case 
AMDGPU::DS_READ_B32_gfx9: + case AMDGPU::DS_READ_B64: + case AMDGPU::DS_READ_B64_gfx9: + case AMDGPU::DS_WRITE_B32: + case AMDGPU::DS_WRITE_B32_gfx9: + case AMDGPU::DS_WRITE_B64: + case AMDGPU::DS_WRITE_B64_gfx9: + return Opc; + case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: + case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: + return AMDGPU::S_BUFFER_LOAD_DWORD_IMM; } +} +InstClassEnum SILoadStoreOptimizer::getInstClass(unsigned Opc) { switch (Opc) { default: + if (TII->isMUBUF(Opc)) { + switch(AMDGPU::getMUBUFBaseOpcode(Opc)) { + default: + return UNKNOWN; + case AMDGPU::BUFFER_LOAD_DWORD_OFFEN: + case AMDGPU::BUFFER_LOAD_DWORD_OFFEN_exact: + case AMDGPU::BUFFER_LOAD_DWORD_OFFSET: + case AMDGPU::BUFFER_LOAD_DWORD_OFFSET_exact: + return BUFFER_LOAD; + case AMDGPU::BUFFER_STORE_DWORD_OFFEN: + case AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact: + case AMDGPU::BUFFER_STORE_DWORD_OFFSET: + case AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact: + return BUFFER_STORE; + } + } else if (TII->isMIMG(Opc)) { + /* Ignore instructions encoded without vaddr */ + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1) + return UNKNOWN; + if (!TII->get(Opc).mayLoad() || TII->isGather4(Opc)) + return UNKNOWN; + return MIMG; + } return UNKNOWN; case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: return S_BUFFER_LOAD_IMM; case AMDGPU::DS_READ_B32: - case AMDGPU::DS_READ_B64: case AMDGPU::DS_READ_B32_gfx9: + case AMDGPU::DS_READ_B64: case AMDGPU::DS_READ_B64_gfx9: return DS_READ; case AMDGPU::DS_WRITE_B32: - case AMDGPU::DS_WRITE_B64: case AMDGPU::DS_WRITE_B32_gfx9: + case AMDGPU::DS_WRITE_B64: case AMDGPU::DS_WRITE_B64_gfx9: return DS_WRITE; } @@ -473,6 +539,14 @@ return result; } + if (TII->isMIMG(Opc)) { + unsigned result = VADDR | SRSRC; + const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); + if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler) + result |= SSAMP; + return 
result; + } + switch (Opc) { default: return 0; @@ -503,6 +577,7 @@ if (InstClass == UNKNOWN) { return false; } + const unsigned InstSubclass = getInstSubclass(Opc); const unsigned Regs = getRegs(Opc); @@ -531,6 +606,10 @@ AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr; } + if (Regs & SSAMP) { + AddrOpName[NumAddresses++] = AMDGPU::OpName::ssamp; + } + for (unsigned i = 0; i < NumAddresses; i++) { AddrIdx[i] = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AddrOpName[i]); AddrReg[i] = &CI.I->getOperand(AddrIdx[i]); @@ -550,10 +629,9 @@ addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove); for (; MBBI != E; ++MBBI) { - const bool IsDS = (InstClass == DS_READ) || (InstClass == DS_WRITE); if ((getInstClass(MBBI->getOpcode()) != InstClass) || - (IsDS && (MBBI->getOpcode() != Opc))) { + (getInstSubclass(MBBI->getOpcode()) != InstSubclass)) { // This is not a matching DS instruction, but we can keep looking as // long as one of these conditions are met: // 1. It is safe to move I down past MBBI. @@ -622,18 +700,28 @@ } if (Match) { - int OffsetIdx = - AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::offset); - CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm(); + + // For MIMG match DMask, not Offset. 
+ if (CI.InstClass != MIMG) { + int OffsetIdx = AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), + AMDGPU::OpName::offset); + CI.Offset0 = CI.I->getOperand(OffsetIdx).getImm(); + CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm(); + } else { + CI.DMask0 = + TII->getNamedOperand(*CI.I, AMDGPU::OpName::dmask)->getImm(); + CI.DMask1 = + TII->getNamedOperand(*MBBI, AMDGPU::OpName::dmask)->getImm(); + } + CI.Width0 = getOpcodeWidth(*CI.I); - CI.Offset1 = MBBI->getOperand(OffsetIdx).getImm(); CI.Width1 = getOpcodeWidth(*MBBI); CI.Paired = MBBI; if ((CI.InstClass == DS_READ) || (CI.InstClass == DS_WRITE)) { CI.Offset0 &= 0xffff; CI.Offset1 &= 0xffff; - } else { + } else if (CI.InstClass != MIMG) { CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm(); CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm(); if (CI.InstClass != S_BUFFER_LOAD_IMM) { @@ -644,13 +732,18 @@ CI.DLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::dlc)->getImm(); } - // Check both offsets fit in the reduced range. + // Check both offsets (or masks for MIMG) fit in the reduced + // range. + bool canBeCombined = + CI.InstClass == MIMG + ? dmasksCanBeCombined(CI) + : widthsFit(*STM, CI) && offsetsCanBeCombined(CI); + // We also need to go through the list of instructions that we plan to // move and make sure they are all safe to move down past the merged // instruction. - if (widthsFit(*STM, CI) && offsetsCanBeCombined(CI)) - if (canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) - return true; + if (canBeCombined && canMoveInstsAcrossMemOp(*MBBI, CI.InstsToMove, AA)) + return true; } // We've found a load/store that we couldn't merge for some reason. 
@@ -847,6 +940,49 @@ return Next; } +MachineBasicBlock::iterator +SILoadStoreOptimizer::mergeImageSamplePair(CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + const unsigned Opcode = getNewOpcode(CI); + + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); + + unsigned DestReg = MRI->createVirtualRegister(SuperRC); + unsigned MergedDMask = CI.DMask0 | CI.DMask1; + unsigned DMaskIdx = + AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::dmask); + + auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg); + for (unsigned I = 1, E = (*CI.I).getNumOperands(); I != E; ++I) { + (I == DMaskIdx) ? MIB.addImm(MergedDMask) : MIB.add((*CI.I).getOperand(I)); + } + MIB.cloneMergedMemRefs({&*CI.I, &*CI.Paired}); + + std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the old destination registers. + const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); + const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata); + + BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest0) // Copy to same destination including flags and sub reg. 
+ .addReg(DestReg, 0, SubRegIdx0); + MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + moveInstsAfter(Copy1, CI.InstsToMove); + + MachineBasicBlock::iterator Next = std::next(CI.I); + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); + return Next; +} + MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair(CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); @@ -947,7 +1083,9 @@ switch (CI.InstClass) { default: - return AMDGPU::getMUBUFOpcode(CI.InstClass, Width); + assert(CI.InstClass == BUFFER_LOAD || CI.InstClass == BUFFER_STORE); + return AMDGPU::getMUBUFOpcode(AMDGPU::getMUBUFBaseOpcode(CI.I->getOpcode()), + Width); case UNKNOWN: llvm_unreachable("Unknown instruction class"); case S_BUFFER_LOAD_IMM: @@ -959,76 +1097,47 @@ case 4: return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM; } + case MIMG: + assert("No overlaps" && (countPopulation(CI.DMask0 | CI.DMask1) == Width)); + return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width); } } std::pair<unsigned, unsigned> SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI) { - if (CI.Offset0 > CI.Offset1) { - switch (CI.Width0) { - default: - return std::make_pair(0, 0); - case 1: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub1, AMDGPU::sub0); - case 2: - return std::make_pair(AMDGPU::sub2, AMDGPU::sub0_sub1); - case 3: - return std::make_pair(AMDGPU::sub3, AMDGPU::sub0_sub1_sub2); - } - case 2: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub1_sub2, AMDGPU::sub0); - case 2: - return std::make_pair(AMDGPU::sub2_sub3, AMDGPU::sub0_sub1); - } - case 3: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub1_sub2_sub3, AMDGPU::sub0); - } - } + + if (CI.Width0 == 0 || CI.Width1 == 0 || CI.Width0 + CI.Width1 > 4) + return std::make_pair(0, 0); + + bool 
ReverseOrder; + if (CI.InstClass == MIMG) { + assert("No overlaps" && + (countPopulation(CI.DMask0 | CI.DMask1) == CI.Width0 + CI.Width1)); + ReverseOrder = CI.DMask0 > CI.DMask1; + } else + ReverseOrder = CI.Offset0 > CI.Offset1; + + unsigned Idxs [4][4] = { + {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3}, + {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0}, + {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0}, + {AMDGPU::sub3, 0, 0, 0}, + }; + unsigned Idx0; + unsigned Idx1; + + assert(CI.Width0 >= 1 && CI.Width0 <= 3); + assert(CI.Width1 >= 1 && CI.Width1 <= 3); + + if (ReverseOrder) { + Idx1 = Idxs[0][CI.Width1 - 1]; + Idx0 = Idxs[CI.Width1][CI.Width0 - 1]; } else { - switch (CI.Width0) { - default: - return std::make_pair(0, 0); - case 1: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1); - case 2: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2); - case 3: - return std::make_pair(AMDGPU::sub0, AMDGPU::sub1_sub2_sub3); - } - case 2: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2); - case 2: - return std::make_pair(AMDGPU::sub0_sub1, AMDGPU::sub2_sub3); - } - case 3: - switch (CI.Width1) { - default: - return std::make_pair(0, 0); - case 1: - return std::make_pair(AMDGPU::sub0_sub1_sub2, AMDGPU::sub3); - } - } + Idx0 = Idxs[0][CI.Width0 - 1]; + Idx1 = Idxs[CI.Width0][CI.Width1 - 1]; } + + return std::make_pair(Idx0, Idx1); } const TargetRegisterClass * @@ -1483,10 +1592,7 @@ ++I; } continue; - case BUFFER_LOAD_OFFEN: - case BUFFER_LOAD_OFFSET: - case BUFFER_LOAD_OFFEN_exact: - case BUFFER_LOAD_OFFSET_exact: + case BUFFER_LOAD: CI.EltSize = 4; if (findMatchingInst(CI)) { Modified = true; @@ -1496,10 +1602,7 @@ ++I; } continue; - case BUFFER_STORE_OFFEN: - case BUFFER_STORE_OFFSET: - case BUFFER_STORE_OFFEN_exact: - case BUFFER_STORE_OFFSET_exact: + case 
BUFFER_STORE: CI.EltSize = 4; if (findMatchingInst(CI)) { Modified = true; @@ -1509,6 +1612,14 @@ ++I; } continue; + case MIMG: + if (findMatchingInst(CI)) { + Modified = true; + I = mergeImageSamplePair(CI); + } else { + ++I; + } + continue; } ++I; Index: test/CodeGen/AMDGPU/merge-image-load.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/merge-image-load.mir @@ -0,0 +1,471 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s + +# GFX9-LABEL: name: image_load_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- +# GFX9-LABEL: name: image_load_l_merged_v1v3_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 + +name: image_load_l_merged_v1v3_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_merged_v2v2 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3 + +name: image_load_l_merged_v2v2 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_l_merged_v2v2_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1 + +name: image_load_l_merged_v2v2_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_64 = IMAGE_LOAD_V2_V4 %5:vreg_128, %3:sreg_256, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_merged_v3v1 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3 + +name: image_load_l_merged_v3v1 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_l_merged_v3v1_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0 + +name: image_load_l_merged_v3v1_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_divided_merged +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_divided_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %9:vreg_96 = IMAGE_LOAD_V3_V4 %7:vreg_128, %3:sreg_256, 7, 0, 0, 
0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %11:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_divided_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_divided_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vreg_128 = COPY %2 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sreg_256, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_l_dmask_overlapped_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_dmask_overlapped_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_dmask_not_disjoint_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_dmask_not_disjoint_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sreg_256, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sreg_256, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_l_not_merged_0 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_not_merged_0 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_not_merged_1 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_not_merged_1 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %5:vgpr_32 = COPY %2.sub3 + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_LOAD_V1_V4 %6, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_LOAD_V3_V4 %6, %4, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_l_not_merged_10 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec + +name: image_load_l_not_merged_10 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_not_merged_3 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_not_merged_3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_l_not_merged_4 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_not_merged_4 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_not_merged_5 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_not_merged_5 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_l_not_merged_6 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec + +name: image_load_l_not_merged_6 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_not_merged_7 +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_not_merged_7 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_l_not_merged_8 +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_load_l_not_merged_8 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_LOAD_V2_V4 %5, %3, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_l_not_merged_9 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec + +name: image_load_l_not_merged_9 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_mip_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_mip_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_MIP_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + + +# GFX9-LABEL: name: image_load_mip_pck_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_mip_pck_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_PCK_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + + +# GFX9-LABEL: name: image_load_mip_pck_sgn_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_MIP_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_mip_pck_sgn_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_MIP_PCK_SGN_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_MIP_PCK_SGN_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_load_pck_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_pck_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_PCK_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_PCK_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_load_pck_sgn_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_LOAD_PCK_SGN_V4_V4 %5, %3, 15, 0, 0, 0, 0, 0, 0, -1, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_load_pck_sgn_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_LOAD_PCK_SGN_V1_V4 %5:vreg_128, %3:sreg_256, 1, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_LOAD_PCK_SGN_V3_V4 %5:vreg_128, %3:sreg_256, 14, 0, 0, 0, 0, 0, 0, -1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- Index: test/CodeGen/AMDGPU/merge-image-sample.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/merge-image-sample.mir @@ -0,0 +1,1173 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s + +# GFX9-LABEL: name: image_sample_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 
= IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- +# GFX9-LABEL: name: image_sample_l_merged_v1v3_reversed +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub3 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub0_sub1_sub2 + +name: image_sample_l_merged_v1v3_reversed +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+---
+
+# GFX9-LABEL: name: image_sample_l_merged_v2v2
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub0_sub1
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub2_sub3
+
+name: image_sample_l_merged_v2v2
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+    %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: image_sample_l_merged_v2v2_reversed
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY %8.sub2_sub3
+# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %8.sub0_sub1
+
+name: image_sample_l_merged_v2v2_reversed
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 12, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, addrspace 4)
+    %7:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 8, align 16, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: image_sample_l_merged_v3v1
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub0_sub1_sub2
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub3
+
+name: image_sample_l_merged_v3v1
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+    %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+...
+---
+
+# GFX9-LABEL: name: image_sample_l_merged_v3v1_reversed
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY %8.sub1_sub2_sub3
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %8.sub0
+
+name: image_sample_l_merged_v3v1_reversed
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+    %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+...
+--- + +# GFX9-LABEL: name: image_sample_l_divided_merged +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_divided_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %8:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %9:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %7:vreg_128, %3:sreg_256, %2:sreg_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) + %10:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %11:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_divided_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_divided_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vreg_128 = COPY %2 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + IMAGE_STORE_V4_V4 %4:vreg_128, %5:vreg_128, %3:sreg_256, 15, -1, 0, 0, 0, 0, 0, 0, 0, implicit $exec + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_dmask_overlapped_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_dmask_overlapped_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_dmask_not_disjoint_not_merged +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_dmask_not_disjoint_not_merged +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 4, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 11, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_not_merged_0 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_0 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_not_merged_1 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_1 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %5:vgpr_32 = COPY %2.sub3 + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_2 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_2 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_128 = COPY $sgpr92_sgpr93_sgpr94_sgpr95 + %4:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %5:vgpr_32 = COPY %2.sub3 + %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %7:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %6, %4, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %8:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %6, %4, %3, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_not_merged_3 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_4 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_4 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 1, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_not_merged_5 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_5 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 1, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_6 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_6 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 1, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_not_merged_7 +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_7 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 1, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_8 +# GFX9: %{{[0-9]+}}:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec + +name: image_sample_l_not_merged_8 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vreg_64 = IMAGE_SAMPLE_L_V2_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 1, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + +# GFX9-LABEL: name: image_sample_l_not_merged_9 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec + +name: image_sample_l_not_merged_9 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + +# GFX9-LABEL: name: image_sample_l_not_merged_10 +# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec + +name: image_sample_l_not_merged_10 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 8, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 7, 0, 0, 0, 0, 0, 0, -1, 1, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + + + +# GFX9-LABEL: name: image_sample_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... +--- + + +# GFX9-LABEL: name: image_sample_b_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_b_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_B_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 
4) +... +--- + + +# GFX9-LABEL: name: image_sample_b_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_b_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_B_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_b_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_b_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_B_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_b_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_b_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_B_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_B_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_cd_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cd_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CD_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_cd_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cd_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_cd_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cd_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CD_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_cd_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cd_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CD_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CD_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_b_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_b_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_b_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_b_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_b_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_b_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_b_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_B_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_b_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_B_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_B_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_cd_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cd_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_cd_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cd_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_cd_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cd_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_cd_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CD_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cd_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CD_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CD_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_d_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_d_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_d_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_d_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_d_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_d_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_d_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_d_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_D_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_D_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_l_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_l_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_L_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_L_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_lz_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_lz_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_LZ_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_lz_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_lz_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_LZ_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_LZ_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_l_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_l_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_L_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_L_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_c_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_C_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_c_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_C_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_C_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_d_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_d_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_D_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_d_cl_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_d_cl_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_D_CL_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_CL_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+--- + + +# GFX9-LABEL: name: image_sample_d_cl_o_merged_v1v3 +# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_CL_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3 + +name: image_sample_d_cl_o_merged_v1v3 +body: | + bb.0.entry: + %0:sgpr_64 = COPY $sgpr0_sgpr1 + %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0 + %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99 + %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0 + %4:vgpr_32 = COPY %2.sub3 + %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16) + %6:vgpr_32 = IMAGE_SAMPLE_D_CL_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4) + %7:vreg_96 = IMAGE_SAMPLE_D_CL_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4) +... 
+---
+
+
+# Check that a V1 (dmask 1) and a V3 (dmask 14) IMAGE_SAMPLE_D_O with matching
+# vaddr/srsrc/ssamp are merged into one V4 (dmask 15) sample, and the original
+# results are recovered as sub0 and sub1_sub2_sub3 copies of the merged vreg_128.
+# GFX9-LABEL: name: image_sample_d_o_merged_v1v3
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_D_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
+
+name: image_sample_d_o_merged_v1v3
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vgpr_32 = IMAGE_SAMPLE_D_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+    %7:vreg_96 = IMAGE_SAMPLE_D_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+...
+---
+
+
+# Check that a V1 (dmask 1) and a V3 (dmask 14) IMAGE_SAMPLE_LZ with matching
+# vaddr/srsrc/ssamp are merged into one V4 (dmask 15) sample, and the original
+# results are recovered as sub0 and sub1_sub2_sub3 copies of the merged vreg_128.
+# GFX9-LABEL: name: image_sample_lz_merged_v1v3
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
+
+name: image_sample_lz_merged_v1v3
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+    %7:vreg_96 = IMAGE_SAMPLE_LZ_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+...
+---
+
+
+# Check that a V1 (dmask 1) and a V3 (dmask 14) IMAGE_SAMPLE_LZ_O with matching
+# vaddr/srsrc/ssamp are merged into one V4 (dmask 15) sample, and the original
+# results are recovered as sub0 and sub1_sub2_sub3 copies of the merged vreg_128.
+# GFX9-LABEL: name: image_sample_lz_o_merged_v1v3
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_LZ_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
+
+name: image_sample_lz_o_merged_v1v3
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vgpr_32 = IMAGE_SAMPLE_LZ_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+    %7:vreg_96 = IMAGE_SAMPLE_LZ_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+...
+---
+
+
+# Check that a V1 (dmask 1) and a V3 (dmask 14) IMAGE_SAMPLE_L_O with matching
+# vaddr/srsrc/ssamp are merged into one V4 (dmask 15) sample, and the original
+# results are recovered as sub0 and sub1_sub2_sub3 copies of the merged vreg_128.
+# GFX9-LABEL: name: image_sample_l_o_merged_v1v3
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_L_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
+
+name: image_sample_l_o_merged_v1v3
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vgpr_32 = IMAGE_SAMPLE_L_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+    %7:vreg_96 = IMAGE_SAMPLE_L_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+...
+---
+
+
+# Check that a V1 (dmask 1) and a V3 (dmask 14) IMAGE_SAMPLE_O with matching
+# vaddr/srsrc/ssamp are merged into one V4 (dmask 15) sample, and the original
+# results are recovered as sub0 and sub1_sub2_sub3 copies of the merged vreg_128.
+# GFX9-LABEL: name: image_sample_o_merged_v1v3
+# GFX9: %{{[0-9]+}}:vreg_128 = IMAGE_SAMPLE_O_V4_V4 %5, %3, %2, 15, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %8.sub0
+# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %8.sub1_sub2_sub3
+
+name: image_sample_o_merged_v1v3
+body: |
+  bb.0.entry:
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0, 0
+    %2:sreg_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+    %3:sreg_256 = S_LOAD_DWORDX8_IMM %1, 208, 0, 0
+    %4:vgpr_32 = COPY %2.sub3
+    %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 16)
+    %6:vgpr_32 = IMAGE_SAMPLE_O_V1_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 1, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 4, addrspace 4)
+    %7:vreg_96 = IMAGE_SAMPLE_O_V3_V4 %5:vreg_128, %3:sreg_256, %2:sreg_128, 14, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load 12, align 16, addrspace 4)
+...
+---