diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -30,6 +30,147 @@ def : RsrcIntrinsic(intr)>; } +class GcnBufferFormatBase f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> { + bits<8> Format = f; + bits<8> BitsPerComp = bpc; + bits<8> NumComponents = numc; + bits<8> NumFormat = nfmt; + bits<8> DataFormat = dfmt; +} + +class Gfx9BufferFormat f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase; +class Gfx10PlusBufferFormat f, bits<8> bpc, bits<8> numc, bits<8> nfmt, bits<8> dfmt> : GcnBufferFormatBase; + +class GcnBufferFormatTable : GenericTable { + let CppTypeName = "GcnBufferFormatInfo"; + let Fields = ["Format", "BitsPerComp", "NumComponents", "NumFormat", "DataFormat"]; + let PrimaryKey = ["BitsPerComp", "NumComponents", "NumFormat"]; +} + +def Gfx9BufferFormat : GcnBufferFormatTable { + let FilterClass = "Gfx9BufferFormat"; + let PrimaryKeyName = "getGfx9BufferFormatInfo"; +} +def Gfx10PlusBufferFormat : GcnBufferFormatTable { + let FilterClass = "Gfx10PlusBufferFormat"; + let PrimaryKeyName = "getGfx10PlusBufferFormatInfo"; +} + +def getGfx9BufferFormatInfo : SearchIndex { + let Table = Gfx9BufferFormat; + let Key = ["Format"]; +} +def getGfx10PlusBufferFormatInfo : SearchIndex { + let Table = Gfx10PlusBufferFormat; + let Key = ["Format"]; +} + +// Buffer formats with equal component sizes (GFX9 and earlier) +def : Gfx9BufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>; +def : Gfx9BufferFormat< /*FORMAT_8_SNORM*/ 0x11, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>; +def : Gfx9BufferFormat< /*FORMAT_8_USCALED*/ 0x21, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>; +def : Gfx9BufferFormat< /*FORMAT_8_SSCALED*/ 0x31, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>; +def : Gfx9BufferFormat< /*FORMAT_8_UINT*/ 0x41, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>; +def : Gfx9BufferFormat< /*FORMAT_8_SINT*/ 0x51, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>; +def : Gfx9BufferFormat< /*FORMAT_16_UNORM*/ 0x02, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>; +def : Gfx9BufferFormat< /*FORMAT_16_SNORM*/ 0x12, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>; +def : Gfx9BufferFormat< /*FORMAT_16_USCALED*/ 0x22, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>; +def : Gfx9BufferFormat< /*FORMAT_16_SSCALED*/ 0x32, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>; +def : Gfx9BufferFormat< /*FORMAT_16_UINT*/ 0x42, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>; +def : Gfx9BufferFormat< /*FORMAT_16_SINT*/ 0x52, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>; +def : Gfx9BufferFormat< /*FORMAT_16_FLOAT*/ 0x72, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>; +def : Gfx9BufferFormat< /*FORMAT_8_8_UNORM*/ 0x03, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx9BufferFormat< /*FORMAT_8_8_SNORM*/ 0x13, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx9BufferFormat< /*FORMAT_8_8_USCALED*/ 0x23, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx9BufferFormat< /*FORMAT_8_8_SSCALED*/ 0x33, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx9BufferFormat< /*FORMAT_8_8_UINT*/ 0x43, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx9BufferFormat< /*FORMAT_8_8_SINT*/ 0x53, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx9BufferFormat< /*FORMAT_32_UINT*/ 0x44, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>; +def : Gfx9BufferFormat< /*FORMAT_32_SINT*/ 0x54, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>; +def : Gfx9BufferFormat< /*FORMAT_32_FLOAT*/ 0x74, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>; +def : Gfx9BufferFormat< /*FORMAT_16_16_UNORM*/ 0x05, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx9BufferFormat< /*FORMAT_16_16_SNORM*/ 0x15, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx9BufferFormat< /*FORMAT_16_16_USCALED*/ 0x25, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx9BufferFormat< /*FORMAT_16_16_SSCALED*/ 0x35, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx9BufferFormat< /*FORMAT_16_16_UINT*/ 0x45, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx9BufferFormat< /*FORMAT_16_16_SINT*/ 0x55, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx9BufferFormat< /*FORMAT_16_16_FLOAT*/ 0x75, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x0A, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x1A, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x2A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x4A, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx9BufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x5A, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx9BufferFormat< /*FORMAT_32_32_UINT*/ 0x4B, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx9BufferFormat< /*FORMAT_32_32_SINT*/ 0x5B, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx9BufferFormat< /*FORMAT_32_32_FLOAT*/ 0x7B, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x0C, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x1C, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x2C, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x3C, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x4C, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x5C, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx9BufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x7C, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx9BufferFormat< /*FORMAT_32_32_32_UINT*/ 0x4D, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx9BufferFormat< /*FORMAT_32_32_32_SINT*/ 0x5D, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx9BufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x7D, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4E, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>; +def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x5E, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>; +def : Gfx9BufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x7E, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>; + +// Buffer formats with equal component sizes (GFX10 and later) +def : Gfx10PlusBufferFormat< /*FORMAT_8_UNORM*/ 0x01, 8, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8*/ 1>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_SNORM*/ 0x02, 8, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8*/ 1>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_USCALED*/ 0x03, 8, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8*/ 1>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_SSCALED*/ 0x04, 8, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8*/ 1>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_UINT*/ 0x05, 8, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8*/ 1>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_SINT*/ 0x06, 8, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8*/ 1>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_UNORM*/ 0x07, 16, 1, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16*/ 2>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_SNORM*/ 0x08, 16, 1, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16*/ 2>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_USCALED*/ 0x09, 16, 1, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16*/ 2>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_SSCALED*/ 0x0A, 16, 1, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16*/ 2>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_UINT*/ 0x0B, 16, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16*/ 2>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_SINT*/ 0x0C, 16, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16*/ 2>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_FLOAT*/ 0x0D, 16, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16*/ 2>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UNORM*/ 0x0E, 8, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SNORM*/ 0x0F, 8, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_USCALED*/ 0x10, 8, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SSCALED*/ 0x11, 8, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_UINT*/ 0x12, 8, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_SINT*/ 0x13, 8, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8*/ 3>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_UINT*/ 0x14, 32, 1, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32*/ 4>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_SINT*/ 0x15, 32, 1, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32*/ 4>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_FLOAT*/ 0x16, 32, 1, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32*/ 4>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UNORM*/ 0x17, 16, 2, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SNORM*/ 0x18, 16, 2, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_USCALED*/ 0x19, 16, 2, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SSCALED*/ 0x1A, 16, 2, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_UINT*/ 0x1B, 16, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_SINT*/ 0x1C, 16, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_FLOAT*/ 0x1D, 16, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16*/ 5>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UNORM*/ 0x38, 8, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SNORM*/ 0x39, 8, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_USCALED*/ 0x3A, 8, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SSCALED*/ 0x3B, 8, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_UINT*/ 0x3C, 8, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10PlusBufferFormat< /*FORMAT_8_8_8_8_SINT*/ 0x3D, 8, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_8_8_8_8*/ 10>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_UINT*/ 0x3E, 32, 2, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_SINT*/ 0x3F, 32, 2, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_FLOAT*/ 0x40, 32, 2, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32*/ 11>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UNORM*/ 0x41, 16, 4, /*NUM_FORMAT_UNORM*/ 0, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SNORM*/ 0x42, 16, 4, /*NUM_FORMAT_SNORM*/ 1, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_USCALED*/ 0x43, 16, 4, /*NUM_FORMAT_USCALED*/ 2, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SSCALED*/ 0x44, 16, 4, /*NUM_FORMAT_SSCALED*/ 3, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_UINT*/ 0x45, 16, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_SINT*/ 0x46, 16, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10PlusBufferFormat< /*FORMAT_16_16_16_16_FLOAT*/ 0x47, 16, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_16_16_16_16*/ 12>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_UINT*/ 0x48, 32, 3, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_SINT*/ 0x49, 32, 3, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_FLOAT*/ 0x4A, 32, 3, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32*/ 13>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_UINT*/ 0x4B, 32, 4, /*NUM_FORMAT_UINT*/ 4, /*DATA_FORMAT_32_32_32_32*/ 14>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_SINT*/ 0x4C, 32, 4, /*NUM_FORMAT_SINT*/ 5, /*DATA_FORMAT_32_32_32_32*/ 14>; +def : Gfx10PlusBufferFormat< /*FORMAT_32_32_32_32_FLOAT*/ 0x4D, 32, 4, /*NUM_FORMAT_FLOAT*/ 7, /*DATA_FORMAT_32_32_32_32*/ 14>; + class SourceOfDivergence { Intrinsic Intr = intr; } diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -99,6 +99,8 @@ BUFFER_LOAD, BUFFER_STORE, MIMG, + TBUFFER_LOAD, + TBUFFER_STORE, }; enum RegisterEnum { @@ -119,6 +121,8 @@ unsigned Offset1; unsigned Width0; unsigned Width1; + unsigned Format0; + unsigned Format1; unsigned BaseOff; unsigned DMask0; unsigned DMask1; @@ -206,12 +210,14 @@ const GCNSubtarget *STM = nullptr; const SIInstrInfo *TII = nullptr; const SIRegisterInfo *TRI = nullptr; + const MCSubtargetInfo *STI = nullptr; MachineRegisterInfo *MRI = nullptr; AliasAnalysis *AA = nullptr; bool OptimizeAgain; - static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII); - static bool offsetsCanBeCombined(CombineInfo &CI); + static bool dmasksCanBeCombined(const CombineInfo &CI, + const SIInstrInfo &TII); + static bool offsetsCanBeCombined(CombineInfo &CI, const MCSubtargetInfo &STI); static bool widthsFit(const GCNSubtarget &STM, const CombineInfo &CI); static unsigned getNewOpcode(const CombineInfo &CI); static std::pair getSubRegIdxs(const CombineInfo &CI); @@ -230,6 +236,8 @@ MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI); MachineBasicBlock::iterator mergeBufferStorePair(CombineInfo &CI); + MachineBasicBlock::iterator mergeTBufferLoadPair(CombineInfo &CI); + MachineBasicBlock::iterator mergeTBufferStorePair(CombineInfo &CI); void updateBaseAndOffset(MachineInstr &I, unsigned NewBase, int32_t NewOffset) const; @@ -285,6 +293,9 @@ TII.getNamedOperand(MI, AMDGPU::OpName::dmask)->getImm(); return countPopulation(DMaskImm); } + if (TII.isMTBUF(Opc)) { + return AMDGPU::getMTBUFElements(Opc); + } switch (Opc) { case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: @@ -323,10 +334,27 @@ if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1) return UNKNOWN; // TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD. - if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || TII.isGather4(Opc)) + if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() || + TII.isGather4(Opc)) return UNKNOWN; return MIMG; } + if (TII.isMTBUF(Opc)) { + switch (AMDGPU::getMTBUFBaseOpcode(Opc)) { + default: + return UNKNOWN; + case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN: + case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN_exact: + case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET: + case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET_exact: + return TBUFFER_LOAD; + case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN: + case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN_exact: + case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFSET: + case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFSET_exact: + return TBUFFER_STORE; + } + } return UNKNOWN; case AMDGPU::S_BUFFER_LOAD_DWORD_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: @@ -357,6 +385,8 @@ assert(Info); return Info->BaseOpcode; } + if (TII.isMTBUF(Opc)) + return AMDGPU::getMTBUFBaseOpcode(Opc); return -1; case AMDGPU::DS_READ_B32: case AMDGPU::DS_READ_B32_gfx9: @@ -398,6 +428,24 @@ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); if (Info && AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler) result |= SSAMP; + + return result; + } + if (TII.isMTBUF(Opc)) { + unsigned result = 0; + + if (AMDGPU::getMTBUFHasVAddr(Opc)) { + result |= VADDR; + } + + if (AMDGPU::getMTBUFHasSrsrc(Opc)) { + result |= SRSRC; + } + + if (AMDGPU::getMTBUFHasSoffset(Opc)) { + result |= SOFFSET; + } + return result; } @@ -420,7 +468,6 @@ } } - void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI, const SIInstrInfo &TII, const GCNSubtarget &STM) { @@ -457,6 +504,9 @@ Offset0 = I->getOperand(OffsetIdx).getImm(); } + if (InstClass == TBUFFER_LOAD || InstClass == TBUFFER_STORE) + Format0 = TII.getNamedOperand(*I, AMDGPU::OpName::format)->getImm(); + Width0 = getOpcodeWidth(*I, TII); if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { @@ -518,6 +568,9 @@ Offset1 = Paired->getOperand(OffsetIdx).getImm(); } + if (InstClass == TBUFFER_LOAD || InstClass == TBUFFER_STORE) + Format1 = TII.getNamedOperand(*Paired, AMDGPU::OpName::format)->getImm(); + Width1 = getOpcodeWidth(*Paired, TII); if ((InstClass == DS_READ) || (InstClass == DS_WRITE)) { Offset1 &= 0xffff; @@ -530,7 +583,6 @@ } } - } // end anonymous namespace. INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE, @@ -671,7 +723,33 @@ return true; } -bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) { +static unsigned getBufferFormatWithCompCount(unsigned OldFormat, + unsigned ComponentCount, + const MCSubtargetInfo &STI) { + if (ComponentCount > 4) + return 0; + + const llvm::AMDGPU::GcnBufferFormatInfo *OldFormatInfo = + llvm::AMDGPU::getGcnBufferFormatInfo(OldFormat, STI); + if (!OldFormatInfo) + return 0; + + const llvm::AMDGPU::GcnBufferFormatInfo *NewFormatInfo = + llvm::AMDGPU::getGcnBufferFormatInfo(OldFormatInfo->BitsPerComp, + ComponentCount, + OldFormatInfo->NumFormat, STI); + + if (!NewFormatInfo) + return 0; + + assert(NewFormatInfo->NumFormat == OldFormatInfo->NumFormat && + NewFormatInfo->BitsPerComp == OldFormatInfo->BitsPerComp); + + return NewFormatInfo->Format; +} + +bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI, + const MCSubtargetInfo &STI) { assert(CI.InstClass != MIMG); // XXX - Would the same offset be OK? Is there any reason this would happen or @@ -683,6 +761,30 @@ if ((CI.Offset0 % CI.EltSize != 0) || (CI.Offset1 % CI.EltSize != 0)) return false; + if (CI.InstClass == TBUFFER_LOAD || CI.InstClass == TBUFFER_STORE) { + + const llvm::AMDGPU::GcnBufferFormatInfo *Info0 = + llvm::AMDGPU::getGcnBufferFormatInfo(CI.Format0, STI); + if (!Info0) + return false; + const llvm::AMDGPU::GcnBufferFormatInfo *Info1 = + llvm::AMDGPU::getGcnBufferFormatInfo(CI.Format1, STI); + if (!Info1) + return false; + + if (Info0->BitsPerComp != Info1->BitsPerComp || + Info0->NumFormat != Info1->NumFormat) + return false; + + // TODO: Should be possible to support more formats, but if format loads + // are not dword-aligned, the merged load might not be valid. + if (Info0->BitsPerComp != 32) + return false; + + if (getBufferFormatWithCompCount(CI.Format0, CI.Width0 + CI.Width1, STI) == 0) + return false; + } + unsigned EltOffset0 = CI.Offset0 / CI.EltSize; unsigned EltOffset1 = CI.Offset1 / CI.EltSize; CI.UseST64 = false; @@ -814,6 +916,11 @@ if (MBBI->hasOrderedMemoryRef()) return false; + int Swizzled = + AMDGPU::getNamedOperandIdx(MBBI->getOpcode(), AMDGPU::OpName::swz); + if (Swizzled != -1 && MBBI->getOperand(Swizzled).getImm()) + return false; + // Handle a case like // DS_WRITE_B32 addr, v, idx0 // w = DS_READ_B32 addr, idx0 @@ -834,7 +941,7 @@ bool canBeCombined = CI.InstClass == MIMG ? dmasksCanBeCombined(CI, *TII) - : widthsFit(*STM, CI) && offsetsCanBeCombined(CI); + : widthsFit(*STM, CI) && offsetsCanBeCombined(CI, *STI); // We also need to go through the list of instructions that we plan to // move and make sure they are all safe to move down past the merged @@ -1201,6 +1308,136 @@ return New; } +MachineBasicBlock::iterator +SILoadStoreOptimizer::mergeTBufferLoadPair(CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + + const unsigned Opcode = getNewOpcode(CI); + + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); + + // Copy to the new source register. + Register DestReg = MRI->createVirtualRegister(SuperRC); + unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1); + + auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg); + + const unsigned Regs = getRegs(Opcode, *TII); + + if (Regs & VADDR) + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)); + + unsigned JoinedFormat = + getBufferFormatWithCompCount(CI.Format0, CI.Width0 + CI.Width1, *STI); + + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. + assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) + .addImm(MergedOffset) // offset + .addImm(JoinedFormat) // format + .addImm(CI.GLC0) // glc + .addImm(CI.SLC0) // slc + .addImm(0) // tfe + .addImm(CI.DLC0) // dlc + .addImm(0) // swz + .addMemOperand( + combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + + std::pair SubRegIdx = getSubRegIdxs(CI); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the old destination registers. + const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + const auto *Dest0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); + const auto *Dest1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata); + + BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest0) // Copy to same destination including flags and sub reg. + .addReg(DestReg, 0, SubRegIdx0); + MachineInstr *Copy1 = BuildMI(*MBB, CI.Paired, DL, CopyDesc) + .add(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + moveInstsAfter(Copy1, CI.InstsToMove); + + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); + return New; +} + +MachineBasicBlock::iterator +SILoadStoreOptimizer::mergeTBufferStorePair(CombineInfo &CI) { + MachineBasicBlock *MBB = CI.I->getParent(); + DebugLoc DL = CI.I->getDebugLoc(); + + const unsigned Opcode = getNewOpcode(CI); + + std::pair SubRegIdx = getSubRegIdxs(CI); + const unsigned SubRegIdx0 = std::get<0>(SubRegIdx); + const unsigned SubRegIdx1 = std::get<1>(SubRegIdx); + + // Copy to the new source register. + const TargetRegisterClass *SuperRC = getTargetRegisterClass(CI); + Register SrcReg = MRI->createVirtualRegister(SuperRC); + + const auto *Src0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::vdata); + const auto *Src1 = TII->getNamedOperand(*CI.Paired, AMDGPU::OpName::vdata); + + BuildMI(*MBB, CI.Paired, DL, TII->get(AMDGPU::REG_SEQUENCE), SrcReg) + .add(*Src0) + .addImm(SubRegIdx0) + .add(*Src1) + .addImm(SubRegIdx1); + + auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode)) + .addReg(SrcReg, RegState::Kill); + + const unsigned Regs = getRegs(Opcode, *TII); + + if (Regs & VADDR) + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)); + + unsigned JoinedFormat = + getBufferFormatWithCompCount(CI.Format0, CI.Width0 + CI.Width1, *STI); + + // It shouldn't be possible to get this far if the two instructions + // don't have a single memoperand, because MachineInstr::mayAlias() + // will return true if this is the case. + assert(CI.I->hasOneMemOperand() && CI.Paired->hasOneMemOperand()); + + const MachineMemOperand *MMOa = *CI.I->memoperands_begin(); + const MachineMemOperand *MMOb = *CI.Paired->memoperands_begin(); + + MachineInstr *New = + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) + .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) + .addImm(std::min(CI.Offset0, CI.Offset1)) // offset + .addImm(JoinedFormat) // format + .addImm(CI.GLC0) // glc + .addImm(CI.SLC0) // slc + .addImm(0) // tfe + .addImm(CI.DLC0) // dlc + .addImm(0) // swz + .addMemOperand( + combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); + + moveInstsAfter(MIB, CI.InstsToMove); + + CI.I->eraseFromParent(); + CI.Paired->eraseFromParent(); + return New; +} + unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI) { const unsigned Width = CI.Width0 + CI.Width1; @@ -1210,6 +1447,11 @@ // FIXME: Handle d16 correctly return AMDGPU::getMUBUFOpcode(AMDGPU::getMUBUFBaseOpcode(CI.I->getOpcode()), Width); + case TBUFFER_LOAD: + case TBUFFER_STORE: + return AMDGPU::getMTBUFOpcode(AMDGPU::getMTBUFBaseOpcode(CI.I->getOpcode()), + Width); + case UNKNOWN: llvm_unreachable("Unknown instruction class"); case S_BUFFER_LOAD_IMM: @@ -1819,6 +2061,24 @@ OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; } break; + case TBUFFER_LOAD: + if (findMatchingInst(CI)) { + Modified = true; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeTBufferLoadPair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; + } + break; + case TBUFFER_STORE: + if (findMatchingInst(CI)) { + Modified = true; + removeCombinedInst(MergeList, *CI.Paired); + MachineBasicBlock::iterator NewMI = mergeTBufferStorePair(CI); + CI.setMI(NewMI, *TII, *STM); + OptimizeListAgain |= (CI.Width0 + CI.Width1) < 4; + } + break; } // Clear the InstsToMove after we have finished searching so we don't have // stale values left over if we search for this CI again in another pass @@ -1839,6 +2099,7 @@ TII = STM->getInstrInfo(); TRI = &TII->getRegisterInfo(); + STI = &MF.getSubtarget(); MRI = &MF.getRegInfo(); AA = &getAnalysis().getAAResults(); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -41,6 +41,14 @@ namespace AMDGPU { +struct GcnBufferFormatInfo { + unsigned Format; + unsigned BitsPerComp; + unsigned NumComponents; + unsigned NumFormat; + unsigned DataFormat; +}; + #define GET_MIMGBaseOpcode_DECL #define GET_MIMGDim_DECL #define GET_MIMGEncoding_DECL @@ -300,6 +308,15 @@ bool getMUBUFHasSoffset(unsigned Opc); LLVM_READONLY +const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, + uint8_t NumComponents, + uint8_t NumFormat, + const MCSubtargetInfo &STI); +LLVM_READONLY +const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, + const MCSubtargetInfo &STI); + +LLVM_READONLY int getMCOpcode(uint16_t Opcode, unsigned Gen); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, @@ -646,7 +663,6 @@ /// \returns true if the intrinsic is divergent bool isIntrinsicSourceOfDivergence(unsigned IntrID); - // Track defaults for fields in the MODE registser. struct SIModeRegisterDefaults { /// Floating point opcodes that support exception flag gathering quiet and diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1328,6 +1328,8 @@ const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr); #define GET_SourcesOfDivergence_IMPL +#define GET_Gfx9BufferFormat_IMPL +#define GET_Gfx10PlusBufferFormat_IMPL #include "AMDGPUGenSearchableTables.inc" } // end anonymous namespace @@ -1336,5 +1338,21 @@ return lookupSourceOfDivergence(IntrID); } +const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp, + uint8_t NumComponents, + uint8_t NumFormat, + const MCSubtargetInfo &STI) { + return isGFX10(STI) + ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents, + NumFormat) + : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat); +} + +const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format, + const MCSubtargetInfo &STI) { + return isGFX10(STI) ? getGfx10PlusBufferFormatInfo(Format) + : getGfx9BufferFormatInfo(Format); +} + } // namespace AMDGPU } // namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir @@ -0,0 +1,1559 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefix=GFX10 %s + +# +# GFX9 tests +# + +# GFX9-LABEL: name: gfx9_tbuffer_load_x_xyz +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 +name: gfx9_tbuffer_load_x_xyz +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_xyz_x +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 +name: gfx9_tbuffer_load_xyz_x +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_xy_xy +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 +name: gfx9_tbuffer_load_xy_xy +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) + %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_x_xy +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 +name: gfx9_tbuffer_load_x_xy +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_xy_x +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 +name: gfx9_tbuffer_load_xy_x +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + + +# GFX9-LABEL: name: gfx9_tbuffer_load_x_x +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 + +name: gfx9_tbuffer_load_x_x +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_x_x_format_32_32_32_32 +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 + +name: gfx9_tbuffer_load_x_x_format_32_32_32_32 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + + +# GFX9-LABEL: name: gfx9_tbuffer_load_float_32 +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1 + +name: gfx9_tbuffer_load_float_32 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_sint_32 +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 91, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 93, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1 + +name: gfx9_tbuffer_load_sint_32 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_uint_32 +# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 75, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX9: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1 + +name: gfx9_tbuffer_load_uint_32 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_data_format_mismatch +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + + +name: gfx9_tbuffer_load_not_merged_data_format_mismatch +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_num_format_mismatch +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +name: gfx9_tbuffer_load_not_merged_num_format_mismatch +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_x_xyz +# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2 +# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %9, %subreg.sub1_sub2_sub3 +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +name: gfx9_tbuffer_store_x_xyz +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +... +--- + + + +# GFX9-LABEL: name: gfx9_tbuffer_store_xyz_x +# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2 +# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1_sub2, %0, %subreg.sub3 +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +name: gfx9_tbuffer_store_xyz_x +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 + TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_xy_xy +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1, %10, %subreg.sub2_sub3 +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +name: gfx9_tbuffer_store_xy_xy +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 + %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_x_xy +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_64, %subreg.sub1_sub2 +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx9_tbuffer_store_x_xy +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_xy_x +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %9, %subreg.sub0_sub1, %0, %subreg.sub2 +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx9_tbuffer_store_xy_x +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + + +# GFX9-LABEL: name: gfx9_tbuffer_store_x_x +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +name: gfx9_tbuffer_store_x_x +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_x_x_format_32_32_32_32 +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +name: gfx9_tbuffer_store_x_x_format_32_32_32_32 +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_float32 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 123, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3 +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 126, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2 +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 125, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx9_tbuffer_store_float32 +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_sint32 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 91, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3 +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 94, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2 +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 93, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx9_tbuffer_store_sint32 +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_uint32 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 +# GFX9: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 75, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3 +# GFX9: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 78, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 +# GFX9: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2 +# GFX9: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx9_tbuffer_store_uint32 +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_not_merged_data_format_mismatch +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +name: gfx9_tbuffer_store_not_merged_data_format_mismatch +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 84, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX9-LABEL: name: gfx9_tbuffer_store_not_merged_num_format_mismatch +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX9: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX9: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX9: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +name: gfx9_tbuffer_store_not_merged_num_format_mismatch +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 114, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + + +# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_0 +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +name: gfx9_tbuffer_load_not_merged_swizzled_0 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + + +# GFX9-LABEL: name: gfx9_tbuffer_load_not_merged_swizzled_1 +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +name: gfx9_tbuffer_load_not_merged_swizzled_1 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + + +# +# GFX10 tests +# + +# GFX10-LABEL: name: gfx10_tbuffer_load_x_xyz +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vreg_96 = COPY killed %7.sub1_sub2_sub3 +name: gfx10_tbuffer_load_x_xyz +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 8, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_xyz_x +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_96 = COPY %7.sub0_sub1_sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub3 +name: gfx10_tbuffer_load_xyz_x +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %5:sgpr_128, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_xy_xy +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub2_sub3 +name: gfx10_tbuffer_load_xy_xy +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) + %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 12, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_x_xy +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %7.sub1_sub2 +name: gfx10_tbuffer_load_x_xy +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 8, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_xy_x +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = COPY %7.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub2 +name: gfx10_tbuffer_load_xy_x +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %5:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + + +# GFX10-LABEL: name: gfx10_tbuffer_load_x_x +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 + +name: gfx10_tbuffer_load_x_x +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_x_x_format_32_32_32_32 +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %7.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %7.sub1 + +name: gfx10_tbuffer_load_x_x_format_32_32_32_32 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + + +# GFX10-LABEL: name: gfx10_tbuffer_load_float_32 +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1 + +name: gfx10_tbuffer_load_float_32 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_sint_32 +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 63, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 76, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 73, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1 + +name: gfx10_tbuffer_load_sint_32 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_uint_32 +# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 62, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %14.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %14.sub1 +# GFX10: %{{[0-9]+}}:vreg_128 = TBUFFER_LOAD_FORMAT_XYZW_OFFSET %4, 0, 16, 75, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = COPY %18.sub0_sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = COPY killed %18.sub2_sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %15.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %15.sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %16.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %16.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = TBUFFER_LOAD_FORMAT_XYZ_OFFSET %4, 0, 36, 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = COPY %19.sub0_sub1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %19.sub2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY %17.sub0 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY killed %17.sub1 + +name: gfx10_tbuffer_load_uint_32 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_data_format_mismatch +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + + +name: gfx10_tbuffer_load_not_merged_data_format_mismatch +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_num_format_mismatch +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +name: gfx10_tbuffer_load_not_merged_num_format_mismatch +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %10:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %11:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %12:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %13:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %14:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %15:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + + + +# GFX10-LABEL: name: gfx10_tbuffer_store_x_xyz +# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2 +# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %0, %subreg.sub0, %9, %subreg.sub1_sub2_sub3 +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +name: gfx10_tbuffer_store_x_xyz +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 8, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +... +--- + + +# GFX10-LABEL: name: gfx10_tbuffer_store_xyz_x +# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1, %1, %subreg.sub2 +# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1_sub2, %0, %subreg.sub3 +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %10, %8, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +name: gfx10_tbuffer_store_xyz_x +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_96 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1, %6:vgpr_32, %subreg.sub2 + TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact %14:vreg_96, %13:sgpr_128, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_xy_xy +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE %9, %subreg.sub0_sub1, %10, %subreg.sub2_sub3 +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %11, %8, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +name: gfx10_tbuffer_store_xy_xy +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 + %15:vreg_64 = REG_SEQUENCE %6:vgpr_32, %subreg.sub0, %7:vgpr_32, %subreg.sub1 + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 12, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_x_xy +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_64, %subreg.sub1_sub2 +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %11, %8, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx10_tbuffer_store_x_xy +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %15:vreg_64, %13:sgpr_128, 0, 8, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_xy_x +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE %9, %subreg.sub0_sub1, %0, %subreg.sub2 +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %10, %8, 0, 4, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx10_tbuffer_store_xy_x +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %14:vreg_64 = REG_SEQUENCE %4:vgpr_32, %subreg.sub0, %5:vgpr_32, %subreg.sub1 + TBUFFER_STORE_FORMAT_XY_OFFSET_exact %14:vreg_64, %13:sgpr_128, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 12, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + + +# GFX10-LABEL: name: gfx10_tbuffer_store_x_x +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +name: gfx10_tbuffer_store_x_x +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_x_x_format_32_32_32_32 +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %1, %subreg.sub0, %0, %subreg.sub1 +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %9, %8, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +name: gfx10_tbuffer_store_x_x_format_32_32_32_32 +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 4, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 8, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_float32 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3 +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 77, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2 +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 74, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx10_tbuffer_store_float32 +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_sint32 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 63, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3 +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 76, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2 +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 73, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx10_tbuffer_store_sint32 +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_uint32 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %8, %subreg.sub0, %7, %subreg.sub1 +# GFX10: TBUFFER_STORE_FORMAT_XY_OFFSET_exact killed %14, %13, 0, 4, 62, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %6, %subreg.sub0, %5, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %4, %subreg.sub0, %3, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_128 = REG_SEQUENCE killed %15, %subreg.sub0_sub1, killed %16, %subreg.sub2_sub3 +# GFX10: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed %18, %13, 0, 16, 75, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %1, %subreg.sub1 +# GFX10: %{{[0-9]+}}:vreg_96 = REG_SEQUENCE killed %17, %subreg.sub0_sub1, %0, %subreg.sub2 +# GFX10: TBUFFER_STORE_FORMAT_XYZ_OFFSET_exact killed %19, %13, 0, 36, 72, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12, align 1, addrspace 4) +name: gfx10_tbuffer_store_uint32 +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 20, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_not_merged_data_format_mismatch +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +name: gfx10_tbuffer_store_not_merged_data_format_mismatch +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 21, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + +# GFX10-LABEL: name: gfx10_tbuffer_store_not_merged_num_format_mismatch +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr8 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr7 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr6 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr5 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr4 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr3 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr2 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr1 +# GFX10: %{{[0-9]+}}:vgpr_32 = COPY $vgpr0 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr3 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr2 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr1 +# GFX10: %{{[0-9]+}}:sgpr_32 = COPY $sgpr0 +# GFX10: %{{[0-9]+}}:sgpr_128 = REG_SEQUENCE %12, %subreg.sub0, %11, %subreg.sub1, %10, %subreg.sub2, %9, %subreg.sub3 +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %8, %13, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %7, %13, 0, 8, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %6, %13, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %5, %13, 0, 20, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %13, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %3, %13, 0, 28, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %2, %13, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %1, %13, 0, 40, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +# GFX10: TBUFFER_STORE_FORMAT_X_OFFSET_exact %0, %13, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +name: gfx10_tbuffer_store_not_merged_num_format_mismatch +body: | + bb.0.entry: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + %12:vgpr_32 = COPY $vgpr8 + %11:vgpr_32 = COPY $vgpr7 + %10:vgpr_32 = COPY $vgpr6 + %9:vgpr_32 = COPY $vgpr5 + %8:vgpr_32 = COPY $vgpr4 + %7:vgpr_32 = COPY $vgpr3 + %6:vgpr_32 = COPY $vgpr2 + %5:vgpr_32 = COPY $vgpr1 + %4:vgpr_32 = COPY $vgpr0 + %3:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %1:sgpr_32 = COPY $sgpr1 + %0:sgpr_32 = COPY $sgpr0 + %13:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %13:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %5:vgpr_32, %13:sgpr_128, 0, 8, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %6:vgpr_32, %13:sgpr_128, 0, 16, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %7:vgpr_32, %13:sgpr_128, 0, 20, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %8:vgpr_32, %13:sgpr_128, 0, 24, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %9:vgpr_32, %13:sgpr_128, 0, 28, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %10:vgpr_32, %13:sgpr_128, 0, 36, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %11:vgpr_32, %13:sgpr_128, 0, 40, 13, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) + TBUFFER_STORE_FORMAT_X_OFFSET_exact %12:vgpr_32, %13:sgpr_128, 0, 44, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4, align 1, addrspace 4) +... +--- + + +# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_0 +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +name: gfx10_tbuffer_load_not_merged_swizzled_0 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- + + +# GFX10-LABEL: name: gfx10_tbuffer_load_not_merged_swizzled_1 +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +name: gfx10_tbuffer_load_not_merged_swizzled_1 +body: | + bb.0.entry: + %0:sgpr_32 = COPY $sgpr0 + %1:sgpr_32 = COPY $sgpr1 + %2:sgpr_32 = COPY $sgpr2 + %3:sgpr_32 = COPY $sgpr3 + %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3 + %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) + %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable load 4, align 1, addrspace 4) +... +--- +