Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3693,10 +3693,13 @@ uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; if (ST.isAmdHsaOS()) { - RsrcDataFormat |= (1ULL << 56); + // Set ATC = 1. GFX9 doesn't have this bit. + if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) + RsrcDataFormat |= (1ULL << 56); - if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) - // Set MTYPE = 2 + // Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this. + // BTW, it disables TC L2 and therefore decreases performance. + if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS) RsrcDataFormat |= (2ULL << 59); } @@ -3708,11 +3711,14 @@ AMDGPU::RSRC_TID_ENABLE | 0xffffffff; // Size; - uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1; + // GFX9 doesn't have ELEMENT_SIZE. + if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) { + uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1; + Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT; + } - Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) | - // IndexStride = 64 - (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT); + // IndexStride = 64. + Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT; // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. // Clear them unless we want a huge stride. Index: test/CodeGen/AMDGPU/large-alloca-compute.ll =================================================================== --- test/CodeGen/AMDGPU/large-alloca-compute.ll +++ test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s ; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s ; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s ; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s @@ -14,6 +15,7 @@ ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1 ; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe8f000 ; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe80000 +; GFX9-DAG: s_mov_b32 s{{[0-9]+}}, 0xe00000 ; GCNHSA: .amd_kernel_code_t Index: test/CodeGen/AMDGPU/large-alloca-graphics.ll =================================================================== --- test/CodeGen/AMDGPU/large-alloca-graphics.ll +++ test/CodeGen/AMDGPU/large-alloca-graphics.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s ; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s ; ALL-LABEL: {{^}}large_alloca_pixel_shader: ; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 @@ -7,6 +8,7 @@ ; GCN-DAG: s_mov_b32 s10, -1 ; CI-DAG: s_mov_b32 s11, 0xe8f000 ; VI-DAG: s_mov_b32 s11, 0xe80000 +; GFX9-DAG: s_mov_b32 s11, 0xe00000 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen @@ -28,6 +30,7 @@ ; GCN-DAG: s_mov_b32 s10, -1 ; CI-DAG: s_mov_b32 s11, 0xe8f000 ; VI-DAG: s_mov_b32 s11, 0xe80000 +; GFX9-DAG: s_mov_b32 s11, 0xe00000 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s ; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s ; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s @@ -21,10 +22,11 @@ ; GCNMESA-DAG: s_mov_b32 s16, s3 ; GCNMESA-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GCNMESA--DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCNMESA-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GCNMESA-DAG: s_mov_b32 s14, -1 ; SIMESA-DAG: s_mov_b32 s15, 0xe8f000 ; VIMESA-DAG: s_mov_b32 s15, 0xe80000 +; GFX9MESA-DAG: s_mov_b32 s15, 0xe00000 ; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -1,5 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; This ends up using all 255 registers and requires register ; scavenging which will fail to find an unsued register. @@ -18,6 +19,7 @@ ; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1 ; SI-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe8f000 ; VI-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe80000 +; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000 ; OFFREG is offset system SGPR ; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s[[OFFREG]] offset:{{[0-9]+}} ; 4-byte Folded Spill