Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -333,7 +333,9 @@ class SISubtarget final : public AMDGPUSubtarget { public: enum { - FIXED_SGPR_COUNT_FOR_INIT_BUG = 80 + // The closed Vulkan driver sets 96, which limits the wave count to 8 but + // doesn't spill SGPRs as much as when 80 is set. + FIXED_SGPR_COUNT_FOR_INIT_BUG = 96 }; private: Index: llvm/trunk/test/CodeGen/AMDGPU/elf.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/elf.ll +++ llvm/trunk/test/CodeGen/AMDGPU/elf.ll @@ -21,7 +21,7 @@ ; CONFIG: .section .AMDGPU.config ; CONFIG-NEXT: .long 45096 ; TYPICAL-NEXT: .long 0 -; TONGA-NEXT: .long 576 +; TONGA-NEXT: .long 704 ; CONFIG: .p2align 8 ; CONFIG: test: define amdgpu_ps void @test(i32 %p) { Index: llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll +++ llvm/trunk/test/CodeGen/AMDGPU/load-constant-i32.ll @@ -277,47 +277,47 @@ ; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64: ; GCN: s_load_dwordx16 -; GCN: s_load_dwordx16 +; GCN-DAG: s_load_dwordx16 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 -; GCN-NOHSA: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 +; GCN-NOHSA-DAG: buffer_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 -; GCN-HSA: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 +; GCN-HSA-DAG: flat_store_dwordx4 define void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 { %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in