Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2720,8 +2720,13 @@
 
 uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
   uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
-  if (ST.isAmdHsaOS())
+  if (ST.isAmdHsaOS()) {
     RsrcDataFormat |= (1ULL << 56);
 
+    if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+      // Set MTYPE = 2
+      RsrcDataFormat |= (2ULL << 59);
+  }
+
   return RsrcDataFormat;
 }
Index: llvm/trunk/test/CodeGen/AMDGPU/hsa.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI --check-prefix=HSA %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI --check-prefix=HSA %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -s -sd | FileCheck --check-prefix=ELF %s
 ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF
 
@@ -13,14 +14,18 @@
 ; ELF: 0040: 50550000
 
 ; HSA: .hsa_code_object_version 1,0
-; HSA: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 
 ; HSA: {{^}}simple:
 ; HSA: .amd_kernel_code_t
 ; HSA: .end_amd_kernel_code_t
 ; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0
+
 ; Make sure we are setting the ATC bit:
-; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
+; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000
+; On VI+ we also need to set MTYPE = 2
+; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000
 ; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
 
 define void @simple(i32 addrspace(1)* %out) {