Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -724,6 +724,10 @@ return ScalarFlatScratchInsts; } + bool hasMultiDwordFlatScratchAddressing() const { + return getGeneration() >= GFX9; + } + bool hasFlatSegmentOffsetBug() const { return HasFlatSegmentOffsetBug; } Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7380,7 +7380,8 @@ SIMachineFunctionInfo *MFI = MF.getInfo(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUAS::FLAT_ADDRESS && + !Subtarget->hasMultiDwordFlatScratchAddressing()) AS = MFI->hasFlatScratchInit() ? AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; @@ -7883,7 +7884,8 @@ SIMachineFunctionInfo *MFI = MF.getInfo(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUAS::FLAT_ADDRESS && + !Subtarget->hasMultiDwordFlatScratchAddressing()) AS = MFI->hasFlatScratchInit() ? AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; Index: llvm/test/CodeGen/AMDGPU/flat-address-space.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/flat-address-space.ll +++ llvm/test/CodeGen/AMDGPU/flat-address-space.ll @@ -1,7 +1,8 @@ ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefixes=CHECK,CIVI %s ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI,HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,CIVI,HSA,CIVI-HSA %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=CHECK,HSA,GFX10 %s ; CHECK-LABEL: {{^}}store_flat_i32: ; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]], @@ -145,8 +146,10 @@ } ; CHECK-LABEL: flat_scratch_multidword_load: -; HSA: flat_load_dword -; HSA: flat_load_dword +; CIVI-HSA: flat_load_dword v +; CIVI-HSA: flat_load_dword v +; GFX9: flat_load_dwordx2 +; GFX10: flat_load_dwordx2 ; FIXME: These tests are broken for os = mesa3d, becasue it doesn't initialize flat_scr define amdgpu_kernel void @flat_scratch_multidword_load() { %scratch = alloca <2 x i32>, addrspace(5) @@ -156,8 +159,10 @@ } ; CHECK-LABEL: flat_scratch_multidword_store: -; HSA: flat_store_dword -; HSA: flat_store_dword +; CIVI-HSA: flat_store_dword v +; CIVI-HSA: flat_store_dword v +; GFX9: flat_store_dwordx2 +; GFX10: flat_store_dwordx2 ; FIXME: These tests are broken for os = mesa3d, becasue it doesn't initialize flat_scr define amdgpu_kernel void @flat_scratch_multidword_store() { %scratch = alloca <2 x i32>, addrspace(5)