# Patch summary (free-form preamble; everything before the first "Index:" header is commentary).
# 1. AMDGPUSubtarget.cpp, first hunk: when getGeneration() is SOUTHERN_ISLANDS, FlatForGlobal is
#    forced to false, and a message is written to errs() if the feature string contains
#    "+flat-for-global".
#    NOTE(review): FlatForGlobal is assigned directly here; presumably the matching subtarget
#    feature bit should be kept in sync with it - confirm against the surrounding code.
# 2. AMDGPUSubtarget.cpp, second hunk: the initial value of Gen becomes SOUTHERN_ISLANDS unless the
#    GPU name contains "generic"; for "generic" names it stays SEA_ISLANDS on AMDHSA and
#    SOUTHERN_ISLANDS otherwise.
# 3. lower-kernargs.ll: the HSA check lines for @kern_lds_ptr_si are replaced by a single store check.
# 4. si-global-buffer.ll (new): for gfx600 with -mattr=+flat-for-global, on both amdgcn-amd-amdhsa and
#    amdgcn triples, checks that the store is emitted as buffer_store_dword ... addr64.
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -118,6 +118,13 @@ FlatForGlobal = true; } + // Use MUBUF instructions for global address space access in GFX60x + if (getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { + FlatForGlobal = false; + if (FS.contains("+flat-for-global")) + errs() << "GFX6 does not support Flat instructions for Global Address Space\n"; + } + // Set defaults if needed. if (MaxPrivateElementSize == 0) MaxPrivateElementSize = 4; @@ -182,7 +189,8 @@ AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT), TargetTriple(TT), - Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS), + Gen(!GPU.contains("generic") ? SOUTHERN_ISLANDS : + (TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS)), InstrItins(getInstrItineraryForCPU(GPU)), LDSBankCount(0), MaxPrivateElementSize(0), Index: llvm/test/CodeGen/AMDGPU/lower-kernargs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-kernargs.ll +++ llvm/test/CodeGen/AMDGPU/lower-kernargs.ll @@ -533,10 +533,7 @@ define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { ; HSA-LABEL: @kern_lds_ptr_si( ; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() -; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* -; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 -; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 +; HSA-NEXT: store i32 0, i32 
addrspace(3)* [[LDS:%.*]], align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_lds_ptr_si( Index: llvm/test/CodeGen/AMDGPU/si-global-buffer.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/si-global-buffer.ll @@ -0,0 +1,18 @@ +; RUN: llc --mtriple=amdgcn-amd-amdhsa --mcpu=gfx600 -mattr=+flat-for-global -verify-machineinstrs <%s | FileCheck -check-prefix=SI %s +; RUN: llc --mtriple=amdgcn --mcpu=gfx600 -mattr=+flat-for-global -verify-machineinstrs <%s | FileCheck -check-prefix=SI %s + +define void @test(i32 addrspace(1)* %out){ + ; SI-LABEL: test: + ; SI: ; %bb.0: + ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) + ; SI-NEXT: s_mov_b32 s6, 0 + ; SI-NEXT: s_mov_b32 s7, {{.*}} + ; SI-NEXT: s_mov_b32 s4, s6 + ; SI-NEXT: s_mov_b32 s5, s6 + ; SI-NEXT: v_mov_b32_e32 v2, 0 + ; SI-NEXT: buffer_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 + ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) + ; SI-NEXT: s_setpc_b64 s[30:31] + store i32 0, i32 addrspace(1)* %out + ret void +}