Index: llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIFixupVectorISel.cpp @@ -43,6 +43,11 @@ using namespace llvm; +static cl::opt EnableGlobalSGPRAddr( + "amdgpu-enable-global-sgpr-addr", + cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"), + cl::init(false)); + STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities"); STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted"); @@ -155,6 +160,8 @@ const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI) { + if (!EnableGlobalSGPRAddr) + return false; bool FuncModified = false; MachineBasicBlock::iterator I, Next; for (I = MBB.begin(); I != MBB.end(); I = Next) { Index: llvm/trunk/test/CodeGen/AMDGPU/ds_write2.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/ds_write2.ll +++ llvm/trunk/test/CodeGen/AMDGPU/ds_write2.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s @lds = addrspace(3) global [512 x float] undef, align 4 @lds.f64 = addrspace(3) global [512 x double] undef, align 8 Index: llvm/trunk/test/CodeGen/AMDGPU/ds_write2st64.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/ds_write2st64.ll +++ llvm/trunk/test/CodeGen/AMDGPU/ds_write2st64.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s @lds = addrspace(3) global [512 x float] undef, align 4 Index: llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir +++ llvm/trunk/test/CodeGen/AMDGPU/global-load-store-atomics.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-fixup-vector-isel %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-fixup-vector-isel -amdgpu-enable-global-sgpr-addr %s -o - | FileCheck -check-prefix=GCN %s # Coverage tests for GLOBAL_* to their _SADDR equivalent. Index: llvm/trunk/test/CodeGen/AMDGPU/global-saddr.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/global-saddr.ll +++ llvm/trunk/test/CodeGen/AMDGPU/global-saddr.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefix=GFX9 %s ; Test for a conv2d like sequence of loads. Index: llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll +++ llvm/trunk/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s ; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr: Index: llvm/trunk/test/CodeGen/AMDGPU/madak.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/madak.ll +++ llvm/trunk/test/CodeGen/AMDGPU/madak.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9 %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-load.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s declare i32 @llvm.amdgcn.workitem.id.x() Index: llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll +++ llvm/trunk/test/CodeGen/AMDGPU/memory-legalizer-store.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s declare i32 @llvm.amdgcn.workitem.id.x() Index: llvm/trunk/test/CodeGen/AMDGPU/memory_clause.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/memory_clause.ll +++ llvm/trunk/test/CodeGen/AMDGPU/memory_clause.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefix=GCN %s ; GCN-LABEL: {{^}}vector_clause: ; GCN: global_load_dwordx4 Index: llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ llvm/trunk/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -1,5 +1,5 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)