Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -875,8 +875,12 @@
   if (HasStackObjects)
     Info->setHasNonSpillStackObjects(true);
 
+  // Everything live out of a block is spilled with fast regalloc, so it's
+  // almost certain that spilling will be required.
+  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+    HasStackObjects = true;
+
   if (ST.isAmdCodeObjectV2()) {
-    // TODO: Assume we will spill without optimizations.
     if (HasStackObjects) {
       // If we have stack objects, we unquestionably need the private buffer
       // resource. For the Code Object V2 ABI, this will be the first 4 user
Index: test/CodeGen/AMDGPU/private-access-no-objects.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/private-access-no-objects.ll
@@ -0,0 +1,33 @@
+; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=OPTNONE %s
+
+; GCN-LABEL: {{^}}store_to_undef:
+
+; -O0 should assume spilling, so the input scratch resource descriptor
+; should be used directly without any copies.
+
+; OPTNONE-NOT: s_mov_b32
+; OPTNONE: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], s7 offen{{$}}
+define void @store_to_undef() #0 {
+  store volatile i32 0, i32* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_to_inttoptr:
+define void @store_to_inttoptr() #0 {
+  store volatile i32 0, i32* inttoptr (i32 123 to i32*)
+  ret void
+}
+
+; GCN-LABEL: {{^}}load_from_undef:
+define void @load_from_undef() #0 {
+  %ld = load volatile i32, i32* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}load_from_inttoptr:
+define void @load_from_inttoptr() #0 {
+  %ld = load volatile i32, i32* inttoptr (i32 123 to i32*)
+  ret void
+}
+
+attributes #0 = { nounwind }
Index: test/CodeGen/AMDGPU/spill-m0.ll
===================================================================
--- test/CodeGen/AMDGPU/spill-m0.ll
+++ test/CodeGen/AMDGPU/spill-m0.ll
@@ -18,7 +18,7 @@
 
 ; TOSMEM: s_mov_b32 vcc_hi, m0
 ; TOSMEM-NOT: vcc_hi
-; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], s89 ; 4-byte Folded Spill
+; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], s3 ; 4-byte Folded Spill
 ; TOSMEM: s_waitcnt lgkmcnt(0)
 
 ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
@@ -32,7 +32,7 @@
 ; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]]
 ; TOVMEM: s_mov_b32 m0, vcc_hi
 
-; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], s89 ; 4-byte Folded Reload
+; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], s3 ; 4-byte Folded Reload
 ; TOSMEM-NOT: vcc_hi
 ; TOSMEM: s_mov_b32 m0, vcc_hi