This is an archive of the discontinued LLVM Phabricator instance.

Fix typo in GCNSchedStrategy
ClosedPublic

Authored by vpykhtin on Jan 21 2017, 6:25 AM.

Download Raw Diff

Details

Reviewers

• tstellarAMD
arsenm

Commits

rG75d1de903f8a: [AMDGPU] Fix typo in GCNSchedStrategy
rL293171: [AMDGPU] Fix typo in GCNSchedStrategy

Summary

With this fix, the attr-amdgpu-num-sgpr.ll lit test started to fail, but I don't know what it is actually testing, so I don't know how to fix it.

Below is a diff of the test output after the fix:

*** before Sat Jan 21 17:06:55 2017
--- after Sat Jan 21 16:52:19 2017
***************
*** 21,27 ****
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
! granulated_workitem_vgpr_count = 0
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
--- 21,27 ----
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
! granulated_workitem_vgpr_count = 1
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
***************
*** 29,35 ****
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
! enable_sgpr_private_segment_wave_byte_offset = 1
user_sgpr_count = 6
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
--- 29,35 ----
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
! enable_sgpr_private_segment_wave_byte_offset = 0
user_sgpr_count = 6
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
***************
*** 55,67 ****
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
! workitem_private_segment_byte_size = 20
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 48
workgroup_fbarrier_count = 0
! wavefront_sgpr_count = 14
! workitem_vgpr_count = 3
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
--- 55,67 ----
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
! workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 48
workgroup_fbarrier_count = 0
! wavefront_sgpr_count = 9
! workitem_vgpr_count = 5
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
***************
*** 76,142 ****
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
! s_mov_b64 s[10:11], s[2:3]
! s_mov_b64 s[8:9], s[0:1]
! s_load_dwordx2 s[0:1], s[4:5], 0x10
! s_add_u32 m0, s7, 0x200
! s_load_dwordx2 s[2:3], s[4:5], 0x0
! s_load_dword s6, s[4:5], 0x20
! s_load_dwordx2 vcc, s[4:5], 0x8
! s_nop 0
! s_waitcnt lgkmcnt(0)
! s_buffer_store_dwordx2 s[0:1], s[8:11], m0 ; 8-byte Folded Spill
! s_waitcnt lgkmcnt(0)
s_load_dwordx2 s[0:1], s[4:5], 0x18
- s_mov_b32 m0, s7
- v_mov_b32_e32 v0, s2
- v_mov_b32_e32 v1, s3
- v_mov_b32_e32 v2, s6
- s_waitcnt lgkmcnt(0)
- s_buffer_store_dwordx2 s[0:1], s[8:11], m0 ; 8-byte Folded Spill
- s_waitcnt lgkmcnt(0)
- s_load_dword s0, s[4:5], 0x24
s_waitcnt lgkmcnt(0)
! flat_store_dword v[0:1], v2
s_waitcnt vmcnt(0) lgkmcnt(0)
v_mov_b32_e32 v0, vcc_lo
v_mov_b32_e32 v1, vcc_hi
! s_add_u32 m0, s7, 0x200
! v_mov_b32_e32 v2, s0
! flat_store_dword v[0:1], v2
! s_buffer_load_dwordx2 s[2:3], s[8:11], m0 ; 8-byte Folded Reload
! s_load_dword s1, s[4:5], 0x28
! s_mov_b32 m0, s7
! s_load_dword s4, s[4:5], 0x2c
! s_waitcnt vmcnt(0) lgkmcnt(0)
! v_mov_b32_e32 v0, s2
! v_mov_b32_e32 v1, s3
! v_mov_b32_e32 v2, s1
flat_store_dword v[0:1], v2
- s_buffer_load_dwordx2 s[0:1], s[8:11], m0 ; 8-byte Folded Reload
s_waitcnt vmcnt(0) lgkmcnt(0)
- v_mov_b32_e32 v2, s4
v_mov_b32_e32 v0, s0
v_mov_b32_e32 v1, s1
flat_store_dword v[0:1], v2
- s_dcache_wb
s_endpgm
.Lfunc_end0:
.size max_12_sgprs, .Lfunc_end0-max_12_sgprs

.section .AMDGPU.csdata
; Kernel info:
! ; codeLenInByte = 256
! ; NumSgprs: 14
! ; NumVgprs: 3
; FloatMode: 192
; IeeeMode: 1
! ; ScratchSize: 20
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
! ; VGPRBlocks: 0
! ; NumSGPRsForWavesPerEU: 14
! ; NumVGPRsForWavesPerEU: 3
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 6
--- 76,127 ----
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
; BB#0:
! s_load_dwordx2 s[2:3], s[4:5], 0x8
! s_load_dwordx2 s[0:1], s[4:5], 0x0
! s_load_dwordx2 vcc, s[4:5], 0x10
! s_load_dword s6, s[4:5], 0x28
! s_waitcnt lgkmcnt(0)
! v_mov_b32_e32 v2, s2
! v_mov_b32_e32 v3, s3
! s_load_dword s2, s[4:5], 0x20
! s_load_dword s3, s[4:5], 0x24
! v_mov_b32_e32 v0, s0
! v_mov_b32_e32 v1, s1
s_load_dwordx2 s[0:1], s[4:5], 0x18
s_waitcnt lgkmcnt(0)
! v_mov_b32_e32 v4, s2
! s_load_dword s4, s[4:5], 0x2c
! flat_store_dword v[0:1], v4
! s_waitcnt vmcnt(0) lgkmcnt(0)
! v_mov_b32_e32 v0, s3
! flat_store_dword v[2:3], v0
s_waitcnt vmcnt(0) lgkmcnt(0)
v_mov_b32_e32 v0, vcc_lo
v_mov_b32_e32 v1, vcc_hi
! v_mov_b32_e32 v2, s6
flat_store_dword v[0:1], v2
s_waitcnt vmcnt(0) lgkmcnt(0)
v_mov_b32_e32 v0, s0
v_mov_b32_e32 v1, s1
+ v_mov_b32_e32 v2, s4
flat_store_dword v[0:1], v2
s_endpgm
.Lfunc_end0:
.size max_12_sgprs, .Lfunc_end0-max_12_sgprs

.section .AMDGPU.csdata
; Kernel info:
! ; codeLenInByte = 168
! ; NumSgprs: 9
! ; NumVgprs: 5
; FloatMode: 192
; IeeeMode: 1
! ; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
! ; VGPRBlocks: 1
! ; NumSGPRsForWavesPerEU: 9
! ; NumVGPRsForWavesPerEU: 5
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 6
***************
*** 159,165 ****
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
! granulated_workitem_vgpr_count = 0
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
--- 144,150 ----
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
max_scratch_backing_memory_byte_size = 0
! granulated_workitem_vgpr_count = 2
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
***************
*** 167,173 ****
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
! enable_sgpr_private_segment_wave_byte_offset = 1
user_sgpr_count = 12
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
--- 152,158 ----
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
! enable_sgpr_private_segment_wave_byte_offset = 0
user_sgpr_count = 12
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
***************
*** 193,205 ****
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
! workitem_private_segment_byte_size = 40
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 48
workgroup_fbarrier_count = 0
wavefront_sgpr_count = 16
! workitem_vgpr_count = 4
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
--- 178,190 ----
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
! workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 48
workgroup_fbarrier_count = 0
wavefront_sgpr_count = 16
! workitem_vgpr_count = 11
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
***************
*** 216,316 ****
; BB#0:
s_mov_b64 s[10:11], s[2:3]
s_mov_b64 s[8:9], s[0:1]
! s_load_dwordx2 s[0:1], s[8:9], 0x8
s_mov_b32 s7, s13
! s_add_u32 m0, s7, 0x700
! s_buffer_store_dwordx2 s[6:7], s[8:11], m0 ; 8-byte Folded Spill
! s_add_u32 m0, s7, 0x500
! s_waitcnt lgkmcnt(0)
! s_buffer_store_dwordx2 s[0:1], s[8:11], m0 ; 8-byte Folded Spill
! s_waitcnt lgkmcnt(0)
! s_load_dwordx2 s[0:1], s[8:9], 0x10
! s_add_u32 m0, s7, 0x200
! s_load_dword vcc_lo, s[8:9], 0x2c
v_mov_b32_e32 v0, s10
! v_mov_b32_e32 v2, s12
! s_waitcnt lgkmcnt(0)
! s_buffer_store_dwordx2 s[0:1], s[8:11], m0 ; 8-byte Folded Spill
! s_waitcnt lgkmcnt(0)
! s_load_dwordx2 s[0:1], s[8:9], 0x18
! s_mov_b32 m0, s7
v_mov_b32_e32 v1, s11
! v_mov_b32_e32 v3, 0
! s_load_dwordx2 s[2:3], s[8:9], 0x0
! s_load_dword s6, s[8:9], 0x20
! s_nop 0
s_waitcnt lgkmcnt(0)
! s_buffer_store_dwordx2 s[0:1], s[8:11], m0 ; 8-byte Folded Spill
! s_add_u32 m0, s7, 0x400
! s_buffer_store_dword vcc_lo, s[8:11], m0 ; 4-byte Folded Spill
! buffer_store_dword v3, v0, s[8:11], s7 offen
s_nop 0
! flat_store_dword v[0:1], v2
s_nop 0
! flat_store_dword v[0:1], v2
! s_nop 0
! flat_store_dword v[0:1], v2
s_nop 0
flat_store_dwordx2 v[0:1], v[0:1]
s_waitcnt vmcnt(0) lgkmcnt(0)
! v_mov_b32_e32 v0, s4
! v_mov_b32_e32 v1, s5
! flat_store_dwordx2 v[0:1], v[0:1]
! s_add_u32 m0, s7, 0x700
! s_buffer_load_dwordx2 s[4:5], s[8:11], m0 ; 8-byte Folded Reload
! v_mov_b32_e32 v2, s6
! s_add_u32 m0, s7, 0x500
! s_load_dword s0, s[8:9], 0x24
! s_load_dword s1, s[8:9], 0x28
s_waitcnt vmcnt(0) lgkmcnt(0)
v_mov_b32_e32 v0, s4
v_mov_b32_e32 v1, s5
! flat_store_dwordx2 v[0:1], v[0:1]
! s_waitcnt vmcnt(0) lgkmcnt(0)
! v_mov_b32_e32 v0, s2
! v_mov_b32_e32 v1, s3
flat_store_dword v[0:1], v2
- s_buffer_load_dwordx2 s[2:3], s[8:11], m0 ; 8-byte Folded Reload
- s_waitcnt vmcnt(0) lgkmcnt(0)
- v_mov_b32_e32 v2, s0
- s_add_u32 m0, s7, 0x200
- v_mov_b32_e32 v0, s2
- v_mov_b32_e32 v1, s3
- flat_store_dword v[0:1], v2
- s_buffer_load_dwordx2 s[2:3], s[8:11], m0 ; 8-byte Folded Reload
- s_waitcnt vmcnt(0) lgkmcnt(0)
- v_mov_b32_e32 v2, s1
- s_mov_b32 m0, s7
- v_mov_b32_e32 v0, s2
- v_mov_b32_e32 v1, s3
- flat_store_dword v[0:1], v2
- s_buffer_load_dwordx2 s[0:1], s[8:11], m0 ; 8-byte Folded Reload
- s_add_u32 m0, s7, 0x400
s_waitcnt vmcnt(0) lgkmcnt(0)
v_mov_b32_e32 v0, s0
v_mov_b32_e32 v1, s1
! s_buffer_load_dword s0, s[8:11], m0 ; 4-byte Folded Reload
! s_waitcnt lgkmcnt(0)
! v_mov_b32_e32 v2, s0
flat_store_dword v[0:1], v2
- s_dcache_wb
s_endpgm
.Lfunc_end1:
.size max_12_sgprs_14_input_sgprs, .Lfunc_end1-max_12_sgprs_14_input_sgprs

.section .AMDGPU.csdata
; Kernel info:
! ; codeLenInByte = 476
; NumSgprs: 16
! ; NumVgprs: 4
; FloatMode: 192
; IeeeMode: 1
! ; ScratchSize: 40
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
! ; VGPRBlocks: 0
; NumSGPRsForWavesPerEU: 16
! ; NumVGPRsForWavesPerEU: 4
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 12
--- 201,275 ----
; BB#0:
s_mov_b64 s[10:11], s[2:3]
s_mov_b64 s[8:9], s[0:1]
! s_load_dwordx2 s[2:3], s[8:9], 0x0
s_mov_b32 s7, s13
! s_mov_b64 s[0:1], s[6:7]
! v_mov_b32_e32 v5, s1
! v_mov_b32_e32 v4, s0
! s_load_dword s0, s[8:9], 0x20
! s_waitcnt lgkmcnt(0)
! v_mov_b32_e32 v7, s3
! s_load_dwordx2 vcc, s[8:9], 0x8
! v_mov_b32_e32 v6, s2
! v_mov_b32_e32 v2, s4
! s_load_dword s2, s[8:9], 0x24
! v_mov_b32_e32 v3, s5
! s_load_dwordx2 s[4:5], s[8:9], 0x10
v_mov_b32_e32 v0, s10
! s_load_dword s3, s[8:9], 0x28
! v_mov_b32_e32 v9, s0
! v_mov_b32_e32 v10, 0
! v_mov_b32_e32 v8, s12
v_mov_b32_e32 v1, s11
! s_load_dwordx2 s[0:1], s[8:9], 0x18
! buffer_store_dword v10, v0, s[8:11], s7 offen
s_waitcnt lgkmcnt(0)
! flat_store_dword v[0:1], v8
s_nop 0
! flat_store_dword v[0:1], v8
s_nop 0
! flat_store_dword v[0:1], v8
s_nop 0
flat_store_dwordx2 v[0:1], v[0:1]
+ s_nop 0
+ flat_store_dwordx2 v[0:1], v[2:3]
+ s_nop 0
+ flat_store_dwordx2 v[0:1], v[4:5]
s_waitcnt vmcnt(0) lgkmcnt(0)
! v_mov_b32_e32 v0, vcc_lo
! s_load_dword s6, s[8:9], 0x2c
! v_mov_b32_e32 v1, vcc_hi
! v_mov_b32_e32 v2, s2
! flat_store_dword v[6:7], v9
! s_nop 0
! flat_store_dword v[0:1], v2
s_waitcnt vmcnt(0) lgkmcnt(0)
v_mov_b32_e32 v0, s4
v_mov_b32_e32 v1, s5
! v_mov_b32_e32 v2, s3
flat_store_dword v[0:1], v2
s_waitcnt vmcnt(0) lgkmcnt(0)
v_mov_b32_e32 v0, s0
v_mov_b32_e32 v1, s1
! v_mov_b32_e32 v2, s6
flat_store_dword v[0:1], v2
s_endpgm
.Lfunc_end1:
.size max_12_sgprs_14_input_sgprs, .Lfunc_end1-max_12_sgprs_14_input_sgprs

.section .AMDGPU.csdata
; Kernel info:
! ; codeLenInByte = 296
; NumSgprs: 16
! ; NumVgprs: 11
; FloatMode: 192
; IeeeMode: 1
! ; ScratchSize: 0
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
! ; VGPRBlocks: 2
; NumSGPRsForWavesPerEU: 16
! ; NumVGPRsForWavesPerEU: 11
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 12

Diff Detail

Repository: rL LLVM

Event Timeline

vpykhtin created this revision. · Jan 21 2017, 6:25 AM

Herald edited edge metadata. · View Herald Transcript · Jan 21 2017, 6:25 AM

Herald added subscribers: nhaehnle, wdng. · View Herald Transcript

vpykhtin edited the summary of this revision. (Show Details) · Jan 21 2017, 6:29 AM

vpykhtin edited the summary of this revision. (Show Details)

attr-amdgpu-num-sgpr.ll is a problematic test. The attribute has some problems when you specify a very small number of SGPRs. I'm wondering if we should just remove it and use the more general attribute instead.

In D28980#653730, @arsenm wrote:

attr-amdgpu-num-sgpr.ll is a problematic test. The attribute has some problems when you specify a very small number of SGPRs. I'm wondering if we should just remove it and use the more general attribute instead.

Should we mark this test as expected fail and submit?

In D28980#654779, @vpykhtin wrote:

In D28980#653730, @arsenm wrote:

attr-amdgpu-num-sgpr.ll is a problematic test. The attribute has some problems when you specify a very small number of SGPRs. I'm wondering if we should just remove it and use the more general attribute instead.

Should we mark this test as expected fail and submit?

I think that's fine for now.

This revision is now accepted and ready to land. · Jan 24 2017, 11:21 AM

I updated attr-amdgpu-num-sgpr.ll so it is now passing.

LGTM

Closed by commit rL293171: [AMDGPU] Fix typo in GCNSchedStrategy (authored by vpykhtin). · Explain Why · Jan 26 2017, 3:02 AM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

AMDGPU/

GCNSchedStrategy.cpp

2 lines

test/

CodeGen/

AMDGPU/

attr-amdgpu-num-sgpr.ll

11 lines

Diff 85877

llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Show First 20 Lines • Show All 97 Lines • ▼ Show 20 Lines	void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
// are compared with instructions that increase the register pressure.		// are compared with instructions that increase the register pressure.
if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {		if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());		Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);		Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
}		}

if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {		if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());		Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure = SGPRExcessLimit);		Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
}		}

// Register pressure is considered 'CRITICAL' if it is approaching a value		// Register pressure is considered 'CRITICAL' if it is approaching a value
// that would reduce the wave occupancy for the execution unit. When		// that would reduce the wave occupancy for the execution unit. When
// register pressure is 'CRITICAL', increading SGPR and VGPR pressure both		// register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
// has the same cost, so we don't need to prefer one over the other.		// has the same cost, so we don't need to prefer one over the other.

VGPRCriticalLimit -= ErrorMargin;		VGPRCriticalLimit -= ErrorMargin;
▲ Show 20 Lines • Show All 198 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll

	; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s \| FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s			; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s \| FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
	; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s \| FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s			; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s \| FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s

	; If spilling to smem, additional registers are used for the resource			; If spilling to smem, additional registers are used for the resource
	; descriptor.			; descriptor.

	; ALL-LABEL: {{^}}max_12_sgprs:			; ALL-LABEL: {{^}}max_9_sgprs:

	; FIXME: Should be ablo to skip this copying of the private segment
	; buffer because all the SGPR spills are to VGPRs.

	; ALL: s_mov_b64 s[10:11], s[2:3]
	; ALL: s_mov_b64 s[8:9], s[0:1]
	; ALL: SGPRBlocks: 1			; ALL: SGPRBlocks: 1
	; ALL: NumSGPRsForWavesPerEU: 14			; ALL: NumSGPRsForWavesPerEU: 9
	define void @max_12_sgprs(i32 addrspace(1)* %out1,			define void @max_9_sgprs(i32 addrspace(1)* %out1,

	i32 addrspace(1)* %out2,			i32 addrspace(1)* %out2,
	i32 addrspace(1)* %out3,			i32 addrspace(1)* %out3,
	i32 addrspace(1)* %out4,			i32 addrspace(1)* %out4,
	i32 %one, i32 %two, i32 %three, i32 %four) #0 {			i32 %one, i32 %two, i32 %three, i32 %four) #0 {
	store i32 %one, i32 addrspace(1)* %out1			store i32 %one, i32 addrspace(1)* %out1
	store i32 %two, i32 addrspace(1)* %out2			store i32 %two, i32 addrspace(1)* %out2
	store i32 %three, i32 addrspace(1)* %out3			store i32 %three, i32 addrspace(1)* %out3
	▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines