Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -1070,7 +1070,7 @@ let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "BUNDLE"; - let hasSideEffects = 1; + let hasSideEffects = 0; } def LIFETIME_START : StandardPseudoInstruction { let OutOperandList = (outs); Index: test/CodeGen/AMDGPU/byval-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/byval-frame-setup.ll +++ test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -171,20 +171,22 @@ ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel: ; GCN: s_mov_b32 s33, s7 -; GCN: s_add_u32 s32, s33, 0xc00{{$}} +; GCN-NOT: s_add_u32 s32, s32, 0x800 -; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 -; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 -; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 +; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 +; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 +; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 ; GCN-NOT: s_add_u32 s32, s32, 0x800 - ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 +; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}} ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 +; GCN: s_getpc_b64 + ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 @@ -249,24 +251,27 @@ ; Make sure the byval alignment is respected in the call frame setup ; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_kernel: ; GCN: s_mov_b32 s33, s7 -; GCN: s_add_u32 s32, s33, 0xc00{{$}} +; GCN-NOT: s_add_u32 s32, s32, 0x800 -; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 -; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 -; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 +; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 +; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 +; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 + ; GCN-NOT: s_add_u32 s32, s32, 0x800 -; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 -; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 -; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 -; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 +; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 +; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 +; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}} +; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 +; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 + +; GCN: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 +; GCN: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 +; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} -; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} -; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 -; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 -; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 ; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24 ; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28 Index: test/CodeGen/AMDGPU/call-argument-types.ll =================================================================== --- test/CodeGen/AMDGPU/call-argument-types.ll +++ test/CodeGen/AMDGPU/call-argument-types.ll @@ -83,14 +83,14 @@ ; HSA: buffer_load_ubyte [[VAR:v[0-9]+]] ; HSA: s_mov_b32 s32, s33 +; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]] +; MESA-DAG: s_mov_b32 s32, s33{{$}} + ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+4 -; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]] -; MESA-DAG: s_mov_b32 s32, s33{{$}} - ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}} @@ -108,12 +108,13 @@ ; HSA: buffer_load_ubyte v0 ; HSA-DAG: s_mov_b32 s32, s33{{$}} +; MESA: buffer_load_ubyte v0 +; MESA-DAG: s_mov_b32 s32, s33{{$}} + ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+4 -; MESA: buffer_load_ubyte v0 -; MESA-DAG: s_mov_b32 s32, s33{{$}} ; GCN: s_waitcnt vmcnt(0) ; GCN-NEXT: v_and_b32_e32 v0, 1, v0 @@ -770,9 +771,11 @@ ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:20 + +; GCN: s_getpc_b64 + ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} -; GCN: s_getpc_b64 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-NOT: s32 @@ -790,9 +793,9 @@ ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}} ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:4 +; GCN: s_getpc_b64 ; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}} ; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GCN: s_getpc_b64 ; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GCN-NOT: s32 Index: test/CodeGen/AMDGPU/call-preserved-registers.ll =================================================================== --- test/CodeGen/AMDGPU/call-preserved-registers.ll +++ test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -130,12 +130,12 @@ ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33: ; GCN: s_mov_b32 s33, s9 ; GCN: s_mov_b32 s32, s33 -; GCN: #ASMSTART -; GCN-NEXT: ; def s33 -; GCN-NEXT: #ASMEND ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4 +; GCN: #ASMSTART +; GCN-NEXT: ; def s33 +; GCN-NEXT: #ASMEND ; GCN: s_swappc_b64 s[30:31], s[4:5] ; GCN: ;;#ASMSTART ; GCN-NEXT: ; use s33 @@ -152,16 +152,17 @@ ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: ; GCN: s_mov_b32 s33, s9 ; GCN-NOT: s34 -; GCN: ;;#ASMSTART -; GCN-NEXT: ; def s34 -; GCN-NEXT: ;;#ASMEND - ; GCN-NOT: s34 ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4 +; GCN-NOT: s34 +; GCN: ;;#ASMSTART +; GCN-NEXT: ; def s34 +; GCN-NEXT: ;;#ASMEND + ; GCN-NOT: s34 ; GCN: s_swappc_b64 s[30:31], s[4:5] @@ -181,10 +182,6 @@ ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v32: ; GCN: s_mov_b32 s33, s9 -; GCN: ;;#ASMSTART -; GCN-NEXT: ; def v32 -; GCN-NEXT: ;;#ASMEND - ; GCN-NOT: v32 ; GCN: s_getpc_b64 s[4:5] ; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 @@ -192,6 +189,10 @@ ; GCN-NOT: v32 ; GCN-DAG: s_mov_b32 s32, s33 +; GCN: ;;#ASMSTART +; GCN-NEXT: ; def v32 +; GCN-NEXT: ;;#ASMEND + ; GCN: s_swappc_b64 s[30:31], s[4:5] ; GCN-NOT: v32 Index: test/CodeGen/AMDGPU/call-waitcnt.ll =================================================================== --- test/CodeGen/AMDGPU/call-waitcnt.ll +++ test/CodeGen/AMDGPU/call-waitcnt.ll @@ -30,16 +30,16 @@ ; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; GCN-NEXT: s_mov_b32 s33, s9 ; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33 -; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 ; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: global_store_dword v[0:1], v2, off +; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4 -; GCN-NEXT: global_store_dword v[0:1], v2, off -; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: s_mov_b32 s32, s33 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: s_endpgm @@ -135,10 +135,10 @@ ; GCN-LABEL: tail_call_memory_arg_load: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: ds_read_b32 v0, v0 ; GCN-NEXT: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4 -; GCN-NEXT: ds_read_b32 v0, v0 ; GCN-NEXT: s_setpc_b64 s[6:7] %vgpr = load volatile i32, i32 addrspace(3)* %ptr tail call void @func(i32 %vgpr) Index: test/CodeGen/AMDGPU/sibling-call.ll =================================================================== --- test/CodeGen/AMDGPU/sibling-call.ll +++ test/CodeGen/AMDGPU/sibling-call.ll @@ -208,24 +208,23 @@ ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s5 offset:8 ; GCN-NEXT: s_mov_b64 exec +; GCN-DAG: s_getpc_b64 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill ; GCN: buffer_store_dword v33, off, s[0:3], s5 ; 4-byte Folded Spill ; GCN-DAG: v_writelane_b32 v34, s34, 0 ; GCN-DAG: v_writelane_b32 v34, s35, 1 -; GCN-DAG: s_getpc_b64 ; GCN: s_swappc_b64 -; GCN: s_getpc_b64 s[6:7] -; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 -; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4 - ; GCN-DAG: v_readlane_b32 s34, v34, 0 ; GCN-DAG: v_readlane_b32 s35, v34, 1 ; GCN: buffer_load_dword v33, off, s[0:3], s5 ; 4-byte Folded Reload ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload +; GCN: s_getpc_b64 s[6:7] +; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 +; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4 ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s5 offset:8 ; GCN-NEXT: s_mov_b64 exec Index: test/CodeGen/ARM/Windows/tls.ll =================================================================== --- test/CodeGen/ARM/Windows/tls.ll +++ test/CodeGen/ARM/Windows/tls.ll @@ -15,11 +15,10 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -36,11 +35,10 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -57,11 +55,10 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -78,11 +75,10 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -99,11 +95,10 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -120,11 +115,10 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]] @@ -141,11 +135,10 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] - +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] ; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]