Index: include/llvm/Target/Target.td =================================================================== --- include/llvm/Target/Target.td +++ include/llvm/Target/Target.td @@ -912,7 +912,7 @@ let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "BUNDLE"; - let hasSideEffects = 1; + let hasSideEffects = 0; } def LIFETIME_START : Instruction { let OutOperandList = (outs); Index: test/CodeGen/AMDGPU/byval-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/byval-frame-setup.ll +++ test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -72,7 +72,6 @@ ; GCN: s_add_u32 s32, s32, 0xc00{{$}} ; GCN: v_writelane_b32 -; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 @@ -84,6 +83,7 @@ ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20 +; GCN: s_add_u32 s32, s32, 0x800{{$}} ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}} ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8 @@ -128,20 +128,20 @@ ; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel: ; GCN: s_mov_b32 s33, s7 -; GCN: s_add_u32 s32, s33, 0xa00{{$}} ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24 -; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} - ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 ; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 ; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 +; GCN: s_add_u32 s32, s33, 0xa00{{$}} +; GCN: s_add_u32 s32, s32, 0x800{{$}} + ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}} ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8 ; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12 Index: test/CodeGen/AMDGPU/call-argument-types.ll =================================================================== --- test/CodeGen/AMDGPU/call-argument-types.ll +++ test/CodeGen/AMDGPU/call-argument-types.ll @@ -67,10 +67,10 @@ ; MESA: s_mov_b32 s33, s3{{$}} ; HSA: s_mov_b32 s33, s9{{$}} +; GCN: buffer_load_ubyte [[VAR:v[0-9]+]] ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+4 -; GCN-NEXT: buffer_load_ubyte [[VAR:v[0-9]+]] ; HSA-NEXT: s_mov_b32 s4, s33 ; HSA-NEXT: s_mov_b32 s32, s33 @@ -91,10 +91,10 @@ ; GCN-LABEL: {{^}}test_call_external_void_func_i1_zeroext: ; MESA: s_mov_b32 s33, s3{{$}} +; GCN: buffer_load_ubyte v0 ; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} ; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+4 -; GCN-NEXT: buffer_load_ubyte v0 ; GCN-DAG: s_mov_b32 s4, s33{{$}} ; GCN-DAG: s_mov_b32 s32, s33{{$}} @@ -400,8 +400,8 @@ ; GCN-DAG: buffer_load_dwordx4 v[24:27], off ; GCN-DAG: buffer_load_dwordx4 v[28:31], off -; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SP_REG]] offset:4{{$}} ; GCN: s_waitcnt +; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SP_REG]] offset:4{{$}} ; GCN-NEXT: s_swappc_b64 ; GCN-NEXT: s_endpgm define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { @@ -447,23 +447,8 @@ ; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], s33 offset:8 ; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], s33 offset:12 -; GCN: s_add_u32 [[SP]], [[SP]], 0x200 - -; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8 -; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12 - -; HSA: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:8 -; HSA: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]] offset:4 - - -; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], s33 offset:8 -; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], s33 offset:12 - -; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8 -; MESA: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]] offset:4 - -; GCN-NEXT: s_swappc_b64 -; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200 +; GCN: s_swappc_b64 +; GCN: s_sub_u32 [[SP]], [[SP]], 0x200 define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { %val = alloca { i8, i32 }, align 4 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %val, i32 0, i32 0 Index: test/CodeGen/AMDGPU/call-preserved-registers.ll =================================================================== --- test/CodeGen/AMDGPU/call-preserved-registers.ll +++ test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -113,13 +113,14 @@ ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33: ; GCN: s_mov_b32 s34, s9 -; GCN: ; def s33 -; GCN-NEXT: #ASMEND ; GCN: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4 ; GCN-NEXT: s_mov_b32 s4, s34 ; GCN-NEXT: s_mov_b32 s32, s34 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s33 +; GCN-NEXT: #ASMEND ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s33 @@ -134,13 +135,14 @@ ; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v32: ; GCN: s_mov_b32 s33, s9 -; GCN: ; def v32 -; GCN-NEXT: #ASMEND ; GCN: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4 ; GCN-NEXT: s_mov_b32 s4, s33 ; GCN-NEXT: s_mov_b32 s32, s33 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def v32 +; GCN-NEXT: #ASMEND ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use v32 Index: test/CodeGen/AMDGPU/callee-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/callee-frame-setup.ll +++ test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -44,7 +44,7 @@ ; GCN-DAG: v_writelane_b32 v32, s35, ; GCN-DAG: s_add_u32 s32, s32, 0x300{{$}} ; GCN-DAG: v_mov_b32_e32 v0, 0{{$}} -; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}} +; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}} ; GCN-DAG: s_mov_b32 s33, s5 Index: test/CodeGen/AMDGPU/sibling-call.ll =================================================================== --- test/CodeGen/AMDGPU/sibling-call.ll +++ test/CodeGen/AMDGPU/sibling-call.ll @@ -161,28 +161,27 @@ ; Have another non-tail in the function ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_other_call: ; GCN: s_mov_b32 s5, s32 +; GCN: s_add_u32 s32, s32, 0x400 ; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:12 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill ; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill ; GCN-DAG: v_writelane_b32 v34, s33, 0 ; GCN-DAG: v_writelane_b32 v34, s34, 1 ; GCN-DAG: v_writelane_b32 v34, s35, 2 -; GCN-DAG: s_add_u32 s32, s32, 0x400 -; GCN: s_getpc_b64 ; GCN: s_swappc_b64 -; GCN: s_getpc_b64 s[6:7] -; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 -; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4 - +; GCN-DAG: s_getpc_b64 ; GCN-DAG: v_readlane_b32 s33, v34, 0 ; GCN-DAG: v_readlane_b32 s34, v34, 1 ; GCN-DAG: v_readlane_b32 s35, v34, 2 -; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:4 -; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 -; GCN: buffer_load_dword v34, off, s[0:3], s5 offset:12 +; GCN-DAG: buffer_load_dword v33, off, s[0:3], s5 offset:4 +; GCN-DAG: buffer_load_dword v32, off, s[0:3], s5 offset:8 +; GCN-DAG: buffer_load_dword v34, off, s[0:3], s5 offset:12 +; GCN-DAG: s_getpc_b64 s[6:7] +; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 +; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4 ; GCN: s_sub_u32 s32, s32, 0x400 ; GCN: s_setpc_b64 s[6:7] define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 { Index: test/CodeGen/ARM/Windows/tls.ll =================================================================== --- test/CodeGen/ARM/Windows/tls.ll +++ test/CodeGen/ARM/Windows/tls.ll @@ -15,9 +15,9 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] @@ -36,9 +36,9 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] @@ -57,9 +57,9 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] @@ -78,9 +78,9 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] @@ -99,9 +99,9 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] @@ -120,9 +120,9 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2] @@ -141,9 +141,9 @@ ; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2 -; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index ; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index +; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44] ; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]] ; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]