Index: test/CodeGen/AMDGPU/add_i64.ll
===================================================================
--- test/CodeGen/AMDGPU/add_i64.ll
+++ test/CodeGen/AMDGPU/add_i64.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() readnone
+declare i32 @llvm.amdgcn.workitem.id.x() readnone
 
 ; SI-LABEL: {{^}}test_i64_vreg:
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
-  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
   %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
   %a = load i64, i64 addrspace(1)* %a_ptr
@@ -59,7 +59,7 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
-  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
   %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
   %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
Index: test/CodeGen/AMDGPU/addrspacecast.ll
===================================================================
--- test/CodeGen/AMDGPU/addrspacecast.ll
+++ test/CodeGen/AMDGPU/addrspacecast.ll
@@ -47,19 +47,19 @@
 ; CHECK: flat_load_dword
 define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
   %alloca = alloca i32, i32 9, align 4
-  %x = call i32 @llvm.r600.read.tidig.x() #3
+  %x = call i32 @llvm.amdgcn.workitem.id.x() #3
   %pptr = getelementptr i32, i32* %alloca, i32 %x
   %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
   store i32 %x, i32 addrspace(4)* %fptr
   ; Dummy call
-  call void @llvm.AMDGPU.barrier.local() #1
+  call void @llvm.amdgcn.s.barrier() #1
   %reload = load i32, i32 addrspace(4)* %fptr, align 4
   store i32 %reload, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-declare void @llvm.AMDGPU.barrier.local() #1
-declare i32 @llvm.r600.read.tidig.x() #3
+declare void @llvm.amdgcn.s.barrier() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #3
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind convergent }
Index: test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
===================================================================
--- test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -1,8 +1,9 @@
 ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
 ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
 
-declare i32 @llvm.SI.tid() nounwind readnone
-declare void @llvm.AMDGPU.barrier.local() nounwind convergent
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
+declare void @llvm.amdgcn.s.barrier() #2
 
 ; The required pointer calculations for the alloca'd actually requires
 ; an add and won't be folded into the addressing, which fails with a
@@ -24,9 +25,10 @@
 ; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
 ; SI-PROMOTE: ds_write_b32 [[PTRREG]]
-define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
+define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {
   %alloca = alloca [4 x i32], i32 4, align 16
-  %tid = call i32 @llvm.SI.tid() readnone
+  %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+  %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
   %a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid
   %a = load i32, i32 addrspace(1)* %a_ptr
@@ -35,10 +37,13 @@
   %alloca_ptr = getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
   store i32 %result, i32* %alloca_ptr, align 4
   ; Dummy call
-  call void @llvm.AMDGPU.barrier.local() nounwind convergent
+  call void @llvm.amdgcn.s.barrier()
   %reload = load i32, i32* %alloca_ptr, align 4
   %out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
   store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
   ret void
 }
 
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind convergent }
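A note on array-ptr-calc-i32.ll above: unlike the other tests, which swap llvm.r600.read.tidig.x for llvm.amdgcn.workitem.id.x one-for-one, llvm.SI.tid has no direct replacement; the paired mbcnt calls rebuild its value by counting the lower-numbered lanes in the wave's 64-bit execution mask. A minimal standalone sketch of that idiom follows; the kernel name and output pointer are illustrative, not part of the patch:

  declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
  declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0

  ; Hypothetical example: compute this lane's index within the wave.
  define void @example_lane_id(i32 addrspace(1)* %out) {
    ; Count the set bits below this lane in the low 32 bits of mask -1 (all lanes).
    %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
    ; Continue the count through the high 32 bits, accumulating on top of %lo.
    %lane = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
    %gep = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
    store i32 %lane, i32 addrspace(1)* %gep
    ret void
  }

  attributes #0 = { nounwind readnone }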
Index: test/CodeGen/AMDGPU/cgp-addressing-modes.ll
===================================================================
--- test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -5,8 +5,6 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare i32 @llvm.r600.read.tidig.x() #0
-
 ; OPT-LABEL: @test_sink_global_small_offset_i32(
 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
 ; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
Index: test/CodeGen/AMDGPU/commute-compares.ll
===================================================================
--- test/CodeGen/AMDGPU/commute-compares.ll
+++ test/CodeGen/AMDGPU/commute-compares.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 ; --------------------------------------------------------------------------------
 ; i32 compares
@@ -9,7 +9,7 @@
 ; GCN-LABEL: {{^}}commute_eq_64_i32:
 ; GCN: v_cmp_eq_i32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -22,7 +22,7 @@
 ; GCN-LABEL: {{^}}commute_ne_64_i32:
 ; GCN: v_cmp_ne_i32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -37,7 +37,7 @@
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
 ; GCN: v_cmp_ne_i32_e32 vcc, [[K]], v{{[0-9]+}}
 define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -50,7 +50,7 @@
 ; GCN-LABEL: {{^}}commute_ugt_64_i32:
 ; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -63,7 +63,7 @@
 ; GCN-LABEL: {{^}}commute_uge_64_i32:
 ; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
 define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -76,7 +76,7 @@
 ; GCN-LABEL: {{^}}commute_ult_64_i32:
 ; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -89,7 +89,7 @@
 ; GCN-LABEL: {{^}}commute_ule_63_i32:
 ; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -105,7 +105,7 @@
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
 ; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
 define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -118,7 +118,7 @@
 ; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
 ; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}}
 define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -131,7 +131,7 @@
 ; GCN-LABEL: {{^}}commute_sge_neg2_i32:
 ; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
 define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -144,7 +144,7 @@
 ; GCN-LABEL: {{^}}commute_slt_neg16_i32:
 ; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
 define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -157,7 +157,7 @@
 ; GCN-LABEL: {{^}}commute_sle_5_i32:
 ; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
 define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -174,7 +174,7 @@
 ; GCN-LABEL: {{^}}commute_eq_64_i64:
 ; GCN: v_cmp_eq_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -187,7 +187,7 @@
 ; GCN-LABEL: {{^}}commute_ne_64_i64:
 ; GCN: v_cmp_ne_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -200,7 +200,7 @@
 ; GCN-LABEL: {{^}}commute_ugt_64_i64:
 ; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -213,7 +213,7 @@
 ; GCN-LABEL: {{^}}commute_uge_64_i64:
 ; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -226,7 +226,7 @@
 ; GCN-LABEL: {{^}}commute_ult_64_i64:
 ; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -239,7 +239,7 @@
 ; GCN-LABEL: {{^}}commute_ule_63_i64:
 ; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -255,7 +255,7 @@
 ; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
 ; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -268,7 +268,7 @@
 ; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
 ; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -281,7 +281,7 @@
 ; GCN-LABEL: {{^}}commute_sge_neg2_i64:
 ; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -294,7 +294,7 @@
 ; GCN-LABEL: {{^}}commute_slt_neg16_i64:
 ; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -307,7 +307,7 @@
 ; GCN-LABEL: {{^}}commute_sle_5_i64:
 ; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -325,7 +325,7 @@
 ; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
 ; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -339,7 +339,7 @@
 ; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
 ; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -352,7 +352,7 @@
 ; GCN-LABEL: {{^}}commute_oge_2.0_f32:
 ; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -365,7 +365,7 @@
 ; GCN-LABEL: {{^}}commute_olt_2.0_f32:
 ; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -378,7 +378,7 @@
 ; GCN-LABEL: {{^}}commute_ole_2.0_f32:
 ; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -391,7 +391,7 @@
 ; GCN-LABEL: {{^}}commute_one_2.0_f32:
 ; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -404,7 +404,7 @@
 ; GCN-LABEL: {{^}}commute_ord_2.0_f32:
 ; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
 define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -417,7 +417,7 @@
 ; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
 ; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -430,7 +430,7 @@
 ; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
 ; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -443,7 +443,7 @@
 ; GCN-LABEL: {{^}}commute_uge_2.0_f32:
 ; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -456,7 +456,7 @@
 ; GCN-LABEL: {{^}}commute_ult_2.0_f32:
 ; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -469,7 +469,7 @@
 ; GCN-LABEL: {{^}}commute_ule_2.0_f32:
 ; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -482,7 +482,7 @@
 ; GCN-LABEL: {{^}}commute_une_2.0_f32:
 ; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -495,7 +495,7 @@
 ; GCN-LABEL: {{^}}commute_uno_2.0_f32:
 ; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
 define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -513,7 +513,7 @@
 ; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
 ; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -527,7 +527,7 @@
 ; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
 ; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -540,7 +540,7 @@
 ; GCN-LABEL: {{^}}commute_oge_2.0_f64:
 ; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -553,7 +553,7 @@
 ; GCN-LABEL: {{^}}commute_olt_2.0_f64:
 ; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -566,7 +566,7 @@
 ; GCN-LABEL: {{^}}commute_ole_2.0_f64:
 ; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -579,7 +579,7 @@
 ; GCN-LABEL: {{^}}commute_one_2.0_f64:
 ; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -592,7 +592,7 @@
 ; GCN-LABEL: {{^}}commute_ord_2.0_f64:
 ; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
 define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -605,7 +605,7 @@
 ; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
 ; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -618,7 +618,7 @@
 ; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
 ; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -631,7 +631,7 @@
 ; GCN-LABEL: {{^}}commute_uge_2.0_f64:
 ; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -644,7 +644,7 @@
 ; GCN-LABEL: {{^}}commute_ult_2.0_f64:
 ; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -657,7 +657,7 @@
 ; GCN-LABEL: {{^}}commute_ule_2.0_f64:
 ; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -670,7 +670,7 @@
 ; GCN-LABEL: {{^}}commute_une_2.0_f64:
 ; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -683,7 +683,7 @@
 ; GCN-LABEL: {{^}}commute_uno_2.0_f64:
 ; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
 define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
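All of the commute-compares.ll checks above encode one backend rule: a VOPC compare can hold an immediate only in its first source operand, so when IR compares a loaded value against a constant, the instruction is commuted and the predicate swapped (ugt becomes lt with the constant first, and so on). A hand-written sketch of the shape being tested, under that assumption (function names are illustrative):

  ; icmp ugt %val, 64 cannot place 64 in src1, so the backend is expected
  ; to emit the commuted form: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
  define void @example_commute_ugt(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    %val = load i32, i32 addrspace(1)* %in
    %cmp = icmp ugt i32 %val, 64
    %ext = sext i1 %cmp to i32
    store i32 %ext, i32 addrspace(1)* %out
    ret void
  }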
Index: test/CodeGen/AMDGPU/commute_modifiers.ll
===================================================================
--- test/CodeGen/AMDGPU/commute_modifiers.ll
+++ test/CodeGen/AMDGPU/commute_modifiers.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.fma.f32(float, float, float) nounwind readnone
@@ -9,7 +9,7 @@
 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -23,7 +23,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -38,7 +38,7 @@
 ; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %x = load float, float addrspace(1)* %gep.0
   %x.fneg = fsub float -0.000000e+00, %x
@@ -54,7 +54,7 @@
 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -69,7 +69,7 @@
 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -86,7 +86,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -103,7 +103,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -122,7 +122,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -140,7 +140,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -162,7 +162,7 @@
 ; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]|
 ; SI: buffer_store_dword [[RESULT]]
 define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
Index: test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
===================================================================
--- test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
+++ test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
@@ -12,7 +12,7 @@
 ; GCN: s_endpgm
 define void @reschedule_global_load_lds_store(i32 addrspace(1)* noalias %gptr0, i32 addrspace(1)* noalias %gptr1, i32 addrspace(3)* noalias %lptr, i32 %c) #0 {
 entry:
-  %tid = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx = shl i32 %tid, 2
   %gep0 = getelementptr i32, i32 addrspace(1)* %gptr0, i32 %idx
   %gep1 = getelementptr i32, i32 addrspace(1)* %gptr1, i32 %idx
@@ -42,10 +42,7 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
===================================================================
--- test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
 
-declare i32 @llvm.r600.read.tidig.x() #0
-declare void @llvm.AMDGPU.barrier.local() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare void @llvm.amdgcn.s.barrier() #1
 
 ; Function Attrs: nounwind
 ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
@@ -25,7 +25,7 @@
 ; CHECK: s_endpgm
 define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
 entry:
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %mul = shl nsw i32 %x.i, 1
   br label %for.body
@@ -33,7 +33,7 @@
   %sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
   %offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
   %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  tail call void @llvm.AMDGPU.barrier.local() #1
+  tail call void @llvm.amdgcn.s.barrier() #1
   %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02
   %tmp = load float, float addrspace(3)* %arrayidx, align 4
   %add1 = add nsw i32 %offset.02, 1
Index: test/CodeGen/AMDGPU/ds-sub-offset.ll
===================================================================
--- test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -1,7 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
 
-declare void @llvm.AMDGPU.barrier.local() #2
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 @lds.obj = addrspace(3) global [256 x i32] undef, align 4
@@ -12,7 +11,7 @@
 ; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12
 define void @write_ds_sub0_offset0_global() #0 {
 entry:
-  %x.i = call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
   %sub1 = sub i32 0, %x.i
   %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
   %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
@@ -26,7 +25,7 @@
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
 ; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535
 define void @add_x_shl_neg_to_sub_max_offset() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 65535, %shl
@@ -41,7 +40,7 @@
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
 ; GCN: ds_write_b8 [[NEG]], [[K]]{{$}}
 define void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 65536, %shl
@@ -60,7 +59,7 @@
 ; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}}
 ; GCN: s_endpgm
 define void @add_x_shl_neg_to_sub_multi_use() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add0 = add i32 123, %shl
@@ -82,7 +81,7 @@
 ; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
 ; GCN: s_endpgm
 define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 123, %shl
@@ -97,7 +96,7 @@
 ; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
 ; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255
 define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 1019, %shl
@@ -111,7 +110,7 @@
 ; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]]
 ; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
 define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 1020, %shl
Index: test/CodeGen/AMDGPU/ds_read2.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_read2.ll
+++ test/CodeGen/AMDGPU/ds_read2.ll
@@ -13,7 +13,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2_f32(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -32,7 +32,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 255
@@ -50,7 +50,7 @@
 ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
 ; SI: s_endpgm
 define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 257
@@ -67,7 +67,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
 ; SI: s_endpgm
 define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -99,7 +99,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
 ; SI: s_endpgm
 define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -109,7 +109,7 @@
   %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum.0 = fadd float %val0, %val1
 
-  call void @llvm.AMDGPU.barrier.local() #2
+  call void @llvm.amdgcn.s.barrier() #2
 
   %idx.2 = add nsw i32 %tid.x, 11
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
@@ -134,7 +134,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
 ; SI: s_endpgm
 define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -171,7 +171,7 @@
 ; SI: ds_read_b32
 ; SI: s_endpgm
 define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
   %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
   %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
@@ -197,7 +197,7 @@
 ; SI: ds_read_b32
 ; SI: s_endpgm
 define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
   %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
   %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
@@ -220,7 +220,7 @@
 ; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}}
 ; SI: s_endpgm
 define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
   %ptr.1 = insertelement <2 x [512 x float] addrspace(3)*> %ptr.0, [512 x float] addrspace(3)* @lds, i32 1
   %x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
@@ -244,7 +244,7 @@
 ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
 ; SI: s_endpgm
 define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -262,7 +262,7 @@
 ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
 ; SI: s_endpgm
 define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -281,7 +281,7 @@
 ; SI-NOT: ds_read2_b32
 ; SI: s_endpgm
 define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 1
   %add.x = add nsw i32 %x.i, 8
@@ -297,7 +297,7 @@
 ; SI-NOT: ds_read2_b32
 ; SI: s_endpgm
 define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 2
   %add.x = add nsw i32 %x.i, 8
@@ -316,7 +316,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2_f64(double addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
@@ -332,7 +332,7 @@
 ; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
 ; SI: s_endpgm
 define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 255
@@ -350,7 +350,7 @@
 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
 ; SI: s_endpgm
 define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 257
@@ -368,7 +368,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
 ; SI: s_endpgm
 define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 7
@@ -438,8 +438,8 @@
 @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
 
 define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
-  %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
+  %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
+  %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
   %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
   %tmp16 = load float, float addrspace(3)* %arrayidx44, align 4
   %add47 = add nsw i32 %x.i, 1
@@ -494,19 +494,19 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.y() #1
+declare i32 @llvm.amdgcn.workgroup.id.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
+declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 ; Function Attrs: convergent nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
+declare void @llvm.amdgcn.s.barrier() #2
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
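For context on what the ds_read2.ll tests above exercise: the SI load/store optimizer merges two 32-bit LDS loads off the same base when both offsets are 4-byte-aligned and fit the instruction's two 8-bit offset fields, producing a single ds_read2_b32. A minimal sketch of the qualifying shape (the global and function names here are illustrative, not from the patch):

  @lds.example = addrspace(3) global [512 x float] undef, align 4

  declare i32 @llvm.amdgcn.workitem.id.x() #0

  define void @example_read2(float addrspace(1)* %out) {
    %x = call i32 @llvm.amdgcn.workitem.id.x() #0
    %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds.example, i32 0, i32 %x
    %val0 = load float, float addrspace(3)* %ptr0, align 4
    ; A second load 8 elements away should merge with the first into
    ; something like: ds_read2_b32 v[N:N+1], vPTR offset1:8
    %x.8 = add nsw i32 %x, 8
    %ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds.example, i32 0, i32 %x.8
    %val1 = load float, float addrspace(3)* %ptr1, align 4
    %sum = fadd float %val0, %val1
    store float %sum, float addrspace(1)* %out, align 4
    ret void
  }

  attributes #0 = { nounwind readnone }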
Index: test/CodeGen/AMDGPU/ds_read2_superreg.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_read2_superreg.ll
+++ test/CodeGen/AMDGPU/ds_read2_superreg.ll
@@ -13,7 +13,7 @@
 ; CI: buffer_store_dwordx2 [[RESULT]]
 ; CI: s_endpgm
 define void @simple_read2_v2f32_superreg_align4(<2 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <2 x float>], [512 x <2 x float>] addrspace(3)* @lds.v2, i32 0, i32 %x.i
   %val0 = load <2 x float>, <2 x float> addrspace(3)* %arrayidx0, align 4
   %out.gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %x.i
@@ -27,7 +27,7 @@
 ; CI: buffer_store_dwordx2 [[RESULT]]
 ; CI: s_endpgm
 define void @simple_read2_v2f32_superreg(<2 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <2 x float>], [512 x <2 x float>] addrspace(3)* @lds.v2, i32 0, i32 %x.i
   %val0 = load <2 x float>, <2 x float> addrspace(3)* %arrayidx0
   %out.gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %x.i
@@ -44,7 +44,7 @@
 ; CI: buffer_store_dword v[[ADD2]]
 ; CI: s_endpgm
 define void @simple_read2_v4f32_superreg_align4(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
   %val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0, align 4
   %elt0 = extractelement <4 x float> %val0, i32 0
@@ -69,7 +69,7 @@
 ; CI: buffer_store_dword v[[ADD1]]
 ; CI: s_endpgm
 define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <3 x float>], [512 x <3 x float>] addrspace(3)* @lds.v3, i32 0, i32 %x.i
   %val0 = load <3 x float>, <3 x float> addrspace(3)* %arrayidx0, align 4
   %elt0 = extractelement <3 x float> %val0, i32 0
@@ -95,7 +95,7 @@
 ; CI: buffer_store_dwordx4
 ; CI: s_endpgm
 define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
   %val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0, align 8
   %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i32 %x.i
@@ -110,7 +110,7 @@
 ; CI: buffer_store_dwordx4
 ; CI: s_endpgm
 define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
   %val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0
   %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i32 %x.i
@@ -130,7 +130,7 @@
 ; CI: buffer_store_dwordx4
 ; CI: s_endpgm
 define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <8 x float>], [512 x <8 x float>] addrspace(3)* @lds.v8, i32 0, i32 %x.i
   %val0 = load <8 x float>, <8 x float> addrspace(3)* %arrayidx0
   %out.gep = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %out, i32 %x.i
@@ -158,7 +158,7 @@
 ; CI: buffer_store_dwordx4
 ; CI: s_endpgm
 define void @simple_read2_v16f32_superreg(<16 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <16 x float>], [512 x <16 x float>] addrspace(3)* @lds.v16, i32 0, i32 %x.i
   %val0 = load <16 x float>, <16 x float> addrspace(3)* %arrayidx0
   %out.gep = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %out, i32 %x.i
@@ -173,7 +173,7 @@
 ; CI: buffer_store_dwordx2 v{{\[}}[[REG_ELT0]]:[[REG_ELT1]]{{\]}}
 ; CI: s_endpgm
 define void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 1
 
@@ -196,7 +196,7 @@
 ; CI: buffer_store_dwordx4 v{{\[}}[[REG_ELT0]]:[[REG_ELT3]]{{\]}}
 ; CI: s_endpgm
 define void @simple_read2_v4f32_superreg_scalar_loads_align4(<4 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 1
   %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 2
@@ -224,13 +224,10 @@
 declare i32 @llvm.r600.read.tgid.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
-
-; Function Attrs: convergent nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
+declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/ds_read2st64.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_read2st64.ll
+++ test/CodeGen/AMDGPU/ds_read2st64.ll
@@ -11,7 +11,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
@@ -30,7 +30,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -50,7 +50,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -70,7 +70,7 @@
 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -87,7 +87,7 @@
 ; SI-NOT: ds_read2st64_b32
 ; SI: s_endpgm
 define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 63
@@ -103,7 +103,7 @@
 ; SI-NOT: ds_read2st64_b32
 ; SI: s_endpgm
 define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -123,7 +123,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 64
@@ -142,7 +142,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
@@ -162,7 +162,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
 ; SI: s_endpgm
 define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
@@ -182,7 +182,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 256
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
@@ -202,7 +202,7 @@
 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
@@ -219,7 +219,7 @@
 ; SI-NOT: ds_read2st64_b64
 ; SI: s_endpgm
 define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
@@ -240,7 +240,7 @@
 ; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
 ; SI: s_endpgm
 define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
@@ -253,16 +253,10 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.y() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
+declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i %val = load float, float addrspace(1)* %in.gep, align 4 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -28,7 +28,7 @@ ; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 %val0 = load float, float addrspace(1)* %in.gep.0, align 4 @@ -47,7 +47,7 @@ ; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32 ; SI: s_endpgm define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -66,7 +66,7 @@ ; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32 ; SI: s_endpgm define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -87,7 +87,7 @@ ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1 %val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8 @@ -108,7 +108,7 @@ ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8 %val0 = extractelement <2 x float> %val, i32 0 @@ -127,7 +127,7 @@ ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16 %val0 = extractelement <4 x float> %val, i32 0 @@ -147,7 +147,7 @@ ; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] 
offset1:255 ; SI: s_endpgm define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 %val0 = load float, float addrspace(1)* %in.gep.0, align 4 @@ -165,7 +165,7 @@ ; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 ; SI: s_endpgm define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -183,7 +183,7 @@ ; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 ; SI: s_endpgm define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -213,7 +213,7 @@ ; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 ; SI: s_endpgm define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -244,7 +244,7 @@ ; SI: ds_write_b32 ; SI: s_endpgm define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -271,7 +271,7 @@ ; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8 ; SI: s_endpgm define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i %val = load double, double addrspace(1)* %in.gep, align 8 %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -289,7 +289,7 @@ ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 ; SI: s_endpgm define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i %val = load double, double addrspace(1)* 
%in.gep, align 8 %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i @@ -307,7 +307,7 @@ ; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1 %val0 = load double, double addrspace(1)* %in.gep.0, align 8 @@ -372,8 +372,8 @@ @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4 define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tgid.x() #1 - %y.i = tail call i32 @llvm.r600.read.tidig.y() #1 + %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1 + %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1 %val = load float, float addrspace(1)* %in %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i store float %val, float addrspace(3)* %arrayidx44, align 4 @@ -411,7 +411,7 @@ ; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:1{{$}} ; CI: s_endpgm define void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in %val0 = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 4 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %out, i32 %x.i @@ -420,19 +420,16 @@ } ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tgid.x() #1 +declare i32 @llvm.amdgcn.workgroup.id.x() #1 ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tgid.y() #1 +declare i32 @llvm.amdgcn.workgroup.id.y() #1 ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tidig.y() #1 - -; Function Attrs: convergent nounwind -declare void @llvm.AMDGPU.barrier.local() #2 +declare i32 @llvm.amdgcn.workitem.id.y() #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/ds_write2st64.ll =================================================================== --- test/CodeGen/AMDGPU/ds_write2st64.ll +++ test/CodeGen/AMDGPU/ds_write2st64.ll @@ -8,7 +8,7 @@ ; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1 ; SI: s_endpgm define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i %val = load float, float addrspace(1)* %in.gep, align 4 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -26,7 +26,7 @@ ; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 ; SI: s_endpgm define void 
@simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 %val0 = load float, float addrspace(1)* %in.gep.0, align 4 @@ -47,7 +47,7 @@ ; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 ; SI: s_endpgm define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 %val0 = load float, float addrspace(1)* %in.gep.0, align 4 @@ -67,7 +67,7 @@ ; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 ; SI: s_endpgm define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1 %val0 = load double, double addrspace(1)* %in.gep.0, align 8 @@ -86,7 +86,7 @@ ; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8 ; SI: s_endpgm define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i %val = load double, double addrspace(1)* %in.gep, align 8 %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i @@ -98,19 +98,10 @@ } ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tgid.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tgid.y() #1 - -; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tidig.x() #1 - -; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tidig.y() #1 - -; Function Attrs: convergent nounwind -declare void @llvm.AMDGPU.barrier.local() #2 +declare i32 @llvm.amdgcn.workitem.id.y() #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/fabs.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.f64.ll +++ test/CodeGen/AMDGPU/fabs.f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare double @fabs(double) readnone declare double @llvm.fabs.f64(double) readnone @@ -11,7 +11,7 @@ ; SI: v_and_b32 ; SI: s_endpgm define void @v_fabs_f64(double 
addrspace(1)* %out, double addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %tidext = sext i32 %tid to i64 %gep = getelementptr double, double addrspace(1)* %in, i64 %tidext %val = load double, double addrspace(1)* %gep, align 8 Index: test/CodeGen/AMDGPU/flat-address-space.ll =================================================================== --- test/CodeGen/AMDGPU/flat-address-space.ll +++ test/CodeGen/AMDGPU/flat-address-space.ll @@ -127,9 +127,6 @@ ret void } -declare void @llvm.AMDGPU.barrier.local() #1 -declare i32 @llvm.r600.read.tidig.x() #3 - attributes #0 = { nounwind } attributes #1 = { nounwind convergent } attributes #3 = { nounwind readnone } Index: test/CodeGen/AMDGPU/fma-combine.ll =================================================================== --- test/CodeGen/AMDGPU/fma-combine.ll +++ test/CodeGen/AMDGPU/fma-combine.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 declare double @llvm.fabs.f64(double) #0 declare double @llvm.fma.f64(double, double, double) #0 declare float @llvm.fma.f32(float, float, float) #0 @@ -14,7 +14,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -42,7 +42,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -71,7 +71,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -95,7 +95,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double 
addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -123,7 +123,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -152,7 +152,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -180,7 +180,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -209,7 +209,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -238,7 +238,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -272,7 +272,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ 
-307,7 +307,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -342,7 +342,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 Index: test/CodeGen/AMDGPU/fmax_legacy.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmax_legacy.f64.ll +++ test/CodeGen/AMDGPU/fmax_legacy.f64.ll @@ -1,11 +1,11 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; Make sure we don't try to form FMAX_LEGACY nodes with f64 -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; FUNC-LABEL: @test_fmax_legacy_uge_f64 define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -20,7 +20,7 @@ ; FUNC-LABEL: @test_fmax_legacy_oge_f64 define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -35,7 +35,7 @@ ; FUNC-LABEL: @test_fmax_legacy_ugt_f64 define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -50,7 +50,7 @@ ; FUNC-LABEL: @test_fmax_legacy_ogt_f64 define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 Index: test/CodeGen/AMDGPU/fmin_legacy.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmin_legacy.f64.ll +++ test/CodeGen/AMDGPU/fmin_legacy.f64.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; FUNC-LABEL: @test_fmin_legacy_f64 define void @test_fmin_legacy_f64(<4 x 
double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 { @@ -15,7 +15,7 @@ ; FUNC-LABEL: @test_fmin_legacy_ule_f64 define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -30,7 +30,7 @@ ; FUNC-LABEL: @test_fmin_legacy_ole_f64 define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -45,7 +45,7 @@ ; FUNC-LABEL: @test_fmin_legacy_olt_f64 define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -60,7 +60,7 @@ ; FUNC-LABEL: @test_fmin_legacy_ult_f64 define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 Index: test/CodeGen/AMDGPU/fmuladd.ll =================================================================== --- test/CodeGen/AMDGPU/fmuladd.ll +++ test/CodeGen/AMDGPU/fmuladd.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s declare float @llvm.fmuladd.f32(float, float, float) declare double @llvm.fmuladd.f64(double, double, double) -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone ; CHECK-LABEL: {{^}}fmuladd_f32: @@ -37,7 +37,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -56,7 +56,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -77,7 +77,7 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* 
%out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -99,7 +99,7 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -119,7 +119,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -139,7 +139,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -161,7 +161,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -183,7 +183,7 @@ ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid Index: test/CodeGen/AMDGPU/fp-classify.ll =================================================================== --- test/CodeGen/AMDGPU/fp-classify.ll +++ test/CodeGen/AMDGPU/fp-classify.ll @@ -1,9 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare i1 @llvm.AMDGPU.class.f32(float, i32) #1 -declare i1 @llvm.AMDGPU.class.f64(double, i32) #1 -declare i32 @llvm.r600.read.tidig.x() #1 declare float @llvm.fabs.f32(float) #1 declare double @llvm.fabs.f64(double) #1 Index: test/CodeGen/AMDGPU/fp_to_sint.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ 
test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: @fp_to_sint_f64_i32 ; SI: v_cvt_i32_f64_e32 @@ -47,7 +47,7 @@ ; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]] ; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %val = load double, double addrspace(1)* %gep, align 8 %cast = fptosi double %val to i64 Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; Tests for indirect addressing on SI, which is implemented using dynamic ; indexing of vectors. @@ -87,7 +87,7 @@ ; CHECK: s_cbranch_execnz define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) { entry: - %id = call i32 @llvm.r600.read.tidig.x() #1 + %id = call i32 @llvm.amdgcn.workitem.id.x() #1 %index = add i32 %id, -512 %value = extractelement <4 x i32> , i32 %index store i32 %value, i32 addrspace(1)* %out @@ -152,7 +152,7 @@ ; CHECK: s_cbranch_execnz define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { entry: - %id = call i32 @llvm.r600.read.tidig.x() #1 + %id = call i32 @llvm.amdgcn.workitem.id.x() #1 %index = add i32 %id, -512 %value = insertelement <4 x i32> , i32 5, i32 %index store <4 x i32> %value, <4 x i32> addrspace(1)* %out @@ -167,12 +167,13 @@ ; CHECK: s_cbranch_execnz define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { entry: - %id = call i32 @llvm.r600.read.tidig.x() #1 + %id = call i32 @llvm.amdgcn.workitem.id.x() #1 %index = add i32 %id, -16 %value = insertelement <4 x i32> , i32 5, i32 %index store <4 x i32> %value, <4 x i32> addrspace(1)* %out ret void } -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 + attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/indirect-private-64.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-private-64.ll +++ test/CodeGen/AMDGPU/indirect-private-64.ll @@ -3,8 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s - -declare void @llvm.AMDGPU.barrier.local() convergent nounwind +declare void @llvm.amdgcn.s.barrier() #1 ; SI-LABEL: 
{{^}}private_access_f64_alloca: @@ -13,12 +12,12 @@ ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 -define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind { +define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) #1 { %val = load double, double addrspace(1)* %in, align 8 %array = alloca double, i32 16, align 8 %ptr = getelementptr inbounds double, double* %array, i32 %b store double %val, double* %ptr, align 8 - call void @llvm.AMDGPU.barrier.local() convergent nounwind + call void @llvm.amdgcn.s.barrier() %result = load double, double* %ptr, align 8 store double %result, double addrspace(1)* %out, align 8 ret void @@ -33,12 +32,12 @@ ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 ; SI-PROMOTE: ds_read_b64 -define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind { +define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 { %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16 %array = alloca <2 x double>, i32 16, align 16 %ptr = getelementptr inbounds <2 x double>, <2 x double>* %array, i32 %b store <2 x double> %val, <2 x double>* %ptr, align 16 - call void @llvm.AMDGPU.barrier.local() convergent nounwind + call void @llvm.amdgcn.s.barrier() %result = load <2 x double>, <2 x double>* %ptr, align 16 store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16 ret void @@ -51,12 +50,12 @@ ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 -define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind { +define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) #1 { %val = load i64, i64 addrspace(1)* %in, align 8 %array = alloca i64, i32 16, align 8 %ptr = getelementptr inbounds i64, i64* %array, i32 %b store i64 %val, i64* %ptr, align 8 - call void @llvm.AMDGPU.barrier.local() convergent nounwind + call void @llvm.amdgcn.s.barrier() %result = load i64, i64* %ptr, align 8 store i64 %result, i64 addrspace(1)* %out, align 8 ret void @@ -71,13 +70,16 @@ ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 ; SI-PROMOTE: ds_read_b64 -define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind { +define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 { %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16 %array = alloca <2 x i64>, i32 16, align 16 %ptr = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i32 %b store <2 x i64> %val, <2 x i64>* %ptr, align 16 - call void @llvm.AMDGPU.barrier.local() convergent nounwind + call void @llvm.amdgcn.s.barrier() %result = load <2 x i64>, <2 x i64>* %ptr, align 16 store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16 ret void } + +attributes #0 = { nounwind } +attributes #1 = { nounwind convergent } Index: test/CodeGen/AMDGPU/llvm.amdgcn.class.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.class.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.class.ll @@ -2,7 +2,7 @@ declare i1 @llvm.amdgcn.class.f32(float, i32) #1 declare i1 @llvm.amdgcn.class.f64(double, i32) #1 -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 
@llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fabs.f32(float) #1 declare double @llvm.fabs.f64(double) #1 @@ -133,7 +133,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -151,7 +151,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in @@ -171,7 +171,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in @@ -291,7 +291,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load double, double addrspace(1)* %in @@ -307,7 +307,7 @@ ; SI: v_cmp_class_f64_e32 vcc, ; SI: s_endpgm define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in @@ -322,7 +322,7 @@ ; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} ; SI: s_endpgm define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in @@ -339,7 +339,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -359,7 +359,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 
addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -382,7 +382,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -417,7 +417,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -437,7 +437,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -457,7 +457,7 @@ ; SI: s_or_b64 ; SI: s_endpgm define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -3,7 +3,7 @@ ; FIXME: Enable for VI. 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind readnone @@ -115,7 +115,7 @@ ; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]] ; SI: s_endpgm define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 @@ -152,7 +152,7 @@ ; SI: s_endpgm define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind { entry: - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone @@ -12,7 +12,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -32,7 +32,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -52,7 +52,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -72,7 +72,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 
@llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -92,7 +92,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr float, float addrspace(1)* %in, i32 %tid %b = load float, float addrspace(1)* %gep, align 4 @@ -110,7 +110,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr float, float addrspace(1)* %in, i32 %tid %b = load float, float addrspace(1)* %gep, align 4 @@ -128,7 +128,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep, align 4 @@ -146,7 +146,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep, align 4 @@ -164,7 +164,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %b = load double, double addrspace(1)* %gep, align 8 @@ -182,7 +182,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %b = load double, double addrspace(1)* %gep, align 8 @@ -200,7 +200,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %a = load double, double addrspace(1)* %gep, align 8 @@ -218,7 +218,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr 
double, double addrspace(1)* %in, i32 %tid %a = load double, double addrspace(1)* %gep, align 8 @@ -293,7 +293,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 @@ -309,7 +309,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 @@ -326,7 +326,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -348,7 +348,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -6,7 +6,7 @@ ; GCN: s_barrier define void @test_barrier(i32 addrspace(1)* %out) #0 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp store i32 %tmp, i32 addrspace(1)* %tmp1 call void @llvm.amdgcn.s.barrier() @@ -20,7 +20,7 @@ } declare void @llvm.amdgcn.s.barrier() #1 -declare i32 @llvm.r600.read.tidig.x() #2 +declare i32 @llvm.amdgcn.workitem.id.x() #2 declare i32 @llvm.r600.read.local.size.x() #2 attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.round.f64.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.round.f64.ll +++ test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -27,7 +27,7 @@ ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid %x = load double, double addrspace(1)* %gep @@ -60,7 +60,7 @@ ret void } -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 declare double @llvm.round.f64(double) #1 declare <2 x double> @llvm.round.v2f64(<2 x double>) #1 Index: test/CodeGen/AMDGPU/mad-combine.ll =================================================================== --- test/CodeGen/AMDGPU/mad-combine.ll +++ 
test/CodeGen/AMDGPU/mad-combine.ll @@ -8,7 +8,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 declare float @llvm.fma.f32(float, float, float) #0 declare float @llvm.fmuladd.f32(float, float, float) #0 @@ -32,7 +32,7 @@ ; SI-DENORM: buffer_store_dword [[RESULT]] ; SI-STD: buffer_store_dword [[C]] define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -71,7 +71,7 @@ ; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -108,7 +108,7 @@ ; SI-DENORM: buffer_store_dword [[RESULT]] ; SI-STD: buffer_store_dword [[C]] define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -138,7 +138,7 @@ ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -175,7 +175,7 @@ ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -210,7 +210,7 @@ ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float 
addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -246,7 +246,7 @@ ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -282,7 +282,7 @@ ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -320,7 +320,7 @@ ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -363,7 +363,7 @@ ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -408,7 +408,7 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -454,7 +454,7 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -500,7 +500,7 @@ ; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm 
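; For reference, a minimal sketch (an illustrative function, not part of
; the original test file) of the pattern these combines target: with
; -fp-contract=fast, an fmul whose result feeds an fsub is a candidate
; to fold into a single v_mad_f32 when fp32 denormals are disabled.
define void @mad_combine_sketch(float addrspace(1)* %out, float %a, float %b, float %c) {
  %mul = fmul float %a, %b      ; mul feeding...
  %sub = fsub float %mul, %c    ; ...a sub may become mad(%a, %b, -%c)
  store float %sub, float addrspace(1)* %out
  ret void
}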
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -546,7 +546,7 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 Index: test/CodeGen/AMDGPU/mad-sub.ll =================================================================== --- test/CodeGen/AMDGPU/mad-sub.ll +++ test/CodeGen/AMDGPU/mad-sub.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 ; FUNC-LABEL: {{^}}mad_sub_f32: @@ -10,7 +10,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]] ; SI: buffer_store_dword [[RESULT]] define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -34,7 +34,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]] ; SI: buffer_store_dword [[RESULT]] define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -55,7 +55,7 @@ ; SI: v_mul_f64 ; SI: v_add_f64 define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -79,7 +79,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]| ; SI: buffer_store_dword [[RESULT]] define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -104,7 +104,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]| ; SI: buffer_store_dword [[RESULT]] define void 
@mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -125,7 +125,7 @@ ; FUNC-LABEL: {{^}}neg_neg_mad_f32: ; SI: v_mac_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -151,7 +151,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]] ; SI: buffer_store_dword [[RESULT]] define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -175,7 +175,7 @@ ; SI: v_mac_f32_e32 [[R2]], -2.0, [[R1]] ; SI: buffer_store_dword [[R2]] define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -196,7 +196,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]] ; SI: buffer_store_dword [[RESULT]] define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid Index: test/CodeGen/AMDGPU/madak.ll =================================================================== --- test/CodeGen/AMDGPU/madak.ll +++ test/CodeGen/AMDGPU/madak.ll @@ -3,7 +3,7 @@ ; FIXME: Enable VI -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone ; GCN-LABEL: {{^}}madak_f32: @@ -11,7 +11,7 @@ ; GCN: buffer_load_dword [[VB:v[0-9]+]] ; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -38,7 +38,7 @@ ; GCN-DAG: v_mac_f32_e32 [[VK]], [[VC]], [[VA]] ; GCN: s_endpgm define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail 
call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 @@ -65,7 +65,7 @@ ; GCN: buffer_load_dword [[VA:v[0-9]+]] ; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000 define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -85,7 +85,7 @@ ; GCN: buffer_load_dword [[VB:v[0-9]+]] ; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0 define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -107,7 +107,7 @@ ; GCN-NOT: v_madak_f32 ; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]] define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -126,7 +126,7 @@ ; GCN-NOT: v_madak_f32 ; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]] define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -154,7 +154,7 @@ ; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} ; GCN: s_endpgm define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -176,7 +176,7 @@ ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}} ; GCN: s_endpgm define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid Index: 
test/CodeGen/AMDGPU/madmk.ll =================================================================== --- test/CodeGen/AMDGPU/madmk.ll +++ test/CodeGen/AMDGPU/madmk.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone ; GCN-LABEL: {{^}}madmk_f32: @@ -9,7 +9,7 @@ ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -32,7 +32,7 @@ ; GCN-DAG: v_mac_f32_e32 [[VC]], [[VK]], [[VA]] ; GCN: s_endpgm define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 @@ -61,7 +61,7 @@ ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]] define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -80,7 +80,7 @@ ; GCN: v_mac_f32_e32 ; GCN: s_endpgm define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %mul = fmul float %a, 10.0 @@ -94,7 +94,7 @@ ; GCN: v_mad_f32 ; GCN: s_endpgm define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 @@ -110,7 +110,7 @@ ; GCN: v_mac_f32_e32 ; GCN: s_endpgm define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %out.gep = 
getelementptr float, float addrspace(1)* %out, i32 %tid %b = load float, float addrspace(1)* %gep.0, align 4 @@ -126,7 +126,7 @@ ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -147,7 +147,7 @@ ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}| define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -168,7 +168,7 @@ ; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 ; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0 define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid Index: test/CodeGen/AMDGPU/max.ll =================================================================== --- test/CodeGen/AMDGPU/max.ll +++ test/CodeGen/AMDGPU/max.ll @@ -1,11 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: {{^}}v_test_imax_sge_i32: ; SI: v_max_i32_e32 define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -23,7 +23,7 @@ ; SI: v_max_i32_e32 ; SI: v_max_i32_e32 define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid %outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid @@ -58,7 +58,7 @@ ; SI: buffer_load_sbyte ; SI: v_max_i32_e32 define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 
addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid @@ -91,7 +91,7 @@ ; FUNC-LABEL: @v_test_imax_sgt_i32 ; SI: v_max_i32_e32 define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -115,7 +115,7 @@ ; FUNC-LABEL: @v_test_umax_uge_i32 ; SI: v_max_u32_e32 define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -154,7 +154,7 @@ ; SI: buffer_load_ubyte ; SI: v_max_u32_e32 define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid @@ -169,7 +169,7 @@ ; FUNC-LABEL: @v_test_umax_ugt_i32 ; SI: v_max_u32_e32 define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid Index: test/CodeGen/AMDGPU/merge-stores.ll =================================================================== --- test/CodeGen/AMDGPU/merge-stores.ll +++ test/CodeGen/AMDGPU/merge-stores.ll @@ -376,7 +376,7 @@ %w = load i32, i32 addrspace(1)* %in.gep.3 ; Make sure the barrier doesn't stop this - tail call void @llvm.AMDGPU.barrier.local() #1 + tail call void @llvm.amdgcn.s.barrier() #1 store i32 %w, i32 addrspace(1)* %out.gep.3 store i32 %z, i32 addrspace(1)* %out.gep.2 @@ -413,7 +413,7 @@ %w = load i32, i32 addrspace(1)* %in.gep.3 ; Make sure the barrier doesn't stop this - tail call void @llvm.AMDGPU.barrier.local() #1 + tail call void @llvm.amdgcn.s.barrier() #1 store i32 %w, i32 addrspace(1)* %out store i32 %z, i32 addrspace(1)* %out.gep.1 @@ -705,7 +705,7 @@ ret void } -declare void @llvm.AMDGPU.barrier.local() #1 +declare void @llvm.amdgcn.s.barrier() #1 attributes #0 = { nounwind } attributes #1 = { convergent nounwind } Index: test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll =================================================================== --- test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll +++ test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll @@ -7,12 +7,12 @@ ; Check that 
moving the pointer out of the resource descriptor to ; vaddr works for atomics. -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-LABEL: {{^}}atomic_max_i32: ; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}} define void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep %xor = xor i32 %tid, 1 @@ -32,7 +32,7 @@ ; GCN-LABEL: {{^}}atomic_max_i32_noret: ; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}} define void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep %xor = xor i32 %tid, 1 Index: test/CodeGen/AMDGPU/mubuf.ll =================================================================== --- test/CodeGen/AMDGPU/mubuf.ll +++ test/CodeGen/AMDGPU/mubuf.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s -declare i32 @llvm.r600.read.tidig.x() readnone +declare i32 @llvm.amdgcn.workitem.id.x() readnone ;;;==========================================================================;;; ;;; MUBUF LOAD TESTS @@ -170,7 +170,7 @@ ; CHECK-LABEL: {{^}}store_vgpr_ptr: ; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid store i32 99, i32 addrspace(1)* %out.gep, align 4 ret void Index: test/CodeGen/AMDGPU/no-shrink-extloads.ll =================================================================== --- test/CodeGen/AMDGPU/no-shrink-extloads.ll +++ test/CodeGen/AMDGPU/no-shrink-extloads.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; Make sure we don't turn the 32-bit argument load into a 16-bit ; load. 
There aren't extending scalar loads, so that would require @@ -22,7 +22,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_short v define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid %load = load i32, i32 addrspace(1)* %gep.in @@ -44,7 +44,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_byte v define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid %load = load i32, i32 addrspace(1)* %gep.in @@ -66,7 +66,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_byte v define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid %load = load i32, i32 addrspace(1)* %gep.in @@ -88,7 +88,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_dword v define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %load = load i64, i64 addrspace(1)* %gep.in @@ -111,7 +111,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_dword v define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %load = load i64, i64 addrspace(1)* %gep.in @@ -135,7 +135,7 @@ ; SI: buffer_load_ubyte v ; SI: buffer_store_byte v define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid %load = load i16, i16 addrspace(1)* %gep.in @@ -158,7 +158,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_byte v define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid %load = load i64, i64 addrspace(1)* %gep.in @@ -181,7 +181,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_byte v define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + 
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid %load = load i64, i64 addrspace(1)* %gep.in Index: test/CodeGen/AMDGPU/operand-folding.ll =================================================================== --- test/CodeGen/AMDGPU/operand-folding.ll +++ test/CodeGen/AMDGPU/operand-folding.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}fold_sgpr: ; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s @@ -8,7 +8,7 @@ br i1 %tmp0, label %if, label %endif if: - %id = call i32 @llvm.r600.read.tidig.x() + %id = call i32 @llvm.amdgcn.workitem.id.x() %offset = add i32 %fold, %id %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset store i32 0, i32 addrspace(1)* %tmp1 @@ -27,7 +27,7 @@ br i1 %tmp0, label %if, label %endif if: - %id = call i32 @llvm.r600.read.tidig.x() + %id = call i32 @llvm.amdgcn.workitem.id.x() %val = or i32 %id, %fold store i32 %val, i32 addrspace(1)* %out br label %endif @@ -63,7 +63,7 @@ define void @vector_inline(<4 x i32> addrspace(1)* %out) { entry: - %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp0, 1 %tmp2 = add i32 %tmp0, 2 %tmp3 = add i32 %tmp0, 3 @@ -82,7 +82,7 @@ define void @imm_one_use(i32 addrspace(1)* %out) { entry: - %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = xor i32 %tmp0, 100 store i32 %tmp1, i32 addrspace(1)* %out ret void @@ -96,7 +96,7 @@ define void @vector_imm(<4 x i32> addrspace(1)* %out) { entry: - %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp0, 1 %tmp2 = add i32 %tmp0, 2 %tmp3 = add i32 %tmp0, 3 @@ -109,5 +109,6 @@ ret void } -declare i32 @llvm.r600.read.tidig.x() #0 -attributes #0 = { readnone } +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll =================================================================== --- test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll +++ test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll @@ -8,10 +8,10 @@ ; During live interval construction, the first sub register def is ; incorrectly marked as dead. 
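; A minimal sketch of the shape that exposes the bug (an assumed
; illustration, not the original test body): the 64-bit load is split
; into two 32-bit sub-register defs, and the def of the low half must
; stay live, not dead, while the truncated use below still reads it.
define void @subreg_dead_def_sketch(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
  %val = load i64, i64 addrspace(1)* %in  ; one vreg, two sub-register defs
  %lo = trunc i64 %val to i32             ; keeps the low sub-register live
  store i32 %lo, i32 addrspace(1)* %out
  ret void
}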
-declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 define void @dead_def_subregister(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %val = load i64, i64 addrspace(1)* %in.gep Index: test/CodeGen/AMDGPU/rsq.ll =================================================================== --- test/CodeGen/AMDGPU/rsq.ll +++ test/CodeGen/AMDGPU/rsq.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.sqrt.f32(float) nounwind readnone declare double @llvm.sqrt.f64(double) nounwind readnone @@ -56,7 +56,7 @@ ; SI: s_endpgm define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 Index: test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- test/CodeGen/AMDGPU/salu-to-valu.ll +++ test/CodeGen/AMDGPU/salu-to-valu.ll @@ -2,8 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s -declare i32 @llvm.r600.read.tidig.x() #0 -declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workitem.id.y() #0 ; In this test both the pointer and the offset operands to the ; BUFFER_LOAD instructions end up being stored in vgprs. 
This @@ -26,8 +26,8 @@ define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() - %tmp1 = call i32 @llvm.r600.read.tidig.y() + %tmp = call i32 @llvm.amdgcn.workitem.id.x() + %tmp1 = call i32 @llvm.amdgcn.workitem.id.y() %tmp2 = sext i32 %tmp to i64 %tmp3 = sext i32 %tmp1 to i64 br label %loop @@ -87,7 +87,7 @@ ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 %tmp3 = load i32, i32 addrspace(2)* %tmp2 @@ -107,7 +107,7 @@ ; GCN-HSA: flat_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000 %tmp4 = load i32, i32 addrspace(2)* %tmp3 @@ -127,7 +127,7 @@ ; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000 %tmp4 = load i64, i64 addrspace(2)* %tmp3 @@ -149,7 +149,7 @@ ; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234 %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3 @@ -185,7 +185,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234 %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3 @@ -234,7 +234,7 @@ ; GCN: s_endpgm define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234 %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3 @@ -251,7 +251,7 @@ ; GCN-HSA: flat_store_dword [[ADD]] define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 %tmp3 = load i32, i32 addrspace(2)* %tmp2 @@ -265,7 +265,7 @@ ; GCN-HSA flat_load_dword v{{[0-9]}}, 
v{{[0-9]+:[0-9]+}} define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255 %tmp3 = load i32, i32 addrspace(2)* %tmp2 @@ -279,7 +279,7 @@ ; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256 %tmp3 = load i32, i32 addrspace(2)* %tmp2 @@ -294,7 +294,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() + %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 @@ -317,7 +317,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() + %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 @@ -354,7 +354,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)* %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4 @@ -389,7 +389,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)* %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4 Index: test/CodeGen/AMDGPU/schedule-global-loads.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-global-loads.ll +++ test/CodeGen/AMDGPU/schedule-global-loads.ll @@ -1,8 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() #1 - ; FIXME: This currently doesn't do a great job of clustering the ; loads, which end up with extra moves between them. 
Right now, it ; seems the only thing areLoadsFromSameBasePtr is accomplishing is Index: test/CodeGen/AMDGPU/sgpr-control-flow.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -40,7 +40,7 @@ define void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) { entry: - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %tid_f = uitofp i32 %tid to float %tmp1 = fcmp ueq float %tid_f, 0.0 br i1 %tmp1, label %if, label %else @@ -77,7 +77,7 @@ ; SI: buffer_store_dword [[RESULT]] define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { entry: - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %tmp1 = icmp eq i32 %tid, 0 br i1 %tmp1, label %if, label %else @@ -100,6 +100,6 @@ ret void } -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { readnone } Index: test/CodeGen/AMDGPU/shl_add_constant.ll =================================================================== --- test/CodeGen/AMDGPU/shl_add_constant.ll +++ test/CodeGen/AMDGPU/shl_add_constant.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; Test with inline immediate @@ -10,7 +10,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x %val = load i32, i32 addrspace(1)* %ptr, align 4 %add = add i32 %val, 9 @@ -26,7 +26,7 @@ ; SI-DAG: buffer_store_dword [[SHLREG]] ; SI: s_endpgm define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x %val = load i32, i32 addrspace(1)* %ptr, align 4 %add = add i32 %val, 9 @@ -44,7 +44,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x %val = load i32, i32 addrspace(1)* %ptr, align 4 %shl = add i32 %val, 999 Index: test/CodeGen/AMDGPU/shl_add_ptr.ll =================================================================== --- test/CodeGen/AMDGPU/shl_add_ptr.ll +++ test/CodeGen/AMDGPU/shl_add_ptr.ll @@ -7,7 +7,7 @@ ; LDS globals. 
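; A minimal sketch of the addressing pattern under test (a hypothetical
; function, assuming the @lds0 global and the workitem intrinsic declared
; below): the constant part of the shifted index should fold into the
; ds_read_b32 immediate, e.g. (tid + 2) << 2 becomes base tid << 2 with
; offset:8.
define void @shl_add_offset_sketch(float addrspace(1)* %out) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx = add nsw i32 %tid, 2    ; constant add folded into the offset
  %ptr = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx
  %val = load float, float addrspace(3)* %ptr, align 4
  store float %val, float addrspace(1)* %out
  ret void
}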
-declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 @lds0 = addrspace(3) global [512 x float] undef, align 4 @lds1 = addrspace(3) global [512 x float] undef, align 4 @@ -20,7 +20,7 @@ ; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 ; SI: s_endpgm define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -40,7 +40,7 @@ ; SI-DAG: buffer_store_dword [[ADDUSE]] ; SI: s_endpgm define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -56,7 +56,7 @@ ; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535 ; SI: s_endpgm define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 65535 %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0 %val0 = load i8, i8 addrspace(3)* %arrayidx0 @@ -74,7 +74,7 @@ ; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 ; SI: s_endpgm define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 64 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -90,7 +90,7 @@ ; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 ; SI: s_endpgm define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 store float 1.0, float addrspace(3)* %arrayidx0, align 4 @@ -105,7 +105,7 @@ @lds2 = addrspace(3) global [512 x i32] undef, align 4 ; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { -; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 +; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 ; %idx.0 = add nsw i32 %tid.x, 2 ; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0 ; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4 @@ -120,7 +120,7 @@ ; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8 ; SI: s_endpgm define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* 
@lds2, i32 0, i32 %idx.0
   %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
@@ -135,7 +135,7 @@
 ; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -149,7 +149,7 @@
 ; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -163,7 +163,7 @@
 ; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -177,7 +177,7 @@
 ; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -191,7 +191,7 @@
 ; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -205,7 +205,7 @@
 ; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -215,7 +215,7 @@
 }
 
 ; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+;   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
 ;   %idx.0 = add nsw i32 %tid.x, 2
 ;   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
 ;   %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -229,7 +229,7 @@
 ; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -243,7 +243,7 @@
 ; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -257,7 +257,7 @@
 ; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -271,7 +271,7 @@
 ; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
Index: test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
===================================================================
--- test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -2,7 +2,7 @@
 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
 declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.AMDGPU.barrier.local() #2
+declare void @llvm.amdgcn.s.barrier() #2
 
 @stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
 
@@ -61,7 +61,7 @@
   %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
   store i32 99, i32 addrspace(1)* %gptr, align 4
-  call void @llvm.AMDGPU.barrier.local() #2
+  call void @llvm.amdgcn.s.barrier() #2
   %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
Index: test/CodeGen/AMDGPU/sint_to_fp.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; SI-LABEL: {{^}}sint_to_fp_i32_to_f64
 ; SI: v_cvt_f64_i32_e32
@@ -52,7 +52,7 @@
 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep, align 8
   %result = sitofp i64 %val to double
Index: test/CodeGen/AMDGPU/sint_to_fp.i64.ll
===================================================================
--- test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
 
 ; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600
@@ -28,7 +28,7 @@
 ; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]],
 ; GCN: {{buffer|flat}}_store_dword [[SIGN_SEL]]
 define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep
@@ -46,7 +46,7 @@
 
 ; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64:
 define void @v_sint_to_fp_v4i64(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
   %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
@@ -55,7 +55,7 @@
   ret void
 }
 
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/split-scalar-i64-add.ll
===================================================================
--- test/CodeGen/AMDGPU/split-scalar-i64-add.ll
+++ test/CodeGen/AMDGPU/split-scalar-i64-add.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.r600.read.tidig.x() readnone
+declare i32 @llvm.amdgcn.workitem.id.x() readnone
 
 ; This is broken because the low half of the 64-bit add remains on the
 ; SALU, but the upper half does not. The addc expects the carry bit
@@ -62,7 +62,7 @@
 ; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
 define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
-  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
   %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %load = load i32, i32 addrspace(1)* %gep
   %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
Index: test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
===================================================================
--- test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
+++ test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -35,9 +35,9 @@
 entry:
   %tmp = tail call i32 @llvm.r600.read.local.size.y()
   %tmp1 = tail call i32 @llvm.r600.read.local.size.z()
-  %tmp2 = tail call i32 @llvm.r600.read.tidig.x()
-  %tmp3 = tail call i32 @llvm.r600.read.tidig.y()
-  %tmp4 = tail call i32 @llvm.r600.read.tidig.z()
+  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.z()
   %tmp6 = mul i32 %tmp2, %tmp
   %tmp10 = add i32 %tmp3, %tmp6
   %tmp11 = mul i32 %tmp10, %tmp1
@@ -87,7 +87,7 @@
 declare i32 @llvm.r600.read.local.size.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.r600.read.local.size.y() #1
@@ -96,10 +96,10 @@
 declare i32 @llvm.r600.read.local.size.z() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
+declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.z() #1
+declare i32 @llvm.amdgcn.workitem.id.z() #1
 
 attributes #0 = { norecurse nounwind }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/store-barrier.ll
===================================================================
--- test/CodeGen/AMDGPU/store-barrier.ll
+++ test/CodeGen/AMDGPU/store-barrier.ll
@@ -12,7 +12,7 @@
 ; CHECK: s_barrier
 ; CHECK: s_endpgm
 ; Function Attrs: nounwind
-define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
+define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) #0 {
 bb:
   %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
   %tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
@@ -21,7 +21,7 @@
   %tmp16 = add i32 %tmp13, 1
   %tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
   store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
-  tail call void @llvm.AMDGPU.barrier.local() #2
+  tail call void @llvm.amdgcn.s.barrier()
   %tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
   %tmp26 = sext i32 %tmp25 to i64
   %tmp27 = sext i32 %arg4 to i64
@@ -37,6 +37,7 @@
 }
 
 ; Function Attrs: convergent nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
+declare void @llvm.amdgcn.s.barrier() #1
 
-attributes #2 = { convergent nounwind }
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }
Index: test/CodeGen/AMDGPU/v_cndmask.ll
===================================================================
--- test/CodeGen/AMDGPU/v_cndmask.ll
+++ test/CodeGen/AMDGPU/v_cndmask.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; SI-LABEL: {{^}}v_cnd_nan_nosgpr:
 ; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
@@ -9,7 +9,7 @@
 ; All nan values are converted to 0xffffffff
 ; SI: s_endpgm
 define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
-  %idx = call i32 @llvm.r600.read.tidig.x() #1
+  %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
   %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
   %f = load float, float addrspace(1)* %fptr
   %setcc = icmp ne i32 %c, 0
Index: test/CodeGen/AMDGPU/valu-i1.ll
===================================================================
--- test/CodeGen/AMDGPU/valu-i1.ll
+++ test/CodeGen/AMDGPU/valu-i1.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; SI-LABEL: @test_if
 ; Make sure the i1 values created by the cfg structurizer pass are
@@ -54,7 +54,7 @@
 ; SI: s_or_b64 exec, exec, [[BR_SREG]]
 ; SI: s_endpgm
 define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %is.0 = icmp ne i32 %tid, 0
   br i1 %is.0, label %store, label %exit
 
@@ -86,7 +86,7 @@
 
 define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
 entry:
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %is.0 = icmp ne i32 %tid, 0
   %limit = add i32 %tid, 64
   br i1 %is.0, label %loop, label %exit
@@ -152,7 +152,7 @@
 define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
 bb:
-  %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tmp4 = sext i32 %tmp to i64
   %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
   %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
Index: test/CodeGen/AMDGPU/vop-shrink.ll
===================================================================
--- test/CodeGen/AMDGPU/vop-shrink.ll
+++ test/CodeGen/AMDGPU/vop-shrink.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; Test that we correctly commute a sub instruction
 ; FUNC-LABEL: {{^}}sub_rev:
@@ -10,7 +10,7 @@
 
 define void @sub_rev(i32 addrspace(1)* %out, <4 x i32> %sgpr, i32 %cond) {
 entry:
-  %vgpr = call i32 @llvm.r600.read.tidig.x() #1
+  %vgpr = call i32 @llvm.amdgcn.workitem.id.x() #1
   %tmp = icmp eq i32 %cond, 0
   br i1 %tmp, label %if, label %else
 
@@ -37,7 +37,7 @@
 ; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000
 define void @add_fold(float addrspace(1)* %out) {
 entry:
-  %tmp = call i32 @llvm.r600.read.tidig.x()
+  %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = uitofp i32 %tmp to float
   %tmp2 = fadd float %tmp1, 1.024000e+03
   store float %tmp2, float addrspace(1)* %out
@@ -45,7 +45,7 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { readnone }
Index: test/CodeGen/AMDGPU/wait.ll
===================================================================
--- test/CodeGen/AMDGPU/wait.ll
+++ test/CodeGen/AMDGPU/wait.ll
@@ -18,7 +18,7 @@
   %tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
   %tmp12 = extractelement <4 x float> %tmp11, i32 0
   %tmp13 = extractelement <4 x float> %tmp11, i32 1
-  call void @llvm.AMDGPU.barrier.global() #1
+  call void @llvm.amdgcn.s.barrier() #1
   %tmp14 = extractelement <4 x float> %tmp11, i32 2
 ; %tmp15 = extractelement <4 x float> %tmp11, i32 3
   %tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
@@ -71,7 +71,7 @@
 
 ; Function Attrs: convergent nounwind
-declare void @llvm.AMDGPU.barrier.global() #1
+declare void @llvm.amdgcn.s.barrier() #1
 
 ; Function Attrs: nounwind readnone
 declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
Index: test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
===================================================================
--- test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
+++ test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
@@ -7,11 +7,11 @@
 ; can do to avoid this.
 
 declare void @llvm.write_register.i32(metadata, i32) #0
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 define void @write_vgpr_into_sgpr() {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   call void @llvm.write_register.i32(metadata !0, i32 %tid)
   ret void
 }