Index: test/CodeGen/AMDGPU/add_i64.ll
===================================================================
--- test/CodeGen/AMDGPU/add_i64.ll
+++ test/CodeGen/AMDGPU/add_i64.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() readnone
+declare i32 @llvm.amdgcn.workitem.id.x() readnone
 
 ; SI-LABEL: {{^}}test_i64_vreg:
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
-  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
   %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
   %a = load i64, i64 addrspace(1)* %a_ptr
@@ -59,7 +59,7 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
-  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
   %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
   %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
Index: test/CodeGen/AMDGPU/addrspacecast.ll
===================================================================
--- test/CodeGen/AMDGPU/addrspacecast.ll
+++ test/CodeGen/AMDGPU/addrspacecast.ll
@@ -47,19 +47,19 @@
 ; CHECK: flat_load_dword
 define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
   %alloca = alloca i32, i32 9, align 4
-  %x = call i32 @llvm.r600.read.tidig.x() #3
+  %x = call i32 @llvm.amdgcn.workitem.id.x() #3
   %pptr = getelementptr i32, i32* %alloca, i32 %x
   %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
   store i32 %x, i32 addrspace(4)* %fptr
   ; Dummy call
-  call void @llvm.AMDGPU.barrier.local() #1
+  call void @llvm.amdgcn.s.barrier() #1
   %reload = load i32, i32 addrspace(4)* %fptr, align 4
   store i32 %reload, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-declare void @llvm.AMDGPU.barrier.local() #1
-declare i32 @llvm.r600.read.tidig.x() #3
+declare void @llvm.amdgcn.s.barrier() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #3
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind convergent }
Index: test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
===================================================================
--- test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -1,8 +1,9 @@
 ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
 ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
 
-declare i32 @llvm.SI.tid() nounwind readnone
-declare void @llvm.AMDGPU.barrier.local() nounwind convergent
+declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
+declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
+declare void @llvm.amdgcn.s.barrier() #2
 
 ; The required pointer calculations for the alloca'd actually requires
 ; an add and won't be folded into the addressing, which fails with a
@@ -24,9 +25,10 @@
 ; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
 ; SI-PROMOTE: ds_write_b32 [[PTRREG]]
-define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
+define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) #0 {
   %alloca = alloca [4 x i32], i32 4, align 16
-  %tid = call i32 @llvm.SI.tid() readnone
+  %mbcnt.lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+  %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %mbcnt.lo)
   %a_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr inbounds i32, i32 addrspace(1)* %inB, i32 %tid
   %a = load i32, i32 addrspace(1)* %a_ptr
@@ -35,10 +37,13 @@
   %alloca_ptr = getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
   store i32 %result, i32* %alloca_ptr, align 4
   ; Dummy call
-  call void @llvm.AMDGPU.barrier.local() nounwind convergent
+  call void @llvm.amdgcn.s.barrier()
   %reload = load i32, i32* %alloca_ptr, align 4
   %out_ptr = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
   store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
   ret void
 }
 
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind convergent }
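A note on array-ptr-calc-i32.ll above: unlike the other tests, which swap llvm.r600.read.tidig.x for llvm.amdgcn.workitem.id.x one-for-one, llvm.SI.tid has no direct replacement; the paired mbcnt calls rebuild its value by counting the lower-numbered lanes in the wave's 64-bit execution mask. A minimal standalone sketch of that idiom follows; the kernel name and output pointer are illustrative, not part of the patch:

  declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
  declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0

  ; Hypothetical example: compute this lane's index within the wave.
  define void @example_lane_id(i32 addrspace(1)* %out) {
    ; Count the set bits below this lane in the low 32 bits of mask -1 (all lanes).
    %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
    ; Continue the count through the high 32 bits, accumulating on top of %lo.
    %lane = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo)
    %gep = getelementptr i32, i32 addrspace(1)* %out, i32 %lane
    store i32 %lane, i32 addrspace(1)* %gep
    ret void
  }

  attributes #0 = { nounwind readnone }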
Index: test/CodeGen/AMDGPU/cgp-addressing-modes.ll
===================================================================
--- test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -5,8 +5,6 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare i32 @llvm.r600.read.tidig.x() #0
-
 ; OPT-LABEL: @test_sink_global_small_offset_i32(
 ; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
 ; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
Index: test/CodeGen/AMDGPU/commute-compares.ll
===================================================================
--- test/CodeGen/AMDGPU/commute-compares.ll
+++ test/CodeGen/AMDGPU/commute-compares.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 ; --------------------------------------------------------------------------------
 ; i32 compares
@@ -9,7 +9,7 @@
 ; GCN-LABEL: {{^}}commute_eq_64_i32:
 ; GCN: v_cmp_eq_i32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_eq_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -22,7 +22,7 @@
 ; GCN-LABEL: {{^}}commute_ne_64_i32:
 ; GCN: v_cmp_ne_i32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -37,7 +37,7 @@
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
 ; GCN: v_cmp_ne_i32_e32 vcc, [[K]], v{{[0-9]+}}
 define void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -50,7 +50,7 @@
 ; GCN-LABEL: {{^}}commute_ugt_64_i32:
 ; GCN: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_ugt_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -63,7 +63,7 @@
 ; GCN-LABEL: {{^}}commute_uge_64_i32:
 ; GCN: v_cmp_lt_u32_e32 vcc, 63, v{{[0-9]+}}
 define void @commute_uge_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -76,7 +76,7 @@
 ; GCN-LABEL: {{^}}commute_ult_64_i32:
 ; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_ult_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -89,7 +89,7 @@
 ; GCN-LABEL: {{^}}commute_ule_63_i32:
 ; GCN: v_cmp_gt_u32_e32 vcc, 64, v{{[0-9]+}}
 define void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -105,7 +105,7 @@
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
 ; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
 define void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -118,7 +118,7 @@
 ; GCN-LABEL: {{^}}commute_sgt_neg1_i32:
 ; GCN: v_cmp_lt_i32_e32 vcc, -1, v{{[0-9]+}}
 define void @commute_sgt_neg1_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -131,7 +131,7 @@
 ; GCN-LABEL: {{^}}commute_sge_neg2_i32:
 ; GCN: v_cmp_lt_i32_e32 vcc, -3, v{{[0-9]+}}
 define void @commute_sge_neg2_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -144,7 +144,7 @@
 ; GCN-LABEL: {{^}}commute_slt_neg16_i32:
 ; GCN: v_cmp_gt_i32_e32 vcc, -16, v{{[0-9]+}}
 define void @commute_slt_neg16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -157,7 +157,7 @@
 ; GCN-LABEL: {{^}}commute_sle_5_i32:
 ; GCN: v_cmp_gt_i32_e32 vcc, 6, v{{[0-9]+}}
 define void @commute_sle_5_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in
@@ -174,7 +174,7 @@
 ; GCN-LABEL: {{^}}commute_eq_64_i64:
 ; GCN: v_cmp_eq_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_eq_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -187,7 +187,7 @@
 ; GCN-LABEL: {{^}}commute_ne_64_i64:
 ; GCN: v_cmp_ne_i64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ne_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -200,7 +200,7 @@
 ; GCN-LABEL: {{^}}commute_ugt_64_i64:
 ; GCN: v_cmp_lt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ugt_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -213,7 +213,7 @@
 ; GCN-LABEL: {{^}}commute_uge_64_i64:
 ; GCN: v_cmp_lt_u64_e32 vcc, 63, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_uge_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -226,7 +226,7 @@
 ; GCN-LABEL: {{^}}commute_ult_64_i64:
 ; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ult_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -239,7 +239,7 @@
 ; GCN-LABEL: {{^}}commute_ule_63_i64:
 ; GCN: v_cmp_gt_u64_e32 vcc, 64, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ule_63_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -255,7 +255,7 @@
 ; GCN-DAG: s_movk_i32 s[[KLO:[0-9]+]], 0x41{{$}}
 ; GCN: v_cmp_gt_u64_e32 vcc, s{{\[}}[[KLO]]:{{[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ule_64_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -268,7 +268,7 @@
 ; GCN-LABEL: {{^}}commute_sgt_neg1_i64:
 ; GCN: v_cmp_lt_i64_e32 vcc, -1, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_sgt_neg1_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -281,7 +281,7 @@
 ; GCN-LABEL: {{^}}commute_sge_neg2_i64:
 ; GCN: v_cmp_lt_i64_e32 vcc, -3, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_sge_neg2_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -294,7 +294,7 @@
 ; GCN-LABEL: {{^}}commute_slt_neg16_i64:
 ; GCN: v_cmp_gt_i64_e32 vcc, -16, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_slt_neg16_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -307,7 +307,7 @@
 ; GCN-LABEL: {{^}}commute_sle_5_i64:
 ; GCN: v_cmp_gt_i64_e32 vcc, 6, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_sle_5_i64(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep.in
@@ -325,7 +325,7 @@
 ; GCN-LABEL: {{^}}commute_oeq_2.0_f32:
 ; GCN: v_cmp_eq_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_oeq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -339,7 +339,7 @@
 ; GCN-LABEL: {{^}}commute_ogt_2.0_f32:
 ; GCN: v_cmp_lt_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ogt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -352,7 +352,7 @@
 ; GCN-LABEL: {{^}}commute_oge_2.0_f32:
 ; GCN: v_cmp_le_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_oge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -365,7 +365,7 @@
 ; GCN-LABEL: {{^}}commute_olt_2.0_f32:
 ; GCN: v_cmp_gt_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_olt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -378,7 +378,7 @@
 ; GCN-LABEL: {{^}}commute_ole_2.0_f32:
 ; GCN: v_cmp_ge_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ole_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -391,7 +391,7 @@
 ; GCN-LABEL: {{^}}commute_one_2.0_f32:
 ; GCN: v_cmp_lg_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_one_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -404,7 +404,7 @@
 ; GCN-LABEL: {{^}}commute_ord_2.0_f32:
 ; GCN: v_cmp_o_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
 define void @commute_ord_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -417,7 +417,7 @@
 ; GCN-LABEL: {{^}}commute_ueq_2.0_f32:
 ; GCN: v_cmp_nlg_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ueq_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -430,7 +430,7 @@
 ; GCN-LABEL: {{^}}commute_ugt_2.0_f32:
 ; GCN: v_cmp_nge_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ugt_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -443,7 +443,7 @@
 ; GCN-LABEL: {{^}}commute_uge_2.0_f32:
 ; GCN: v_cmp_ngt_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_uge_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -456,7 +456,7 @@
 ; GCN-LABEL: {{^}}commute_ult_2.0_f32:
 ; GCN: v_cmp_nle_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ult_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -469,7 +469,7 @@
 ; GCN-LABEL: {{^}}commute_ule_2.0_f32:
 ; GCN: v_cmp_nlt_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_ule_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -482,7 +482,7 @@
 ; GCN-LABEL: {{^}}commute_une_2.0_f32:
 ; GCN: v_cmp_neq_f32_e32 vcc, 2.0, v{{[0-9]+}}
 define void @commute_une_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -495,7 +495,7 @@
 ; GCN-LABEL: {{^}}commute_uno_2.0_f32:
 ; GCN: v_cmp_u_f32_e32 vcc, [[REG:v[0-9]+]], [[REG]]
 define void @commute_uno_2.0_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load float, float addrspace(1)* %gep.in
@@ -513,7 +513,7 @@
 ; GCN-LABEL: {{^}}commute_oeq_2.0_f64:
 ; GCN: v_cmp_eq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_oeq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -527,7 +527,7 @@
 ; GCN-LABEL: {{^}}commute_ogt_2.0_f64:
 ; GCN: v_cmp_lt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ogt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -540,7 +540,7 @@
 ; GCN-LABEL: {{^}}commute_oge_2.0_f64:
 ; GCN: v_cmp_le_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_oge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -553,7 +553,7 @@
 ; GCN-LABEL: {{^}}commute_olt_2.0_f64:
 ; GCN: v_cmp_gt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_olt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -566,7 +566,7 @@
 ; GCN-LABEL: {{^}}commute_ole_2.0_f64:
 ; GCN: v_cmp_ge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ole_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -579,7 +579,7 @@
 ; GCN-LABEL: {{^}}commute_one_2.0_f64:
 ; GCN: v_cmp_lg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_one_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -592,7 +592,7 @@
 ; GCN-LABEL: {{^}}commute_ord_2.0_f64:
 ; GCN: v_cmp_o_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
 define void @commute_ord_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -605,7 +605,7 @@
 ; GCN-LABEL: {{^}}commute_ueq_2.0_f64:
 ; GCN: v_cmp_nlg_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ueq_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -618,7 +618,7 @@
 ; GCN-LABEL: {{^}}commute_ugt_2.0_f64:
 ; GCN: v_cmp_nge_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ugt_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -631,7 +631,7 @@
 ; GCN-LABEL: {{^}}commute_uge_2.0_f64:
 ; GCN: v_cmp_ngt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_uge_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -644,7 +644,7 @@
 ; GCN-LABEL: {{^}}commute_ult_2.0_f64:
 ; GCN: v_cmp_nle_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ult_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -657,7 +657,7 @@
 ; GCN-LABEL: {{^}}commute_ule_2.0_f64:
 ; GCN: v_cmp_nlt_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_ule_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -670,7 +670,7 @@
 ; GCN-LABEL: {{^}}commute_une_2.0_f64:
 ; GCN: v_cmp_neq_f64_e32 vcc, 2.0, v{{\[[0-9]+:[0-9]+\]}}
 define void @commute_une_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
@@ -683,7 +683,7 @@
 ; GCN-LABEL: {{^}}commute_uno_2.0_f64:
 ; GCN: v_cmp_u_f64_e32 vcc, [[REG:v\[[0-9]+:[0-9]+\]]], [[REG]]
 define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %val = load double, double addrspace(1)* %gep.in
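All of the commute-compares.ll checks above encode one backend rule: a VOPC compare can hold an immediate only in its first source operand, so when IR compares a loaded value against a constant, the instruction is commuted and the predicate swapped (ugt becomes lt with the constant first, and so on). A hand-written sketch of the shape being tested, under that assumption (function names are illustrative):

  ; icmp ugt %val, 64 cannot place 64 in src1, so the backend is expected
  ; to emit the commuted form: v_cmp_lt_u32_e32 vcc, 64, v{{[0-9]+}}
  define void @example_commute_ugt(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
    %val = load i32, i32 addrspace(1)* %in
    %cmp = icmp ugt i32 %val, 64
    %ext = sext i1 %cmp to i32
    store i32 %ext, i32 addrspace(1)* %out
    ret void
  }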
Index: test/CodeGen/AMDGPU/commute_modifiers.ll
===================================================================
--- test/CodeGen/AMDGPU/commute_modifiers.ll
+++ test/CodeGen/AMDGPU/commute_modifiers.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare float @llvm.fabs.f32(float) #1
 declare float @llvm.fma.f32(float, float, float) nounwind readnone
@@ -9,7 +9,7 @@
 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -23,7 +23,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -38,7 +38,7 @@
 ; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %x = load float, float addrspace(1)* %gep.0
   %x.fneg = fsub float -0.000000e+00, %x
@@ -54,7 +54,7 @@
 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
@@ -69,7 +69,7 @@
 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -86,7 +86,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -103,7 +103,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -122,7 +122,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -140,7 +140,7 @@
 ; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
 ; SI-NEXT: buffer_store_dword [[REG]]
 define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %x = load float, float addrspace(1)* %gep.0
@@ -162,7 +162,7 @@
 ; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]|
 ; SI: buffer_store_dword [[RESULT]]
 define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
Index: test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
===================================================================
--- test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
+++ test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
@@ -12,7 +12,7 @@
 ; GCN: s_endpgm
 define void @reschedule_global_load_lds_store(i32 addrspace(1)* noalias %gptr0, i32 addrspace(1)* noalias %gptr1, i32 addrspace(3)* noalias %lptr, i32 %c) #0 {
 entry:
-  %tid = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx = shl i32 %tid, 2
   %gep0 = getelementptr i32, i32 addrspace(1)* %gptr0, i32 %idx
   %gep1 = getelementptr i32, i32 addrspace(1)* %gptr1, i32 %idx
@@ -42,10 +42,7 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
===================================================================
--- test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@@ -2,8 +2,8 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
 
-declare i32 @llvm.r600.read.tidig.x() #0
-declare void @llvm.AMDGPU.barrier.local() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare void @llvm.amdgcn.s.barrier() #1
 
 ; Function Attrs: nounwind
 ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
@@ -25,7 +25,7 @@
 ; CHECK: s_endpgm
 define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
 entry:
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %mul = shl nsw i32 %x.i, 1
   br label %for.body
@@ -33,7 +33,7 @@
   %sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
   %offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
   %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  tail call void @llvm.AMDGPU.barrier.local() #1
+  tail call void @llvm.amdgcn.s.barrier() #1
   %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02
   %tmp = load float, float addrspace(3)* %arrayidx, align 4
   %add1 = add nsw i32 %offset.02, 1
Index: test/CodeGen/AMDGPU/ds-sub-offset.ll
===================================================================
--- test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -1,7 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
 
-declare void @llvm.AMDGPU.barrier.local() #2
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 @lds.obj = addrspace(3) global [256 x i32] undef, align 4
@@ -12,7 +11,7 @@
 ; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12
 define void @write_ds_sub0_offset0_global() #0 {
 entry:
-  %x.i = call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #1
   %sub1 = sub i32 0, %x.i
   %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
   %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
@@ -26,7 +25,7 @@
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
 ; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535
 define void @add_x_shl_neg_to_sub_max_offset() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 65535, %shl
@@ -41,7 +40,7 @@
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
 ; GCN: ds_write_b8 [[NEG]], [[K]]{{$}}
 define void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 65536, %shl
@@ -60,7 +59,7 @@
 ; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}}
 ; GCN: s_endpgm
 define void @add_x_shl_neg_to_sub_multi_use() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add0 = add i32 123, %shl
@@ -82,7 +81,7 @@
 ; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
 ; GCN: s_endpgm
 define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 123, %shl
@@ -97,7 +96,7 @@
 ; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
 ; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255
 define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 1019, %shl
@@ -111,7 +110,7 @@
 ; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]]
 ; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
 define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
-  %x.i = call i32 @llvm.r600.read.tidig.x() #0
+  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
   %neg = sub i32 0, %x.i
   %shl = shl i32 %neg, 2
   %add = add i32 1020, %shl
Index: test/CodeGen/AMDGPU/ds_read2.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_read2.ll
+++ test/CodeGen/AMDGPU/ds_read2.ll
@@ -13,7 +13,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2_f32(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -32,7 +32,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 255
@@ -50,7 +50,7 @@
 ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
 ; SI: s_endpgm
 define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 257
@@ -67,7 +67,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
 ; SI: s_endpgm
 define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -99,7 +99,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
 ; SI: s_endpgm
 define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -109,7 +109,7 @@
   %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum.0 = fadd float %val0, %val1
 
-  call void @llvm.AMDGPU.barrier.local() #2
+  call void @llvm.amdgcn.s.barrier() #2
 
   %idx.2 = add nsw i32 %tid.x, 11
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
@@ -134,7 +134,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
 ; SI: s_endpgm
 define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -171,7 +171,7 @@
 ; SI: ds_read_b32
 ; SI: s_endpgm
 define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
   %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
   %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
@@ -197,7 +197,7 @@
 ; SI: ds_read_b32
 ; SI: s_endpgm
 define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
   %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
   %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
@@ -220,7 +220,7 @@
 ; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}}
 ; SI: s_endpgm
 define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
   %ptr.1 = insertelement <2 x [512 x float] addrspace(3)*> %ptr.0, [512 x float] addrspace(3)* @lds, i32 1
   %x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
@@ -244,7 +244,7 @@
 ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
 ; SI: s_endpgm
 define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -262,7 +262,7 @@
 ; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
 ; SI: s_endpgm
 define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -281,7 +281,7 @@
 ; SI-NOT: ds_read2_b32
 ; SI: s_endpgm
 define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 1
   %add.x = add nsw i32 %x.i, 8
@@ -297,7 +297,7 @@
 ; SI-NOT: ds_read2_b32
 ; SI: s_endpgm
 define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 2
   %add.x = add nsw i32 %x.i, 8
@@ -316,7 +316,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2_f64(double addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
@@ -332,7 +332,7 @@
 ; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:255
 ; SI: s_endpgm
 define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 255
@@ -350,7 +350,7 @@
 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
 ; SI: s_endpgm
 define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 257
@@ -368,7 +368,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
 ; SI: s_endpgm
 define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 7
@@ -438,8 +438,8 @@
 @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
 
 define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
-  %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
+  %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
+  %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1
   %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
   %tmp16 = load float, float addrspace(3)* %arrayidx44, align 4
   %add47 = add nsw i32 %x.i, 1
@@ -494,19 +494,19 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.amdgcn.workgroup.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.y() #1
+declare i32 @llvm.amdgcn.workgroup.id.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
+declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 ; Function Attrs: convergent nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
+declare void @llvm.amdgcn.s.barrier() #2
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
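For context on what the ds_read2.ll tests above exercise: the SI load/store optimizer merges two 32-bit LDS loads off the same base when both offsets are 4-byte-aligned and fit the instruction's two 8-bit offset fields, producing a single ds_read2_b32. A minimal sketch of the qualifying shape (the global and function names here are illustrative, not from the patch):

  @lds.example = addrspace(3) global [512 x float] undef, align 4

  declare i32 @llvm.amdgcn.workitem.id.x() #0

  define void @example_read2(float addrspace(1)* %out) {
    %x = call i32 @llvm.amdgcn.workitem.id.x() #0
    %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds.example, i32 0, i32 %x
    %val0 = load float, float addrspace(3)* %ptr0, align 4
    ; A second load 8 elements away should merge with the first into
    ; something like: ds_read2_b32 v[N:N+1], vPTR offset1:8
    %x.8 = add nsw i32 %x, 8
    %ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds.example, i32 0, i32 %x.8
    %val1 = load float, float addrspace(3)* %ptr1, align 4
    %sum = fadd float %val0, %val1
    store float %sum, float addrspace(1)* %out, align 4
    ret void
  }

  attributes #0 = { nounwind readnone }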
Index: test/CodeGen/AMDGPU/ds_read2_superreg.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_read2_superreg.ll
+++ test/CodeGen/AMDGPU/ds_read2_superreg.ll
@@ -13,7 +13,7 @@
 ; CI: buffer_store_dwordx2 [[RESULT]]
 ; CI: s_endpgm
 define void @simple_read2_v2f32_superreg_align4(<2 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <2 x float>], [512 x <2 x float>] addrspace(3)* @lds.v2, i32 0, i32 %x.i
   %val0 = load <2 x float>, <2 x float> addrspace(3)* %arrayidx0, align 4
   %out.gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %x.i
@@ -27,7 +27,7 @@
 ; CI: buffer_store_dwordx2 [[RESULT]]
 ; CI: s_endpgm
 define void @simple_read2_v2f32_superreg(<2 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <2 x float>], [512 x <2 x float>] addrspace(3)* @lds.v2, i32 0, i32 %x.i
   %val0 = load <2 x float>, <2 x float> addrspace(3)* %arrayidx0
   %out.gep = getelementptr inbounds <2 x float>, <2 x float> addrspace(1)* %out, i32 %x.i
@@ -44,7 +44,7 @@
 ; CI: buffer_store_dword v[[ADD2]]
 ; CI: s_endpgm
 define void @simple_read2_v4f32_superreg_align4(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
   %val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0, align 4
   %elt0 = extractelement <4 x float> %val0, i32 0
@@ -69,7 +69,7 @@
 ; CI: buffer_store_dword v[[ADD1]]
 ; CI: s_endpgm
 define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <3 x float>], [512 x <3 x float>] addrspace(3)* @lds.v3, i32 0, i32 %x.i
   %val0 = load <3 x float>, <3 x float> addrspace(3)* %arrayidx0, align 4
   %elt0 = extractelement <3 x float> %val0, i32 0
@@ -95,7 +95,7 @@
 ; CI: buffer_store_dwordx4
 ; CI: s_endpgm
 define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
   %val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0, align 8
   %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i32 %x.i
@@ -110,7 +110,7 @@
 ; CI: buffer_store_dwordx4
 ; CI: s_endpgm
 define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <4 x float>], [512 x <4 x float>] addrspace(3)* @lds.v4, i32 0, i32 %x.i
   %val0 = load <4 x float>, <4 x float> addrspace(3)* %arrayidx0
   %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %out, i32 %x.i
@@ -130,7 +130,7 @@
 ; CI: buffer_store_dwordx4
 ; CI: s_endpgm
 define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <8 x float>], [512 x <8 x float>] addrspace(3)* @lds.v8, i32 0, i32 %x.i
   %val0 = load <8 x float>, <8 x float> addrspace(3)* %arrayidx0
   %out.gep = getelementptr inbounds <8 x float>, <8 x float> addrspace(1)* %out, i32 %x.i
@@ -158,7 +158,7 @@
 ; CI: buffer_store_dwordx4
 ; CI: s_endpgm
 define void @simple_read2_v16f32_superreg(<16 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x <16 x float>], [512 x <16 x float>] addrspace(3)* @lds.v16, i32 0, i32 %x.i
   %val0 = load <16 x float>, <16 x float> addrspace(3)* %arrayidx0
   %out.gep = getelementptr inbounds <16 x float>, <16 x float> addrspace(1)* %out, i32 %x.i
@@ -173,7 +173,7 @@
 ; CI: buffer_store_dwordx2 v{{\[}}[[REG_ELT0]]:[[REG_ELT1]]{{\]}}
 ; CI: s_endpgm
 define void @simple_read2_v2f32_superreg_scalar_loads_align4(<2 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 1
 
@@ -196,7 +196,7 @@
 ; CI: buffer_store_dwordx4 v{{\[}}[[REG_ELT0]]:[[REG_ELT3]]{{\]}}
 ; CI: s_endpgm
 define void @simple_read2_v4f32_superreg_scalar_loads_align4(<4 x float> addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 1
   %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %arrayidx0, i32 2
@@ -224,13 +224,10 @@
 declare i32 @llvm.r600.read.tgid.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
-
-; Function Attrs: convergent nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
+declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/ds_read2st64.ll
===================================================================
--- test/CodeGen/AMDGPU/ds_read2st64.ll
+++ test/CodeGen/AMDGPU/ds_read2st64.ll
@@ -11,7 +11,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
@@ -30,7 +30,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -50,7 +50,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -70,7 +70,7 @@
 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -87,7 +87,7 @@
 ; SI-NOT: ds_read2st64_b32
 ; SI: s_endpgm
 define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 63
@@ -103,7 +103,7 @@
 ; SI-NOT: ds_read2st64_b32
 ; SI: s_endpgm
 define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
   %val0 = load float, float addrspace(3)* %arrayidx0, align 4
@@ -123,7 +123,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 64
@@ -142,7 +142,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
@@ -162,7 +162,7 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
 ; SI: s_endpgm
 define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
@@ -182,7 +182,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 256
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
@@ -202,7 +202,7 @@
 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
@@ -219,7 +219,7 @@
 ; SI-NOT: ds_read2st64_b64
 ; SI: s_endpgm
 define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
@@ -240,7 +240,7 @@
 ; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:8
 ; SI: s_endpgm
 define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
-  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
@@ -253,16 +253,10 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.y() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
+declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i %val = load float, float addrspace(1)* %in.gep, align 4 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -28,7 +28,7 @@ ; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 %val0 = load float, float addrspace(1)* %in.gep.0, align 4 @@ -47,7 +47,7 @@ ; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32 ; SI: s_endpgm define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -66,7 +66,7 @@ ; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32 ; SI: s_endpgm define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -87,7 +87,7 @@ ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1 %val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8 @@ -108,7 +108,7 @@ ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8 %val0 = extractelement <2 x float> %val, i32 0 @@ -127,7 +127,7 @@ ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16 %val0 = extractelement <4 x float> %val, i32 0 @@ -147,7 +147,7 @@ ; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] 
offset1:255 ; SI: s_endpgm define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 %val0 = load float, float addrspace(1)* %in.gep.0, align 4 @@ -165,7 +165,7 @@ ; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028 ; SI: s_endpgm define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -183,7 +183,7 @@ ; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 ; SI: s_endpgm define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -213,7 +213,7 @@ ; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27 ; SI: s_endpgm define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -244,7 +244,7 @@ ; SI: ds_write_b32 ; SI: s_endpgm define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i %val0 = load float, float addrspace(1)* %in0.gep, align 4 @@ -271,7 +271,7 @@ ; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset1:8 ; SI: s_endpgm define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i %val = load double, double addrspace(1)* %in.gep, align 8 %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i @@ -289,7 +289,7 @@ ; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 ; SI: s_endpgm define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i %val = load double, double addrspace(1)* 
%in.gep, align 8 %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i @@ -307,7 +307,7 @@ ; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset1:8 ; SI: s_endpgm define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1 %val0 = load double, double addrspace(1)* %in.gep.0, align 8 @@ -372,8 +372,8 @@ @sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4 define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tgid.x() #1 - %y.i = tail call i32 @llvm.r600.read.tidig.y() #1 + %x.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1 + %y.i = tail call i32 @llvm.amdgcn.workitem.id.y() #1 %val = load float, float addrspace(1)* %in %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i store float %val, float addrspace(3)* %arrayidx44, align 4 @@ -411,7 +411,7 @@ ; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:1{{$}} ; CI: s_endpgm define void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %in %val0 = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 4 %out.gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(3)* %out, i32 %x.i @@ -420,19 +420,16 @@ } ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tgid.x() #1 +declare i32 @llvm.amdgcn.workgroup.id.x() #1 ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tgid.y() #1 +declare i32 @llvm.amdgcn.workgroup.id.y() #1 ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tidig.y() #1 - -; Function Attrs: convergent nounwind -declare void @llvm.AMDGPU.barrier.local() #2 +declare i32 @llvm.amdgcn.workitem.id.y() #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/ds_write2st64.ll =================================================================== --- test/CodeGen/AMDGPU/ds_write2st64.ll +++ test/CodeGen/AMDGPU/ds_write2st64.ll @@ -8,7 +8,7 @@ ; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset1:1 ; SI: s_endpgm define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i %val = load float, float addrspace(1)* %in.gep, align 4 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i @@ -26,7 +26,7 @@ ; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 ; SI: s_endpgm define void 
@simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 %val0 = load float, float addrspace(1)* %in.gep.0, align 4 @@ -47,7 +47,7 @@ ; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset1:255 ; SI: s_endpgm define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 %val0 = load float, float addrspace(1)* %in.gep.0, align 4 @@ -67,7 +67,7 @@ ; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 ; SI: s_endpgm define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1 %val0 = load double, double addrspace(1)* %in.gep.0, align 8 @@ -86,7 +86,7 @@ ; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:8 ; SI: s_endpgm define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 { - %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 + %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i %val = load double, double addrspace(1)* %in.gep, align 8 %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i @@ -98,19 +98,10 @@ } ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tgid.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tgid.y() #1 - -; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tidig.x() #1 - -; Function Attrs: nounwind readnone -declare i32 @llvm.r600.read.tidig.y() #1 - -; Function Attrs: convergent nounwind -declare void @llvm.AMDGPU.barrier.local() #2 +declare i32 @llvm.amdgcn.workitem.id.y() #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/fabs.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.f64.ll +++ test/CodeGen/AMDGPU/fabs.f64.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare double @fabs(double) readnone declare double @llvm.fabs.f64(double) readnone @@ -11,7 +11,7 @@ ; SI: v_and_b32 ; SI: s_endpgm define void @v_fabs_f64(double 
addrspace(1)* %out, double addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %tidext = sext i32 %tid to i64 %gep = getelementptr double, double addrspace(1)* %in, i64 %tidext %val = load double, double addrspace(1)* %gep, align 8 Index: test/CodeGen/AMDGPU/flat-address-space.ll =================================================================== --- test/CodeGen/AMDGPU/flat-address-space.ll +++ test/CodeGen/AMDGPU/flat-address-space.ll @@ -127,9 +127,6 @@ ret void } -declare void @llvm.AMDGPU.barrier.local() #1 -declare i32 @llvm.r600.read.tidig.x() #3 - attributes #0 = { nounwind } attributes #1 = { nounwind convergent } attributes #3 = { nounwind readnone } Index: test/CodeGen/AMDGPU/fma-combine.ll =================================================================== --- test/CodeGen/AMDGPU/fma-combine.ll +++ test/CodeGen/AMDGPU/fma-combine.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 declare double @llvm.fabs.f64(double) #0 declare double @llvm.fma.f64(double, double, double) #0 declare float @llvm.fma.f32(float, float, float) #0 @@ -14,7 +14,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -42,7 +42,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -71,7 +71,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -95,7 +95,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double 
addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -123,7 +123,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -152,7 +152,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -180,7 +180,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -209,7 +209,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -238,7 +238,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -272,7 +272,7 @@ ; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} ; SI: s_endpgm define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ 
-307,7 +307,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 @@ -342,7 +342,7 @@ ; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]] ; SI: buffer_store_dwordx2 [[RESULT]] define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2 Index: test/CodeGen/AMDGPU/fmax_legacy.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmax_legacy.f64.ll +++ test/CodeGen/AMDGPU/fmax_legacy.f64.ll @@ -1,11 +1,11 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; Make sure we don't try to form FMAX_LEGACY nodes with f64 -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; FUNC-LABEL: @test_fmax_legacy_uge_f64 define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -20,7 +20,7 @@ ; FUNC-LABEL: @test_fmax_legacy_oge_f64 define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -35,7 +35,7 @@ ; FUNC-LABEL: @test_fmax_legacy_ugt_f64 define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -50,7 +50,7 @@ ; FUNC-LABEL: @test_fmax_legacy_ogt_f64 define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 Index: test/CodeGen/AMDGPU/fmin_legacy.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmin_legacy.f64.ll +++ test/CodeGen/AMDGPU/fmin_legacy.f64.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; FUNC-LABEL: @test_fmin_legacy_f64 define void @test_fmin_legacy_f64(<4 x 
double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 { @@ -15,7 +15,7 @@ ; FUNC-LABEL: @test_fmin_legacy_ule_f64 define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -30,7 +30,7 @@ ; FUNC-LABEL: @test_fmin_legacy_ole_f64 define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -45,7 +45,7 @@ ; FUNC-LABEL: @test_fmin_legacy_olt_f64 define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -60,7 +60,7 @@ ; FUNC-LABEL: @test_fmin_legacy_ult_f64 define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 Index: test/CodeGen/AMDGPU/fmuladd.ll =================================================================== --- test/CodeGen/AMDGPU/fmuladd.ll +++ test/CodeGen/AMDGPU/fmuladd.ll @@ -1,8 +1,8 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s declare float @llvm.fmuladd.f32(float, float, float) declare double @llvm.fmuladd.f64(double, double, double) -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone ; CHECK-LABEL: {{^}}fmuladd_f32: @@ -37,7 +37,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -56,7 +56,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -77,7 +77,7 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* 
%out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -99,7 +99,7 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -119,7 +119,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -139,7 +139,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -161,7 +161,7 @@ ; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]] ; CHECK: buffer_store_dword [[R2]] define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -183,7 +183,7 @@ ; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]] ; CHECK: buffer_store_dword [[RESULT]] define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid Index: test/CodeGen/AMDGPU/fp-classify.ll =================================================================== --- test/CodeGen/AMDGPU/fp-classify.ll +++ test/CodeGen/AMDGPU/fp-classify.ll @@ -1,9 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare i1 @llvm.AMDGPU.class.f32(float, i32) #1 -declare i1 @llvm.AMDGPU.class.f64(double, i32) #1 -declare i32 @llvm.r600.read.tidig.x() #1 declare float @llvm.fabs.f32(float) #1 declare double @llvm.fabs.f64(double) #1 Index: test/CodeGen/AMDGPU/fp_to_sint.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fp_to_sint.f64.ll +++ 
test/CodeGen/AMDGPU/fp_to_sint.f64.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: @fp_to_sint_f64_i32 ; SI: v_cvt_i32_f64_e32 @@ -47,7 +47,7 @@ ; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]] ; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %val = load double, double addrspace(1)* %gep, align 8 %cast = fptosi double %val to i64 Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s ; Tests for indirect addressing on SI, which is implemented using dynamic ; indexing of vectors. @@ -87,7 +87,7 @@ ; CHECK: s_cbranch_execnz define void @extract_neg_offset_vgpr(i32 addrspace(1)* %out) { entry: - %id = call i32 @llvm.r600.read.tidig.x() #1 + %id = call i32 @llvm.amdgcn.workitem.id.x() #1 %index = add i32 %id, -512 %value = extractelement <4 x i32> , i32 %index store i32 %value, i32 addrspace(1)* %out @@ -152,7 +152,7 @@ ; CHECK: s_cbranch_execnz define void @insert_neg_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { entry: - %id = call i32 @llvm.r600.read.tidig.x() #1 + %id = call i32 @llvm.amdgcn.workitem.id.x() #1 %index = add i32 %id, -512 %value = insertelement <4 x i32> , i32 5, i32 %index store <4 x i32> %value, <4 x i32> addrspace(1)* %out @@ -167,12 +167,13 @@ ; CHECK: s_cbranch_execnz define void @insert_neg_inline_offset_vgpr(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out) { entry: - %id = call i32 @llvm.r600.read.tidig.x() #1 + %id = call i32 @llvm.amdgcn.workitem.id.x() #1 %index = add i32 %id, -16 %value = insertelement <4 x i32> , i32 5, i32 %index store <4 x i32> %value, <4 x i32> addrspace(1)* %out ret void } -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 + attributes #1 = { nounwind readnone } Index: test/CodeGen/AMDGPU/indirect-private-64.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-private-64.ll +++ test/CodeGen/AMDGPU/indirect-private-64.ll @@ -3,8 +3,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s - -declare void @llvm.AMDGPU.barrier.local() convergent nounwind +declare void @llvm.amdgcn.s.barrier() #1 ; SI-LABEL: 
{{^}}private_access_f64_alloca: @@ -13,12 +12,12 @@ ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 -define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind { +define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) #1 { %val = load double, double addrspace(1)* %in, align 8 %array = alloca double, i32 16, align 8 %ptr = getelementptr inbounds double, double* %array, i32 %b store double %val, double* %ptr, align 8 - call void @llvm.AMDGPU.barrier.local() convergent nounwind + call void @llvm.amdgcn.s.barrier() %result = load double, double* %ptr, align 8 store double %result, double addrspace(1)* %out, align 8 ret void @@ -33,12 +32,12 @@ ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 ; SI-PROMOTE: ds_read_b64 -define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind { +define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 { %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16 %array = alloca <2 x double>, i32 16, align 16 %ptr = getelementptr inbounds <2 x double>, <2 x double>* %array, i32 %b store <2 x double> %val, <2 x double>* %ptr, align 16 - call void @llvm.AMDGPU.barrier.local() convergent nounwind + call void @llvm.amdgcn.s.barrier() %result = load <2 x double>, <2 x double>* %ptr, align 16 store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16 ret void @@ -51,12 +50,12 @@ ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 -define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind { +define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) #1 { %val = load i64, i64 addrspace(1)* %in, align 8 %array = alloca i64, i32 16, align 8 %ptr = getelementptr inbounds i64, i64* %array, i32 %b store i64 %val, i64* %ptr, align 8 - call void @llvm.AMDGPU.barrier.local() convergent nounwind + call void @llvm.amdgcn.s.barrier() %result = load i64, i64* %ptr, align 8 store i64 %result, i64 addrspace(1)* %out, align 8 ret void @@ -71,13 +70,16 @@ ; SI-PROMOTE: ds_write_b64 ; SI-PROMOTE: ds_read_b64 ; SI-PROMOTE: ds_read_b64 -define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind { +define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 { %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16 %array = alloca <2 x i64>, i32 16, align 16 %ptr = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i32 %b store <2 x i64> %val, <2 x i64>* %ptr, align 16 - call void @llvm.AMDGPU.barrier.local() convergent nounwind + call void @llvm.amdgcn.s.barrier() %result = load <2 x i64>, <2 x i64>* %ptr, align 16 store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16 ret void } + +attributes #0 = { nounwind } +attributes #1 = { nounwind convergent } Index: test/CodeGen/AMDGPU/llvm.amdgcn.class.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.class.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.class.ll @@ -2,7 +2,7 @@ declare i1 @llvm.amdgcn.class.f32(float, i32) #1 declare i1 @llvm.amdgcn.class.f64(double, i32) #1 -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 
@llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fabs.f32(float) #1 declare double @llvm.fabs.f64(double) #1 @@ -133,7 +133,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -151,7 +151,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in @@ -171,7 +171,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in @@ -291,7 +291,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load double, double addrspace(1)* %in @@ -307,7 +307,7 @@ ; SI: v_cmp_class_f64_e32 vcc, ; SI: s_endpgm define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in @@ -322,7 +322,7 @@ ; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} ; SI: s_endpgm define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.in @@ -339,7 +339,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -359,7 +359,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 
addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -382,7 +382,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -417,7 +417,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -437,7 +437,7 @@ ; SI-NOT: v_cmp_class ; SI: s_endpgm define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in @@ -457,7 +457,7 @@ ; SI: s_or_b64 ; SI: s_endpgm define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.in Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -3,7 +3,7 @@ ; FIXME: Enable for VI. 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind readnone @@ -115,7 +115,7 @@ ; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]] ; SI: s_endpgm define void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 @@ -152,7 +152,7 @@ ; SI: s_endpgm define void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind { entry: - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) nounwind readnone declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone @@ -12,7 +12,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -32,7 +32,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -52,7 +52,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -72,7 +72,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 
@llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 @@ -92,7 +92,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr float, float addrspace(1)* %in, i32 %tid %b = load float, float addrspace(1)* %gep, align 4 @@ -110,7 +110,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr float, float addrspace(1)* %in, i32 %tid %b = load float, float addrspace(1)* %gep, align 4 @@ -128,7 +128,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep, align 4 @@ -146,7 +146,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep, align 4 @@ -164,7 +164,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %b = load double, double addrspace(1)* %gep, align 8 @@ -182,7 +182,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %b = load double, double addrspace(1)* %gep, align 8 @@ -200,7 +200,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %a = load double, double addrspace(1)* %gep, align 8 @@ -218,7 +218,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep = getelementptr 
double, double addrspace(1)* %in, i32 %tid %a = load double, double addrspace(1)* %gep, align 8 @@ -293,7 +293,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 @@ -309,7 +309,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 @@ -326,7 +326,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -348,7 +348,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -6,7 +6,7 @@ ; GCN: s_barrier define void @test_barrier(i32 addrspace(1)* %out) #0 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp store i32 %tmp, i32 addrspace(1)* %tmp1 call void @llvm.amdgcn.s.barrier() @@ -20,7 +20,7 @@ } declare void @llvm.amdgcn.s.barrier() #1 -declare i32 @llvm.r600.read.tidig.x() #2 +declare i32 @llvm.amdgcn.workitem.id.x() #2 declare i32 @llvm.r600.read.local.size.x() #2 attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.round.f64.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.round.f64.ll +++ test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -27,7 +27,7 @@ ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep = getelementptr double, double addrspace(1)* %in, i32 %tid %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid %x = load double, double addrspace(1)* %gep @@ -60,7 +60,7 @@ ret void } -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 declare double @llvm.round.f64(double) #1 declare <2 x double> @llvm.round.v2f64(<2 x double>) #1 Index: test/CodeGen/AMDGPU/mad-combine.ll =================================================================== --- test/CodeGen/AMDGPU/mad-combine.ll +++ 
test/CodeGen/AMDGPU/mad-combine.ll @@ -8,7 +8,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=verde -mattr=+fp32-denormals -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 declare float @llvm.fma.f32(float, float, float) #0 declare float @llvm.fmuladd.f32(float, float, float) #0 @@ -32,7 +32,7 @@ ; SI-DENORM: buffer_store_dword [[RESULT]] ; SI-STD: buffer_store_dword [[C]] define void @combine_to_mad_f32_0(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -71,7 +71,7 @@ ; SI-STD-DAG: buffer_store_dword [[D]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_f32_0_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -108,7 +108,7 @@ ; SI-DENORM: buffer_store_dword [[RESULT]] ; SI-STD: buffer_store_dword [[C]] define void @combine_to_mad_f32_1(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -138,7 +138,7 @@ ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -175,7 +175,7 @@ ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_fsub_0_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -210,7 +210,7 @@ ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float 
addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -246,7 +246,7 @@ ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_fsub_1_f32_2use(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -282,7 +282,7 @@ ; SI: buffer_store_dword [[RESULT]] define void @combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -320,7 +320,7 @@ ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_fsub_2_f32_2uses_neg(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -363,7 +363,7 @@ ; SI-DAG: buffer_store_dword [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} ; SI: s_endpgm define void @combine_to_mad_fsub_2_f32_2uses_mul(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -408,7 +408,7 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} define void @aggressive_combine_to_mad_fsub_0_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -454,7 +454,7 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm define void @aggressive_combine_to_mad_fsub_1_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -500,7 +500,7 @@ ; SI-STD: buffer_store_dword [[TMP]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm 
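; For reference, a minimal sketch (an illustrative function, not part of
; the original test file) of the pattern these combines target: with
; -fp-contract=fast, an fmul whose result feeds an fsub is a candidate
; to fold into a single v_mad_f32 when fp32 denormals are disabled.
define void @mad_combine_sketch(float addrspace(1)* %out, float %a, float %b, float %c) {
  %mul = fmul float %a, %b      ; mul feeding...
  %sub = fsub float %mul, %c    ; ...a sub may become mad(%a, %b, -%c)
  store float %sub, float addrspace(1)* %out
  ret void
}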
define void @aggressive_combine_to_mad_fsub_2_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 @@ -546,7 +546,7 @@ ; SI: buffer_store_dword [[RESULT]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: s_endpgm define void @aggressive_combine_to_mad_fsub_3_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2 Index: test/CodeGen/AMDGPU/mad-sub.ll =================================================================== --- test/CodeGen/AMDGPU/mad-sub.ll +++ test/CodeGen/AMDGPU/mad-sub.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 declare float @llvm.fabs.f32(float) #0 ; FUNC-LABEL: {{^}}mad_sub_f32: @@ -10,7 +10,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]] ; SI: buffer_store_dword [[RESULT]] define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -34,7 +34,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]] ; SI: buffer_store_dword [[RESULT]] define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -55,7 +55,7 @@ ; SI: v_mul_f64 ; SI: v_add_f64 define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr double, double addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -79,7 +79,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]| ; SI: buffer_store_dword [[RESULT]] define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -104,7 +104,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]| ; SI: buffer_store_dword [[RESULT]] define void 
@mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -125,7 +125,7 @@ ; FUNC-LABEL: {{^}}neg_neg_mad_f32: ; SI: v_mac_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -151,7 +151,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]] ; SI: buffer_store_dword [[RESULT]] define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 { - %tid = tail call i32 @llvm.r600.read.tidig.x() #0 + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #0 %tid.ext = sext i32 %tid to i64 %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 %tid.ext %add1 = add i64 %tid.ext, 1 @@ -175,7 +175,7 @@ ; SI: v_mac_f32_e32 [[R2]], -2.0, [[R1]] ; SI: buffer_store_dword [[R2]] define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -196,7 +196,7 @@ ; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]] ; SI: buffer_store_dword [[RESULT]] define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid Index: test/CodeGen/AMDGPU/madak.ll =================================================================== --- test/CodeGen/AMDGPU/madak.ll +++ test/CodeGen/AMDGPU/madak.ll @@ -3,7 +3,7 @@ ; FIXME: Enable VI -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone ; GCN-LABEL: {{^}}madak_f32: @@ -11,7 +11,7 @@ ; GCN: buffer_load_dword [[VB:v[0-9]+]] ; GCN: v_madak_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 define void @madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -38,7 +38,7 @@ ; GCN-DAG: v_mac_f32_e32 [[VK]], [[VC]], [[VA]] ; GCN: s_endpgm define void @madak_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail 
call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 @@ -65,7 +65,7 @@ ; GCN: buffer_load_dword [[VA:v[0-9]+]] ; GCN: v_madak_f32_e32 {{v[0-9]+}}, 4.0, [[VA]], 0x41200000 define void @madak_m_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -85,7 +85,7 @@ ; GCN: buffer_load_dword [[VB:v[0-9]+]] ; GCN: v_mad_f32 {{v[0-9]+}}, [[VA]], [[VB]], 4.0 define void @madak_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -107,7 +107,7 @@ ; GCN-NOT: v_madak_f32 ; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]] define void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -126,7 +126,7 @@ ; GCN-NOT: v_madak_f32 ; GCN: v_mac_f32_e32 [[VK]], [[SB]], [[VA]] define void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -154,7 +154,7 @@ ; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} ; GCN: s_endpgm define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -176,7 +176,7 @@ ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}} ; GCN: s_endpgm define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid Index: 
test/CodeGen/AMDGPU/madmk.ll =================================================================== --- test/CodeGen/AMDGPU/madmk.ll +++ test/CodeGen/AMDGPU/madmk.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; XUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.fabs.f32(float) nounwind readnone ; GCN-LABEL: {{^}}madmk_f32: @@ -9,7 +9,7 @@ ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN: v_madmk_f32_e32 {{v[0-9]+}}, [[VA]], [[VB]], 0x41200000 define void @madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -32,7 +32,7 @@ ; GCN-DAG: v_mac_f32_e32 [[VC]], [[VK]], [[VA]] ; GCN: s_endpgm define void @madmk_2_use_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1 @@ -61,7 +61,7 @@ ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN: v_mac_f32_e32 [[VB]], 4.0, [[VA]] define void @madmk_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -80,7 +80,7 @@ ; GCN: v_mac_f32_e32 ; GCN: s_endpgm define void @s_s_madmk_f32(float addrspace(1)* noalias %out, float %a, float %b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %mul = fmul float %a, 10.0 @@ -94,7 +94,7 @@ ; GCN: v_mad_f32 ; GCN: s_endpgm define void @v_s_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 @@ -110,7 +110,7 @@ ; GCN: v_mac_f32_e32 ; GCN: s_endpgm define void @scalar_vector_madmk_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in, float %a) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %out.gep = 
getelementptr float, float addrspace(1)* %out, i32 %tid %b = load float, float addrspace(1)* %gep.0, align 4 @@ -126,7 +126,7 @@ ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}} define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -147,7 +147,7 @@ ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 ; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, |{{[sv][0-9]+}}| define void @no_madmk_src2_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -168,7 +168,7 @@ ; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000 ; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], [[A]], 2.0 define void @madmk_add_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid Index: test/CodeGen/AMDGPU/max.ll =================================================================== --- test/CodeGen/AMDGPU/max.ll +++ test/CodeGen/AMDGPU/max.ll @@ -1,11 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: {{^}}v_test_imax_sge_i32: ; SI: v_max_i32_e32 define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -23,7 +23,7 @@ ; SI: v_max_i32_e32 ; SI: v_max_i32_e32 define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid %outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid @@ -58,7 +58,7 @@ ; SI: buffer_load_sbyte ; SI: v_max_i32_e32 define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 
addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid @@ -91,7 +91,7 @@ ; FUNC-LABEL: @v_test_imax_sgt_i32 ; SI: v_max_i32_e32 define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -115,7 +115,7 @@ ; FUNC-LABEL: @v_test_umax_uge_i32 ; SI: v_max_u32_e32 define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -154,7 +154,7 @@ ; SI: buffer_load_ubyte ; SI: v_max_u32_e32 define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid @@ -169,7 +169,7 @@ ; FUNC-LABEL: @v_test_umax_ugt_i32 ; SI: v_max_u32_e32 define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid Index: test/CodeGen/AMDGPU/merge-stores.ll =================================================================== --- test/CodeGen/AMDGPU/merge-stores.ll +++ test/CodeGen/AMDGPU/merge-stores.ll @@ -376,7 +376,7 @@ %w = load i32, i32 addrspace(1)* %in.gep.3 ; Make sure the barrier doesn't stop this - tail call void @llvm.AMDGPU.barrier.local() #1 + tail call void @llvm.amdgcn.s.barrier() #1 store i32 %w, i32 addrspace(1)* %out.gep.3 store i32 %z, i32 addrspace(1)* %out.gep.2 @@ -413,7 +413,7 @@ %w = load i32, i32 addrspace(1)* %in.gep.3 ; Make sure the barrier doesn't stop this - tail call void @llvm.AMDGPU.barrier.local() #1 + tail call void @llvm.amdgcn.s.barrier() #1 store i32 %w, i32 addrspace(1)* %out store i32 %z, i32 addrspace(1)* %out.gep.1 @@ -705,7 +705,7 @@ ret void } -declare void @llvm.AMDGPU.barrier.local() #1 +declare void @llvm.amdgcn.s.barrier() #1 attributes #0 = { nounwind } attributes #1 = { convergent nounwind } Index: test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll =================================================================== --- test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll +++ test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll @@ -7,12 +7,12 @@ ; Check that 
moving the pointer out of the resource descriptor to ; vaddr works for atomics. -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; GCN-LABEL: {{^}}atomic_max_i32: ; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}} define void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep %xor = xor i32 %tid, 1 @@ -32,7 +32,7 @@ ; GCN-LABEL: {{^}}atomic_max_i32_noret: ; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}} define void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep %xor = xor i32 %tid, 1 Index: test/CodeGen/AMDGPU/mubuf.ll =================================================================== --- test/CodeGen/AMDGPU/mubuf.ll +++ test/CodeGen/AMDGPU/mubuf.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s -declare i32 @llvm.r600.read.tidig.x() readnone +declare i32 @llvm.amdgcn.workitem.id.x() readnone ;;;==========================================================================;;; ;;; MUBUF LOAD TESTS @@ -170,7 +170,7 @@ ; CHECK-LABEL: {{^}}store_vgpr_ptr: ; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid store i32 99, i32 addrspace(1)* %out.gep, align 4 ret void Index: test/CodeGen/AMDGPU/no-shrink-extloads.ll =================================================================== --- test/CodeGen/AMDGPU/no-shrink-extloads.ll +++ test/CodeGen/AMDGPU/no-shrink-extloads.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; Make sure we don't turn the 32-bit argument load into a 16-bit ; load. 
There aren't extending scalar loads, so that would require @@ -22,7 +22,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_short v define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid %load = load i32, i32 addrspace(1)* %gep.in @@ -44,7 +44,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_byte v define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid %load = load i32, i32 addrspace(1)* %gep.in @@ -66,7 +66,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_byte v define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid %load = load i32, i32 addrspace(1)* %gep.in @@ -88,7 +88,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_dword v define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %load = load i64, i64 addrspace(1)* %gep.in @@ -111,7 +111,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_dword v define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %load = load i64, i64 addrspace(1)* %gep.in @@ -135,7 +135,7 @@ ; SI: buffer_load_ubyte v ; SI: buffer_store_byte v define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid %load = load i16, i16 addrspace(1)* %gep.in @@ -158,7 +158,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_byte v define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid %load = load i64, i64 addrspace(1)* %gep.in @@ -181,7 +181,7 @@ ; SI: buffer_load_dword v ; SI: buffer_store_byte v define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + 
%tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid %load = load i64, i64 addrspace(1)* %gep.in Index: test/CodeGen/AMDGPU/operand-folding.ll =================================================================== --- test/CodeGen/AMDGPU/operand-folding.ll +++ test/CodeGen/AMDGPU/operand-folding.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}fold_sgpr: ; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s @@ -8,7 +8,7 @@ br i1 %tmp0, label %if, label %endif if: - %id = call i32 @llvm.r600.read.tidig.x() + %id = call i32 @llvm.amdgcn.workitem.id.x() %offset = add i32 %fold, %id %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset store i32 0, i32 addrspace(1)* %tmp1 @@ -27,7 +27,7 @@ br i1 %tmp0, label %if, label %endif if: - %id = call i32 @llvm.r600.read.tidig.x() + %id = call i32 @llvm.amdgcn.workitem.id.x() %val = or i32 %id, %fold store i32 %val, i32 addrspace(1)* %out br label %endif @@ -63,7 +63,7 @@ define void @vector_inline(<4 x i32> addrspace(1)* %out) { entry: - %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp0, 1 %tmp2 = add i32 %tmp0, 2 %tmp3 = add i32 %tmp0, 3 @@ -82,7 +82,7 @@ define void @imm_one_use(i32 addrspace(1)* %out) { entry: - %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = xor i32 %tmp0, 100 store i32 %tmp1, i32 addrspace(1)* %out ret void @@ -96,7 +96,7 @@ define void @vector_imm(<4 x i32> addrspace(1)* %out) { entry: - %tmp0 = call i32 @llvm.r600.read.tidig.x() + %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp0, 1 %tmp2 = add i32 %tmp0, 2 %tmp3 = add i32 %tmp0, 3 @@ -109,5 +109,6 @@ ret void } -declare i32 @llvm.r600.read.tidig.x() #0 -attributes #0 = { readnone } +declare i32 @llvm.amdgcn.workitem.id.x() #0 + +attributes #0 = { nounwind readnone } Index: test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll =================================================================== --- test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll +++ test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll @@ -8,10 +8,10 @@ ; During live interval construction, the first sub register def is ; incorrectly marked as dead. 
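; A minimal sketch of the shape that exposes the bug (an assumed
; illustration, not the original test body): the 64-bit load is split
; into two 32-bit sub-register defs, and the def of the low half must
; stay live, not dead, while the truncated use below still reads it.
define void @subreg_dead_def_sketch(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
  %val = load i64, i64 addrspace(1)* %in  ; one vreg, two sub-register defs
  %lo = trunc i64 %val to i32             ; keeps the low sub-register live
  store i32 %lo, i32 addrspace(1)* %out
  ret void
}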
-declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 define void @dead_def_subregister(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %val = load i64, i64 addrspace(1)* %in.gep Index: test/CodeGen/AMDGPU/rsq.ll =================================================================== --- test/CodeGen/AMDGPU/rsq.ll +++ test/CodeGen/AMDGPU/rsq.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare float @llvm.sqrt.f32(float) nounwind readnone declare double @llvm.sqrt.f64(double) nounwind readnone @@ -56,7 +56,7 @@ ; SI: s_endpgm define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) { - %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 Index: test/CodeGen/AMDGPU/salu-to-valu.ll =================================================================== --- test/CodeGen/AMDGPU/salu-to-valu.ll +++ test/CodeGen/AMDGPU/salu-to-valu.ll @@ -2,8 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s -declare i32 @llvm.r600.read.tidig.x() #0 -declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @llvm.amdgcn.workitem.id.y() #0 ; In this test both the pointer and the offset operands to the ; BUFFER_LOAD instructions end up being stored in vgprs. 
This @@ -26,8 +26,8 @@ define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() - %tmp1 = call i32 @llvm.r600.read.tidig.y() + %tmp = call i32 @llvm.amdgcn.workitem.id.x() + %tmp1 = call i32 @llvm.amdgcn.workitem.id.y() %tmp2 = sext i32 %tmp to i64 %tmp3 = sext i32 %tmp1 to i64 br label %loop @@ -87,7 +87,7 @@ ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 %tmp3 = load i32, i32 addrspace(2)* %tmp2 @@ -107,7 +107,7 @@ ; GCN-HSA: flat_store_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000 %tmp4 = load i32, i32 addrspace(2)* %tmp3 @@ -127,7 +127,7 @@ ; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000 %tmp4 = load i64, i64 addrspace(2)* %tmp3 @@ -149,7 +149,7 @@ ; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234 %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3 @@ -185,7 +185,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234 %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3 @@ -234,7 +234,7 @@ ; GCN: s_endpgm define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234 %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3 @@ -251,7 +251,7 @@ ; GCN-HSA: flat_store_dword [[ADD]] define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 %tmp3 = load i32, i32 addrspace(2)* %tmp2 @@ -265,7 +265,7 @@ ; GCN-HSA flat_load_dword v{{[0-9]}}, 
v{{[0-9]+:[0-9]+}} define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255 %tmp3 = load i32, i32 addrspace(2)* %tmp2 @@ -279,7 +279,7 @@ ; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}] define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { entry: - %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = add i32 %tmp, 4 %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256 %tmp3 = load i32, i32 addrspace(2)* %tmp2 @@ -294,7 +294,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() + %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 @@ -317,7 +317,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() + %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 @@ -354,7 +354,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)* %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4 @@ -389,7 +389,7 @@ ; GCN-HSA: flat_load_dwordx4 define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)* %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4 Index: test/CodeGen/AMDGPU/schedule-global-loads.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-global-loads.ll +++ test/CodeGen/AMDGPU/schedule-global-loads.ll @@ -1,8 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() #1 - ; FIXME: This currently doesn't do a great job of clustering the ; loads, which end up with extra moves between them. 
Right now, it ; seems the only thing areLoadsFromSameBasePtr is accomplishing is Index: test/CodeGen/AMDGPU/sgpr-control-flow.ll =================================================================== --- test/CodeGen/AMDGPU/sgpr-control-flow.ll +++ test/CodeGen/AMDGPU/sgpr-control-flow.ll @@ -40,7 +40,7 @@ define void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) { entry: - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %tid_f = uitofp i32 %tid to float %tmp1 = fcmp ueq float %tid_f, 0.0 br i1 %tmp1, label %if, label %else @@ -77,7 +77,7 @@ ; SI: buffer_store_dword [[RESULT]] define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { entry: - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %tmp1 = icmp eq i32 %tid, 0 br i1 %tmp1, label %if, label %else @@ -100,6 +100,6 @@ ret void } -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { readnone } Index: test/CodeGen/AMDGPU/shl_add_constant.ll =================================================================== --- test/CodeGen/AMDGPU/shl_add_constant.ll +++ test/CodeGen/AMDGPU/shl_add_constant.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; Test with inline immediate @@ -10,7 +10,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x %val = load i32, i32 addrspace(1)* %ptr, align 4 %add = add i32 %val, 9 @@ -26,7 +26,7 @@ ; SI-DAG: buffer_store_dword [[SHLREG]] ; SI: s_endpgm define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x %val = load i32, i32 addrspace(1)* %ptr, align 4 %add = add i32 %val, 9 @@ -44,7 +44,7 @@ ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x %val = load i32, i32 addrspace(1)* %ptr, align 4 %shl = add i32 %val, 999 Index: test/CodeGen/AMDGPU/shl_add_ptr.ll =================================================================== --- test/CodeGen/AMDGPU/shl_add_ptr.ll +++ test/CodeGen/AMDGPU/shl_add_ptr.ll @@ -7,7 +7,7 @@ ; LDS globals. 
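; A minimal sketch of the addressing pattern under test (a hypothetical
; function, assuming the @lds0 global and the workitem intrinsic declared
; below): the constant part of the shifted index should fold into the
; ds_read_b32 immediate, e.g. (tid + 2) << 2 becomes base tid << 2 with
; offset:8.
define void @shl_add_offset_sketch(float addrspace(1)* %out) {
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %idx = add nsw i32 %tid, 2    ; constant add folded into the offset
  %ptr = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx
  %val = load float, float addrspace(3)* %ptr, align 4
  store float %val, float addrspace(1)* %out
  ret void
}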
-declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 @lds0 = addrspace(3) global [512 x float] undef, align 4 @lds1 = addrspace(3) global [512 x float] undef, align 4 @@ -20,7 +20,7 @@ ; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 ; SI: s_endpgm define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -40,7 +40,7 @@ ; SI-DAG: buffer_store_dword [[ADDUSE]] ; SI: s_endpgm define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -56,7 +56,7 @@ ; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535 ; SI: s_endpgm define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 65535 %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0 %val0 = load i8, i8 addrspace(3)* %arrayidx0 @@ -74,7 +74,7 @@ ; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 ; SI: s_endpgm define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 64 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 %val0 = load float, float addrspace(3)* %arrayidx0, align 4 @@ -90,7 +90,7 @@ ; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 ; SI: s_endpgm define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 store float 1.0, float addrspace(3)* %arrayidx0, align 4 @@ -105,7 +105,7 @@ @lds2 = addrspace(3) global [512 x i32] undef, align 4 ; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { -; %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 +; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 ; %idx.0 = add nsw i32 %tid.x, 2 ; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0 ; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4 @@ -120,7 +120,7 @@ ; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8 ; SI: s_endpgm define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 { - %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1 + %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* 
@lds2, i32 0, i32 %idx.0
   %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
@@ -135,7 +135,7 @@
 ; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -149,7 +149,7 @@
 ; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -163,7 +163,7 @@
 ; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -177,7 +177,7 @@
 ; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -191,7 +191,7 @@
 ; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -205,7 +205,7 @@
 ; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -215,7 +215,7 @@
 }
 
 ; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+;   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
 ;   %idx.0 = add nsw i32 %tid.x, 2
 ;   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
 ;   %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -229,7 +229,7 @@
 ; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -243,7 +243,7 @@
 ; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -257,7 +257,7 @@
 ; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
@@ -271,7 +271,7 @@
 ; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
 ; SI: s_endpgm
 define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
-  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
   %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
Index: test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
===================================================================
--- test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -2,7 +2,7 @@
 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
 declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
-declare void @llvm.AMDGPU.barrier.local() #2
+declare void @llvm.amdgcn.s.barrier() #2
 
 @stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
 
@@ -61,7 +61,7 @@
   %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
   store i32 99, i32 addrspace(1)* %gptr, align 4
-  call void @llvm.AMDGPU.barrier.local() #2
+  call void @llvm.amdgcn.s.barrier() #2
   %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
Index: test/CodeGen/AMDGPU/sint_to_fp.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; SI-LABEL: {{^}}sint_to_fp_i32_to_f64
 ; SI: v_cvt_f64_i32_e32
@@ -52,7 +52,7 @@
 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %val = load i64, i64 addrspace(1)* %gep, align 8
   %result = sitofp i64 %val to double
Index: test/CodeGen/AMDGPU/sint_to_fp.i64.ll
===================================================================
--- test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
 
 ; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600
@@ -28,7 +28,7 @@
 ; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]],
 ; GCN: {{buffer|flat}}_store_dword [[SIGN_SEL]]
 define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep
@@ -46,7 +46,7 @@
 
 ; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64:
 define void @v_sint_to_fp_v4i64(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
   %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
@@ -55,7 +55,7 @@
   ret void
 }
 
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/split-scalar-i64-add.ll
===================================================================
--- test/CodeGen/AMDGPU/split-scalar-i64-add.ll
+++ test/CodeGen/AMDGPU/split-scalar-i64-add.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.r600.read.tidig.x() readnone
+declare i32 @llvm.amdgcn.workitem.id.x() readnone
 
 ; This is broken because the low half of the 64-bit add remains on the
 ; SALU, but the upper half does not. The addc expects the carry bit
@@ -62,7 +62,7 @@
 ; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
 define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
-  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
   %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %load = load i32, i32 addrspace(1)* %gep
   %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
Index: test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
===================================================================
--- test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
+++ test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -35,9 +35,9 @@
 entry:
   %tmp = tail call i32 @llvm.r600.read.local.size.y()
   %tmp1 = tail call i32 @llvm.r600.read.local.size.z()
-  %tmp2 = tail call i32 @llvm.r600.read.tidig.x()
-  %tmp3 = tail call i32 @llvm.r600.read.tidig.y()
-  %tmp4 = tail call i32 @llvm.r600.read.tidig.z()
+  %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.z()
   %tmp6 = mul i32 %tmp2, %tmp
   %tmp10 = add i32 %tmp3, %tmp6
   %tmp11 = mul i32 %tmp10, %tmp1
@@ -87,7 +87,7 @@
 declare i32 @llvm.r600.read.local.size.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.r600.read.local.size.y() #1
@@ -96,10 +96,10 @@
 declare i32 @llvm.r600.read.local.size.z() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
+declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.z() #1
+declare i32 @llvm.amdgcn.workitem.id.z() #1
 
 attributes #0 = { norecurse nounwind }
 attributes #1 = { nounwind readnone }
Index: test/CodeGen/AMDGPU/store-barrier.ll
===================================================================
--- test/CodeGen/AMDGPU/store-barrier.ll
+++ test/CodeGen/AMDGPU/store-barrier.ll
@@ -12,7 +12,7 @@
 ; CHECK: s_barrier
 ; CHECK: s_endpgm
 ; Function Attrs: nounwind
-define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
+define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) #0 {
 bb:
   %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
   %tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
@@ -21,7 +21,7 @@
   %tmp16 = add i32 %tmp13, 1
   %tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
   store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
-  tail call void @llvm.AMDGPU.barrier.local() #2
+  tail call void @llvm.amdgcn.s.barrier()
   %tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
   %tmp26 = sext i32 %tmp25 to i64
   %tmp27 = sext i32 %arg4 to i64
@@ -37,6 +37,7 @@
 }
 
 ; Function Attrs: convergent nounwind
-declare void @llvm.AMDGPU.barrier.local() #2
+declare void @llvm.amdgcn.s.barrier() #1
 
-attributes #2 = { convergent nounwind }
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }
Index: test/CodeGen/AMDGPU/v_cndmask.ll
===================================================================
--- test/CodeGen/AMDGPU/v_cndmask.ll
+++ test/CodeGen/AMDGPU/v_cndmask.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; SI-LABEL: {{^}}v_cnd_nan_nosgpr:
 ; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
@@ -9,7 +9,7 @@
 ; All nan values are converted to 0xffffffff
 ; SI: s_endpgm
 define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
-  %idx = call i32 @llvm.r600.read.tidig.x() #1
+  %idx = call i32 @llvm.amdgcn.workitem.id.x() #1
   %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
   %f = load float, float addrspace(1)* %fptr
   %setcc = icmp ne i32 %c, 0
Index: test/CodeGen/AMDGPU/valu-i1.ll
===================================================================
--- test/CodeGen/AMDGPU/valu-i1.ll
+++ test/CodeGen/AMDGPU/valu-i1.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 
 ; SI-LABEL: @test_if
 ; Make sure the i1 values created by the cfg structurizer pass are
@@ -54,7 +54,7 @@
 ; SI: s_or_b64 exec, exec, [[BR_SREG]]
 ; SI: s_endpgm
 define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %is.0 = icmp ne i32 %tid, 0
   br i1 %is.0, label %store, label %exit
 
@@ -86,7 +86,7 @@
 
 define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
 entry:
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %is.0 = icmp ne i32 %tid, 0
   %limit = add i32 %tid, 64
   br i1 %is.0, label %loop, label %exit
@@ -152,7 +152,7 @@
 define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
 bb:
-  %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
   %tmp4 = sext i32 %tmp to i64
   %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
   %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
Index: test/CodeGen/AMDGPU/vop-shrink.ll
===================================================================
--- test/CodeGen/AMDGPU/vop-shrink.ll
+++ test/CodeGen/AMDGPU/vop-shrink.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; Test that we correctly commute a sub instruction
 ; FUNC-LABEL: {{^}}sub_rev:
@@ -10,7 +10,7 @@
 
 define void @sub_rev(i32 addrspace(1)* %out, <4 x i32> %sgpr, i32 %cond) {
 entry:
-  %vgpr = call i32 @llvm.r600.read.tidig.x() #1
+  %vgpr = call i32 @llvm.amdgcn.workitem.id.x() #1
   %tmp = icmp eq i32 %cond, 0
   br i1 %tmp, label %if, label %else
 
@@ -37,7 +37,7 @@
 ; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000
 define void @add_fold(float addrspace(1)* %out) {
 entry:
-  %tmp = call i32 @llvm.r600.read.tidig.x()
+  %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = uitofp i32 %tmp to float
   %tmp2 = fadd float %tmp1, 1.024000e+03
   store float %tmp2, float addrspace(1)* %out
@@ -45,7 +45,7 @@
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { readnone }
Index: test/CodeGen/AMDGPU/wait.ll
===================================================================
--- test/CodeGen/AMDGPU/wait.ll
+++ test/CodeGen/AMDGPU/wait.ll
@@ -18,7 +18,7 @@
   %tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
   %tmp12 = extractelement <4 x float> %tmp11, i32 0
   %tmp13 = extractelement <4 x float> %tmp11, i32 1
-  call void @llvm.AMDGPU.barrier.global() #1
+  call void @llvm.amdgcn.s.barrier() #1
   %tmp14 = extractelement <4 x float> %tmp11, i32 2
 ; %tmp15 = extractelement <4 x float> %tmp11, i32 3
   %tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
@@ -71,7 +71,7 @@
 
 ; Function Attrs: convergent nounwind
-declare void @llvm.AMDGPU.barrier.global() #1
+declare void @llvm.amdgcn.s.barrier() #1
 
 ; Function Attrs: nounwind readnone
 declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
Index: test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
===================================================================
--- test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
+++ test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
@@ -7,11 +7,11 @@
 ; can do to avoid this.
 
 declare void @llvm.write_register.i32(metadata, i32) #0
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 define void @write_vgpr_into_sgpr() {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   call void @llvm.write_register.i32(metadata !0, i32 %tid)
   ret void
 }