Index: llvm/lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -615,21 +615,27 @@ return LowerImplicitParameter(DAG, VT, DL, 8); case Intrinsic::r600_read_tgid_x: + case Intrinsic::amdgcn_workgroup_id_x: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T1_X, VT); case Intrinsic::r600_read_tgid_y: + case Intrinsic::amdgcn_workgroup_id_y: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T1_Y, VT); case Intrinsic::r600_read_tgid_z: + case Intrinsic::amdgcn_workgroup_id_z: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T1_Z, VT); case Intrinsic::r600_read_tidig_x: + case Intrinsic::amdgcn_workitem_id_x: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T0_X, VT); case Intrinsic::r600_read_tidig_y: + case Intrinsic::amdgcn_workitem_id_y: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T0_Y, VT); case Intrinsic::r600_read_tidig_z: + case Intrinsic::amdgcn_workitem_id_z: return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass, R600::T0_Z, VT); Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5805,29 +5805,23 @@ return lowerImplicitZextParam(DAG, Op, MVT::i16, SI::KernelInputOffsets::LOCAL_SIZE_Z); case Intrinsic::amdgcn_workgroup_id_x: - case Intrinsic::r600_read_tgid_x: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_X); case Intrinsic::amdgcn_workgroup_id_y: - case Intrinsic::r600_read_tgid_y: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Y); case Intrinsic::amdgcn_workgroup_id_z: - case Intrinsic::r600_read_tgid_z: return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::WORKGROUP_ID_Z); case Intrinsic::amdgcn_workitem_id_x: - case Intrinsic::r600_read_tidig_x: return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, SDLoc(DAG.getEntryNode()), MFI->getArgInfo().WorkItemIDX); case Intrinsic::amdgcn_workitem_id_y: - case Intrinsic::r600_read_tidig_y: return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, SDLoc(DAG.getEntryNode()), MFI->getArgInfo().WorkItemIDY); case Intrinsic::amdgcn_workitem_id_z: - case Intrinsic::r600_read_tidig_z: return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32, SDLoc(DAG.getEntryNode()), MFI->getArgInfo().WorkItemIDZ); Index: llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll +++ llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll @@ -139,95 +139,6 @@ ret void } -; Legacy use of r600 intrinsics by GCN - -; The tgid values are stored in sgprs offset by the number of user -; sgprs. 
- -; FUNC-LABEL: {{^}}tgid_x_legacy: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}tgid_y_legacy: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}tgid_z_legacy: -; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} -; GCN-NOHSA: buffer_store_dword [[VVAL]] - -; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 -; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 -; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 -; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tgid.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 132{{$}} - -; FUNC-LABEL: {{^}}tidig_x_legacy: -; GCN-NOHSA: buffer_store_dword v0 -define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tidig.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 2180{{$}} - -; FUNC-LABEL: {{^}}tidig_y_legacy: - -; GCN-NOHSA: buffer_store_dword v1 -define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tidig.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; GCN-NOHSA: .section .AMDGPU.config -; GCN-NOHSA: .long 47180 -; GCN-NOHSA-NEXT: .long 4228{{$}} - -; FUNC-LABEL: {{^}}tidig_z_legacy: -; GCN-NOHSA: buffer_store_dword v2 -define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.tidig.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - declare i32 @llvm.r600.read.ngroups.x() #0 declare i32 @llvm.r600.read.ngroups.y() #0 declare i32 @llvm.r600.read.ngroups.z() #0 @@ -240,12 +151,4 @@ declare i32 @llvm.r600.read.local.size.y() #0 declare i32 @llvm.r600.read.local.size.z() #0 -declare i32 @llvm.r600.read.tgid.x() #0 -declare i32 @llvm.r600.read.tgid.y() #0 -declare i32 @llvm.r600.read.tgid.z() #0 - -declare i32 @llvm.r600.read.tidig.x() #0 -declare i32 @llvm.r600.read.tidig.y() #0 -declare i32 @llvm.r600.read.tidig.z() #0 - attributes #0 = { readnone } Index: llvm/test/CodeGen/AMDGPU/and.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/and.ll +++ llvm/test/CodeGen/AMDGPU/and.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.amdgcn.workitem.id.x() #0 ; FUNC-LABEL: {{^}}test2: ; EG: AND_INT {{\*? 
*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -96,7 +96,7 @@ ; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr: ; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid @@ -112,7 +112,7 @@ ; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]] ; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]] define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %b = load i32, i32 addrspace(1)* %gep.b @@ -126,7 +126,7 @@ ; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]] ; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]] define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %a = load i32, i32 addrspace(1)* %gep.a @@ -138,7 +138,7 @@ ; FUNC-LABEL: {{^}}v_and_constant_i32 ; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}} define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %a = load i32, i32 addrspace(1)* %gep, align 4 %and = and i32 %a, 1234567 @@ -149,7 +149,7 @@ ; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32 ; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}} define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %a = load i32, i32 addrspace(1)* %gep, align 4 %and = and i32 %a, 64 @@ -160,7 +160,7 @@ ; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32 ; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}} define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid %a = load i32, i32 addrspace(1)* %gep, align 4 %and = and i32 %a, -16 @@ -251,7 +251,7 @@ ; SI: v_and_b32 ; SI: v_and_b32 define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a = load i64, i64 addrspace(1)* %gep.a, align 8 %gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid @@ -266,7 +266,7 @@ ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}} ; SI: buffer_store_dwordx2 define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { - %tid = call i32 
@llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a = load i64, i64 addrspace(1)* %gep.a, align 8 %and = and i64 %a, 1231231234567 @@ -322,7 +322,7 @@ ; SI-NOT: and ; SI: buffer_store_dwordx2 define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a = load i64, i64 addrspace(1)* %gep.a, align 8 %and = and i64 %a, 1234567 @@ -337,7 +337,7 @@ ; SI-NOT: and ; SI: buffer_store_dwordx2 define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a = load i64, i64 addrspace(1)* %gep.a, align 8 %and = and i64 %a, 64 @@ -353,7 +353,7 @@ ; SI-NOT: and ; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}} define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { - %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid %a = load i64, i64 addrspace(1)* %gep.a, align 8 %and = and i64 %a, -8 Index: llvm/test/CodeGen/AMDGPU/ctlz.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ctlz.ll +++ llvm/test/CodeGen/AMDGPU/ctlz.ll @@ -15,7 +15,7 @@ declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone define amdgpu_kernel void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; SI-LABEL: s_ctlz_i32: @@ -120,7 +120,7 @@ ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -195,7 +195,7 @@ ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone @@ -288,7 +288,7 @@ ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: 32(4.484155e-44), 2(2.802597e-45) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone @@ -576,7 +576,7 @@ ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W, ; EG-NEXT: LSHR * T1.X, PV.W, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %tid = 
call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %val = load i64, i64 addrspace(1)* %in.gep @@ -663,7 +663,7 @@ ; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, PV.Z, ; EG-NEXT: LSHR * T1.X, PV.W, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %val = load i64, i64 addrspace(1)* %in.gep @@ -729,7 +729,7 @@ ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: -1(nan), 2(2.802597e-45) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -795,7 +795,7 @@ ; EG-NEXT: CNDE_INT T0.X, T0.X, literal.x, PV.W, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: -1(nan), 2(2.802597e-45) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -872,7 +872,7 @@ ; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, literal.x, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: -1(nan), 2(2.802597e-45) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -948,7 +948,7 @@ ; EG-NEXT: CNDE_INT T0.X, PV.W, literal.x, T0.W, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, ; EG-NEXT: -1(nan), 2(2.802597e-45) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone @@ -1017,7 +1017,7 @@ ; EG-NEXT: MOV * T0.Z, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid %val = load i8, i8 addrspace(1)* %valptr.gep %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone @@ -1160,7 +1160,7 @@ ; EG-NEXT: MOV * T0.Z, 0.0, ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %valptr.gep = getelementptr i7, i7 addrspace(1)* %valptr, i32 %tid %val = load i7, i7 addrspace(1)* %valptr.gep %ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone Index: llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -12,7 +12,7 @@ declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone -declare i32 
@llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32: ; GCN: s_load_dword [[VAL:s[0-9]+]], @@ -36,7 +36,7 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] ; EG: FFBH_UINT {{\*? *}}[[RESULT]] define amdgpu_kernel void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone @@ -54,7 +54,7 @@ ; EG: FFBH_UINT {{\*? *}}[[RESULT]] ; EG: FFBH_UINT {{\*? *}}[[RESULT]] define amdgpu_kernel void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone @@ -76,7 +76,7 @@ ; EG: FFBH_UINT {{\*? *}}[[RESULT]] ; EG: FFBH_UINT {{\*? *}}[[RESULT]] define amdgpu_kernel void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone @@ -89,7 +89,7 @@ ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] ; GCN: buffer_store_byte [[RESULT]], define amdgpu_kernel void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid %val = load i8, i8 addrspace(1)* %in.gep %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone @@ -131,7 +131,7 @@ ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]] ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}} define amdgpu_kernel void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid %val = load i64, i64 addrspace(1)* %in.gep @@ -142,7 +142,7 @@ ; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64_trunc: define amdgpu_kernel void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid %val = load i64, i64 addrspace(1)* %in.gep @@ -157,7 +157,7 @@ ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] ; GCN: buffer_store_dword [[RESULT]], define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = 
call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone @@ -172,7 +172,7 @@ ; GCN: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]] ; GCN: buffer_store_dword [[RESULT]], define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone @@ -187,7 +187,7 @@ ; GCN: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]] ; GCN: {{buffer|flat}}_store_byte [[FFBH]], define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %valptr.gep = getelementptr i8, i8 addrspace(1)* %valptr, i32 %tid %val = load i8, i8 addrspace(1)* %valptr.gep %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone @@ -206,7 +206,7 @@ ; GCN-DAG: buffer_store_byte [[RESULT1]] ; GCN: s_endpgm define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone @@ -225,7 +225,7 @@ ; GCN: v_cndmask ; GCN: buffer_store_dword define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone @@ -243,7 +243,7 @@ ; GCN: v_cndmask ; GCN: buffer_store_dword define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone @@ -261,7 +261,7 @@ ; GCN: v_cndmask ; GCN: buffer_store_dword define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone @@ -279,7 +279,7 @@ ; GCN: v_cndmask ; GCN: buffer_store_dword define amdgpu_kernel void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid 
%val = load i32, i32 addrspace(1)* %in.gep %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone Index: llvm/test/CodeGen/AMDGPU/ctpop.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ctpop.ll +++ llvm/test/CodeGen/AMDGPU/ctpop.ll @@ -8,7 +8,7 @@ declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: {{^}}s_ctpop_i32: ; GCN: s_load_dword [[SVAL:s[0-9]+]], @@ -33,7 +33,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -55,7 +55,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in0.gep = getelementptr i32, i32 addrspace(1)* %in0, i32 %tid %in1.gep = getelementptr i32, i32 addrspace(1)* %in1, i32 %tid %val0 = load volatile i32, i32 addrspace(1)* %in0.gep, align 4 @@ -74,7 +74,7 @@ ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %sval) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -91,7 +91,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone @@ -111,7 +111,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone @@ -139,7 +139,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <8 x i32>, <8 x i32> addrspace(1)* %in, i32 %tid %val = load <8 x i32>, <8 x i32> addrspace(1)* %in.gep, align 32 %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone @@ -183,7 +183,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_v16i32(<16 x i32> 
addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <16 x i32>, <16 x i32> addrspace(1)* %in, i32 %tid %val = load <16 x i32>, <16 x i32> addrspace(1)* %in.gep, align 32 %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone @@ -199,7 +199,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -216,7 +216,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -233,7 +233,7 @@ ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -251,7 +251,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -269,7 +269,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -290,7 +290,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone Index: llvm/test/CodeGen/AMDGPU/ctpop16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ctpop16.ll +++ llvm/test/CodeGen/AMDGPU/ctpop16.ll @@ -8,7 +8,7 @@ declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) nounwind readnone -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: {{^}}s_ctpop_i16: ; 
GCN: s_load_dword [[SVAL:s[0-9]+]], @@ -33,7 +33,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %val = load i16, i16 addrspace(1)* %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone @@ -55,7 +55,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_add_chain_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in0, i16 addrspace(1)* noalias %in1) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in0.gep = getelementptr i16, i16 addrspace(1)* %in0, i32 %tid %in1.gep = getelementptr i16, i16 addrspace(1)* %in1, i32 %tid %val0 = load volatile i16, i16 addrspace(1)* %in0.gep, align 4 @@ -74,7 +74,7 @@ ; GCN: buffer_store_short [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_add_sgpr_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %sval) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %val = load i16, i16 addrspace(1)* %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone @@ -91,7 +91,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_v2i16(<2 x i16> addrspace(1)* noalias %out, <2 x i16> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid %val = load <2 x i16>, <2 x i16> addrspace(1)* %in.gep, align 8 %ctpop = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> %val) nounwind readnone @@ -111,7 +111,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_v4i16(<4 x i16> addrspace(1)* noalias %out, <4 x i16> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid %val = load <4 x i16>, <4 x i16> addrspace(1)* %in.gep, align 16 %ctpop = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %val) nounwind readnone @@ -139,7 +139,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_v8i16(<8 x i16> addrspace(1)* noalias %out, <8 x i16> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <8 x i16>, <8 x i16> addrspace(1)* %in, i32 %tid %val = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep, align 32 %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %val) nounwind readnone @@ -183,7 +183,7 @@ ; EG: BCNT_INT ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_v16i16(<16 x i16> addrspace(1)* noalias %out, <16 x i16> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <16 x i16>, <16 x i16> addrspace(1)* %in, i32 %tid %val = load <16 x i16>, <16 x i16> addrspace(1)* %in.gep, align 32 %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %val) nounwind readnone @@ -199,7 +199,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i16_add_inline_constant(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + 
%tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %val = load i16, i16 addrspace(1)* %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone @@ -216,7 +216,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i16_add_inline_constant_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %val = load i16, i16 addrspace(1)* %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone @@ -234,7 +234,7 @@ ; GCN: buffer_store_short [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i16_add_literal(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %val = load i16, i16 addrspace(1)* %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone @@ -252,7 +252,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i16_add_var(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %val = load i16, i16 addrspace(1)* %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone @@ -270,7 +270,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i16_add_var_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 %const) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %val = load i16, i16 addrspace(1)* %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone @@ -291,7 +291,7 @@ ; EG: BCNT_INT define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in, i16 addrspace(1)* noalias %constptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid %val = load i16, i16 addrspace(1)* %in.gep, align 4 %ctpop = call i16 @llvm.ctpop.i16(i16 %val) nounwind readnone Index: llvm/test/CodeGen/AMDGPU/ctpop64.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/ctpop64.ll +++ llvm/test/CodeGen/AMDGPU/ctpop64.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i64 @llvm.ctpop.i64(i64) nounwind readnone declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone @@ -34,7 +34,7 @@ ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %val = load i64, i64 
addrspace(1)* %in.gep, align 8 %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone @@ -53,7 +53,7 @@ ; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid %val = load i64, i64 addrspace(1)* %in.gep, align 8 %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone @@ -93,7 +93,7 @@ ; GCN: v_bcnt_u32_b32 ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i32 %tid %val = load <2 x i64>, <2 x i64> addrspace(1)* %in.gep, align 16 %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone @@ -113,7 +113,7 @@ ; GCN: v_bcnt_u32_b32 ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid %val = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep, align 32 %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone @@ -193,7 +193,7 @@ ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctpop_i128(i32 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %tid %val = load i128, i128 addrspace(1)* %in.gep, align 8 %ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone Index: llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -9,7 +9,7 @@ declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32: ; SI: s_load_dword [[VAL:s[0-9]+]], @@ -33,7 +33,7 @@ ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] ; EG: FFBL_INT {{\*? *}}[[RESULT]] define amdgpu_kernel void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i32, i32 addrspace(1)* %valptr, i32 %tid %val = load i32, i32 addrspace(1)* %in.gep, align 4 %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone @@ -51,7 +51,7 @@ ; EG: FFBL_INT {{\*? *}}[[RESULT]] ; EG: FFBL_INT {{\*? 
*}}[[RESULT]] define amdgpu_kernel void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %valptr, i32 %tid %val = load <2 x i32>, <2 x i32> addrspace(1)* %in.gep, align 8 %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone @@ -73,7 +73,7 @@ ; EG: FFBL_INT {{\*? *}}[[RESULT]] ; EG: FFBL_INT {{\*? *}}[[RESULT]] define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind { - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %valptr, i32 %tid %val = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep, align 16 %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone Index: llvm/test/CodeGen/AMDGPU/fma.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fma.ll +++ llvm/test/CodeGen/AMDGPU/fma.ll @@ -13,7 +13,7 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone -declare i32 @llvm.r600.read.tidig.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; FUNC-LABEL: {{^}}fma_f32: ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}} @@ -86,7 +86,7 @@ ; FUNC-LABEL: @fma_commute_mul_inline_imm_f32 ; SI: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, 2.0, {{v[0-9]+}} define amdgpu_kernel void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -101,7 +101,7 @@ ; FUNC-LABEL: @fma_commute_mul_s_f32 define amdgpu_kernel void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind { - %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone + %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid @@ -125,7 +125,7 @@ ; GFX906: v_fma_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0 define amdgpu_kernel void @fold_inline_imm_into_fmac_src2_f32(float addrspace(1)* %out, float addrspace(1)* %a, float addrspace(1)* %b) nounwind { bb: - %tid = call i32 @llvm.r600.read.tidig.x() + %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 %gep.a = getelementptr inbounds float, float addrspace(1)* %a, i64 %tid.ext %gep.b = getelementptr inbounds float, float addrspace(1)* %b, i64 %tid.ext Index: llvm/test/CodeGen/AMDGPU/fmax_legacy.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmax_legacy.ll +++ llvm/test/CodeGen/AMDGPU/fmax_legacy.ll @@ -6,7 +6,7 @@ ; RUN: llc -march=r600 
-mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] @@ -21,7 +21,7 @@ ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -49,7 +49,7 @@ ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -76,7 +76,7 @@ ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -102,7 +102,7 @@ ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -127,7 +127,7 @@ ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -153,7 +153,7 @@ ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1 @@ -186,7 +186,7 @@ ; GCN-NOT: v_max define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1 @@ -209,7 +209,7 @@ ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 Index: llvm/test/CodeGen/AMDGPU/fmin_legacy.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/fmin_legacy.ll +++ llvm/test/CodeGen/AMDGPU/fmin_legacy.ll @@ -6,7 +6,7 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s -declare i32 @llvm.r600.read.tidig.x() #1 +declare i32 @llvm.amdgcn.workitem.id.x() #1 ; The two inputs to the instruction are different SGPRs from the same ; super register, so we can't fold both SGPR operands even though they @@ -87,7 +87,7 @@ ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -111,7 +111,7 @@ ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -135,7 +135,7 @@ ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -159,7 +159,7 @@ ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 @@ -183,7 +183,7 @@ ; GCN-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1 @@ -210,7 +210,7 @@ ; GCN-NONAN: v_min_f32_e32 ; GCN-NONAN: v_min_f32_e32 define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1 @@ -243,7 +243,7 @@ ; GCN-NONAN: v_min_f32_e32 ; GCN-NONAN-NOT: v_min_ define amdgpu_kernel void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { - %tid = call i32 @llvm.r600.read.tidig.x() #1 + %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1 @@ -265,7 +265,7 @@ ; GCN-NOT: v_min ; GCN: s_endpgm define amdgpu_kernel void 
@test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
Index: llvm/test/CodeGen/AMDGPU/mad_uint24.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mad_uint24.ll
+++ llvm/test/CodeGen/AMDGPU/mad_uint24.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
 ; RUN: llc < %s -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 ; FUNC-LABEL: {{^}}u32_mad24:
 ; EG: MULADD_UINT24
Index: llvm/test/CodeGen/AMDGPU/max.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/max.ll
+++ llvm/test/CodeGen/AMDGPU/max.ll
@@ -7,7 +7,7 @@
 ; EG: MAX_INT
 define amdgpu_kernel void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
   %a = load i32, i32 addrspace(1)* %aptr, align 4
   %b = load i32, i32 addrspace(1)* %gep.in, align 4
@@ -29,7 +29,7 @@
 ; EG: MAX_INT
 ; EG: MAX_INT
 define amdgpu_kernel void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid
   %a = load <4 x i32>, <4 x i32> addrspace(1)* %aptr, align 4
   %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
@@ -105,7 +105,7 @@
 ; EG: MAX_INT
 define amdgpu_kernel void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
   %a = load i32, i32 addrspace(1)* %aptr, align 4
   %b = load i32, i32 addrspace(1)* %gep.in, align 4
@@ -131,7 +131,7 @@
 ; EG: MAX_UINT
 define amdgpu_kernel void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
   %a = load i32, i32 addrspace(1)* %aptr, align 4
   %b = load i32, i32 addrspace(1)* %gep.in, align 4
@@ -190,7 +190,7 @@
 ; EG: MAX_UINT
 define amdgpu_kernel void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
   %a = load i32, i32 addrspace(1)* %gep.in, align 4
   %b = load i32, i32 addrspace(1)* %bptr, align 4
@@ -332,7 +332,7 @@
 }
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
Index: llvm/test/CodeGen/AMDGPU/min.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/min.ll
+++ llvm/test/CodeGen/AMDGPU/min.ll
@@ -9,7 +9,7 @@
 ; EG: MIN_INT
 define amdgpu_kernel void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@@ -164,7 +164,7 @@
 ; EG: MIN_INT
 define amdgpu_kernel void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %bptr, i32 %tid
   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@@ -184,7 +184,7 @@
 ; EG: MIN_INT
 define amdgpu_kernel void @v_test_imin_slt_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds i16, i16 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr inbounds i16, i16 addrspace(1)* %bptr, i32 %tid
   %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
@@ -248,7 +248,7 @@
 ; EG: MIN_UINT
 define amdgpu_kernel void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@@ -271,7 +271,7 @@
 ; EG: MIN_UINT
 ; EG: MIN_UINT
 define amdgpu_kernel void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %a.ptr, <3 x i32> addrspace(1)* %b.ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b.ptr, i32 %tid
   %out.gep = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
@@ -305,7 +305,7 @@
 ; EG: MIN_UINT
 ; EG: MIN_UINT
 define amdgpu_kernel void @v_test_umin_ule_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %a.ptr, <3 x i16> addrspace(1)* %b.ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %b.ptr, i32 %tid
   %out.gep = getelementptr inbounds <3 x i16>, <3 x i16> addrspace(1)* %out, i32 %tid
@@ -334,7 +334,7 @@
 ; EG: MIN_UINT
 define amdgpu_kernel void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i32 %tid
   %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
@@ -358,7 +358,7 @@
 ; EG: MIN_UINT
 define amdgpu_kernel void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %a.ptr, i8 addrspace(1)* %b.ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds i8, i8 addrspace(1)* %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds i8, i8 addrspace(1)* %b.ptr, i32 %tid
   %out.gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %tid
@@ -606,7 +606,7 @@
 ; EG: MIN_INT
 ; EG: MIN_INT
 define amdgpu_kernel void @v_test_imin_sle_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
   %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -631,7 +631,7 @@
 ; EG: MIN_UINT
 ; EG: MIN_UINT
 define amdgpu_kernel void @v_test_imin_ule_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %a.ptr, <2 x i16> addrspace(1)* %b.ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a.ptr, i32 %tid
   %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b.ptr, i32 %tid
   %out.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
@@ -643,7 +643,7 @@
   ret void
 }
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: llvm/test/CodeGen/AMDGPU/mul.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/mul.ll
+++ llvm/test/CodeGen/AMDGPU/mul.ll
@@ -278,7 +278,7 @@
 ; GCN: {{buffer|flat}}_store_dwordx4
 define amdgpu_kernel void @v_mul_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %aptr, i128 addrspace(1)* %bptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.a = getelementptr inbounds i128, i128 addrspace(1)* %aptr, i32 %tid
   %gep.b = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
   %gep.out = getelementptr inbounds i128, i128 addrspace(1)* %bptr, i32 %tid
@@ -289,7 +289,7 @@
   ret void
 }
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone}
Index: llvm/test/CodeGen/AMDGPU/setcc.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/setcc.ll
+++ llvm/test/CodeGen/AMDGPU/setcc.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 ; FUNC-LABEL: {{^}}setcc_v2i32:
 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
@@ -349,7 +349,7 @@
 ; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
 ; GCN: s_endpgm
 define amdgpu_kernel void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
   %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
   %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
@@ -370,7 +370,7 @@
 ; GCN-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
 ; GCN: s_endpgm
 define amdgpu_kernel void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
   %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
   %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
Index: llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
+++ llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
@@ -161,7 +161,7 @@
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
 ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
 define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -188,7 +188,7 @@
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
 ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
 define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -215,7 +215,7 @@
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
 ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
 define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -239,7 +239,7 @@
 ; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
 ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[SHR]]{{\]}}
 define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -473,7 +473,7 @@
 ; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -503,7 +503,7 @@
 ; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 ; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}}
 define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
@@ -562,7 +562,7 @@
 ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
 define amdgpu_kernel void @v_sext_in_reg_i1_i16(i16 addrspace(3)* %out, i16 addrspace(1)* %ptr) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep = getelementptr i16, i16 addrspace(1)* %ptr, i32 %tid
   %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
@@ -583,7 +583,7 @@
 ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[REG]], 0, 1{{$}}
 ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]]
 define amdgpu_kernel void @v_sext_in_reg_i1_i16_nonload(i16 addrspace(3)* %out, i16 addrspace(1)* %aptr, i16 addrspace(1)* %bptr, i16 %s.val) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %a.gep = getelementptr i16, i16 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i16, i16 addrspace(1)* %bptr, i32 %tid
   %out.gep = getelementptr i16, i16 addrspace(3)* %out, i32 %tid
@@ -715,7 +715,7 @@
   ret void
 }
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: llvm/test/CodeGen/AMDGPU/shl.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/shl.ll
+++ llvm/test/CodeGen/AMDGPU/shl.ll
@@ -3,9 +3,9 @@
 ; XUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=FUNC,GCN,VI
 ; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs | FileCheck %s -allow-deprecated-dag-overlap -check-prefixes=FUNC,EG
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.r600.read.tgid.x() #0
+declare i32 @llvm.amdgcn.workgroup.id.x() #0
 define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
 ; GCN-LABEL: shl_v2i32:
@@ -341,7 +341,7 @@
 ; EG-NEXT: MOV * T0.Z, 0.0,
 ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
   %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1
@@ -467,7 +467,7 @@
 ; EG-NEXT: OR_INT T0.X, PV.W, PS,
 ; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
   %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
@@ -587,7 +587,7 @@
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
 ; EG-NEXT: MOV T7.X, PV.Y,
 ; EG-NEXT: MOV * T10.X, T6.X,
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
   %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
@@ -905,7 +905,7 @@
 ; EG-NEXT: LSHR T2.X, PV.W, literal.x,
 ; EG-NEXT: MOV * T1.Y, T0.X,
 ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
-  %tid = call i32 @llvm.r600.read.tgid.x() #0
+  %tid = call i32 @llvm.amdgcn.workgroup.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.in
Index: llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
+++ llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
@@ -17,7 +17,7 @@
 ; R600: INT_TO_FLT
 define amdgpu_kernel void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
@@ -67,7 +67,7 @@
 ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define amdgpu_kernel void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
   %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
@@ -106,7 +106,7 @@
 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
 ; SI: s_endpgm
 define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %val = load i1, i1 addrspace(1)* %in.gep
@@ -115,7 +115,7 @@
   ret void
 }
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: llvm/test/CodeGen/AMDGPU/sminmax.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -28,7 +28,7 @@
 ; EG: MAX_INT
 define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in, align 4
   %neg = sub i32 0, %val
@@ -45,7 +45,7 @@
 ; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]]
 ; GCN: v_mul_lo_u32 v{{[0-9]+}}, [[MAX]], [[MAX]]
 define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds i32, i32 addrspace(1)* %src, i32 %tid
   %val = load i32, i32 addrspace(1)* %gep.in, align 4
   %neg = sub i32 0, %val
@@ -100,7 +100,7 @@
   %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
   %t0 = insertelement <2 x i32> undef, i32 2, i32 0
   %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %src, i32 %tid
   %val = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in, align 4
   %neg = sub <2 x i32> %z1, %val
@@ -184,7 +184,7 @@
   %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
   %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
   %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.in = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %src, i32 %tid
   %val = load <4 x i32>, <4 x i32> addrspace(1)* %gep.in, align 4
   %neg = sub <4 x i32> %z3, %val
@@ -268,7 +268,7 @@
   ret void
 }
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
Index: llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
+++ llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -29,10 +29,8 @@
 ; GCN-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24
 ; GCN: s_endpgm
-define amdgpu_kernel void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 {
+define amdgpu_kernel void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset, i32 %tmp, i32 %tmp1, i32 %x.i.12.i) #0 {
 entry:
-  %tmp = tail call i32 @llvm.r600.read.local.size.y()
-  %tmp1 = tail call i32 @llvm.r600.read.local.size.z()
   %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp3 = tail call i32 @llvm.amdgcn.workitem.id.y()
   %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.z()
@@ -41,7 +39,6 @@
   %tmp11 = mul i32 %tmp10, %tmp1
   %tmp9 = add i32 %tmp11, %tmp4
   %x.i.i = tail call i32 @llvm.amdgcn.workgroup.id.x() #1
-  %x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
   %mul.26.i = mul i32 %x.i.12.i, %x.i.i
   %add.i = add i32 %tmp2, %mul.26.i
   %arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i
@@ -78,25 +75,9 @@
   ret void
 }
-; Function Attrs: nounwind readnone
 declare i32 @llvm.amdgcn.workgroup.id.x() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.x() #1
-
-; Function Attrs: nounwind readnone
 declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.y() #1
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.z() #1
-
-; Function Attrs: nounwind readnone
 declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-; Function Attrs: nounwind readnone
 declare i32 @llvm.amdgcn.workitem.id.z() #1
 attributes #0 = { norecurse nounwind }
Index: llvm/test/CodeGen/AMDGPU/sra.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/sra.ll
+++ llvm/test/CodeGen/AMDGPU/sra.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 ; FUNC-LABEL: {{^}}ashr_v2i32:
 ; SI: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
@@ -248,7 +248,7 @@
 ; GCN: v_ashrrev_i32_e32 v[[SHIFT:[0-9]+]], 31, v[[HI]]
 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[HI]]:[[SHIFT]]{{\]}}
 define amdgpu_kernel void @v_ashr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.in
@@ -276,7 +276,7 @@
 ; GCN: v_mov_b32_e32 v[[COPY:[0-9]+]], v[[SHIFT]]
 ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[SHIFT]]:[[COPY]]{{\]}}
 define amdgpu_kernel void @v_ashr_63_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.in
Index: llvm/test/CodeGen/AMDGPU/srl.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/srl.ll
+++ llvm/test/CodeGen/AMDGPU/srl.ll
@@ -2,7 +2,7 @@
 ; XUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
-declare i32 @llvm.r600.read.tidig.x() #0
+declare i32 @llvm.amdgcn.workitem.id.x() #0
 ; FUNC-LABEL: {{^}}lshr_i32:
 ; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
@@ -205,7 +205,7 @@
 ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], v[[VHI1]]{{$}}
 ; GCN: buffer_store_dwordx2 v{{\[}}[[HI_A]]:[[VHI]]{{\]}}
 define amdgpu_kernel void @v_lshr_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
   %a = load i64, i64 addrspace(1)* %gep.in
Index: llvm/test/CodeGen/AMDGPU/trunc.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/trunc.ll
+++ llvm/test/CodeGen/AMDGPU/trunc.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 define amdgpu_kernel void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, [8 x i32], i64 %in) {
 ; GCN-LABEL: {{^}}trunc_i64_to_i32_store:
@@ -113,7 +113,7 @@
 ; GCN: v_cmp_eq_u32_e32 vcc, 1, [[MASKED]]
 ; GCN: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
 define amdgpu_kernel void @v_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %x = load i64, i64 addrspace(1)* %gep
Index: llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
+++ llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
@@ -25,7 +25,7 @@
 ; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]]
 ; GCN: {{buffer|flat}}_store_short {{.*}}[[VR_F16]]
 define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep
@@ -55,7 +55,7 @@
 ; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]]
 ; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]]
 define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %val = load i64, i64 addrspace(1)* %in.gep
@@ -73,7 +73,7 @@
 ; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64_to_v4f32:
 define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
   %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
@@ -91,7 +91,7 @@
 ; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64_to_v4f16:
 define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(<4 x half> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr <4 x half>, <4 x half> addrspace(1)* %out, i32 %tid
   %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
@@ -100,7 +100,7 @@
   ret void
 }
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
Index: llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
+++ llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
@@ -17,7 +17,7 @@
 ; R600: INT_TO_FLT
 define amdgpu_kernel void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %val = load i32, i32 addrspace(1)* %in.gep
@@ -67,7 +67,7 @@
 ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define amdgpu_kernel void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
   %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
@@ -106,7 +106,7 @@
 ; SI: {{buffer|flat}}_store_dword {{.*}}[[RESULT]]
 ; SI: s_endpgm
 define amdgpu_kernel void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
-  %tid = call i32 @llvm.r600.read.tidig.x()
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %val = load i1, i1 addrspace(1)* %in.gep
@@ -133,7 +133,7 @@
   ret void
 }
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }