Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -269,6 +269,9 @@
   /// 2) dimension.
   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
 
+  /// Return true if only a single workitem can be active in a wave.
+  bool isSingleLaneExecution(const Function &Kernel) const;
+
   /// Creates value range metadata on an workitemid.* intrinsic call or load.
   bool makeLIDRangeMetadata(Instruction *I) const;
 
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -469,6 +469,15 @@
   return getFlatWorkGroupSizes(Kernel).second - 1;
 }
 
+bool AMDGPUSubtarget::isSingleLaneExecution(const Function &Func) const {
+  for (int I = 0; I < 3; ++I) {
+    if (getMaxWorkitemID(Func, I) > 0)
+      return false;
+  }
+
+  return true;
+}
+
 bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
   Function *Kernel = I->getParent()->getParent();
   unsigned MinSize = 0;
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -901,10 +901,16 @@
     return true;
 
   if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
-    if (Intrinsic->getIntrinsicID() == Intrinsic::read_register)
+    switch (Intrinsic->getIntrinsicID()) {
+    case Intrinsic::read_register:
       return isReadRegisterSourceOfDivergence(Intrinsic);
-
-    return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
+    case Intrinsic::amdgcn_workitem_id_x:
+    case Intrinsic::amdgcn_workitem_id_y:
+    case Intrinsic::amdgcn_workitem_id_z:
+      return !ST->isSingleLaneExecution(*Intrinsic->getFunction());
+    default:
+      return AMDGPU::isIntrinsicSourceOfDivergence(Intrinsic->getIntrinsicID());
+    }
   }
 
   // Assume all function calls are a source of divergence.
Index: llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
===================================================================
--- llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
+++ llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
@@ -41,5 +41,83 @@
   ret void
 }
 
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_singlethreaded':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_x_singlethreaded() #2 {
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  store volatile i32 %id.x, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_y_singlethreaded':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_y_singlethreaded() #2 {
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  store volatile i32 %id.y, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_singlethreaded':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_z_singlethreaded() #2 {
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  store volatile i32 %id.z, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_x_singlethreaded_md() !reqd_work_group_size !0 {
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  store volatile i32 %id.x, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_y_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_y_singlethreaded_md() !reqd_work_group_size !0 {
+  %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+  store volatile i32 %id.y, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_z_singlethreaded_md':
+; CHECK-NOT: DIVERGENT
+define amdgpu_kernel void @workitem_id_z_singlethreaded_md() !reqd_work_group_size !0 {
+  %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+  store volatile i32 %id.z, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimx':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimx() !reqd_work_group_size !1 {
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  store volatile i32 %id.x, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimy':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimy() !reqd_work_group_size !2 {
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  store volatile i32 %id.x, ptr addrspace(1) undef
+  ret void
+}
+
+; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_not_singlethreaded_dimz':
+; CHECK: DIVERGENT: %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+define amdgpu_kernel void @workitem_id_x_not_singlethreaded_dimz() !reqd_work_group_size !3 {
+  %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+  store volatile i32 %id.x, ptr addrspace(1) undef
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
+attributes #2 = { "amdgpu-flat-work-group-size"="1,1" }
+
+!0 = !{i32 1, i32 1, i32 1}
+!1 = !{i32 2, i32 1, i32 1}
+!2 = !{i32 1, i32 2, i32 1}
+!3 = !{i32 1, i32 1, i32 2}