AMDGPU: teach computeKnownBitsForTargetNode about workitem ID / local size
intrinsics.

The range computation that used to live inside
AMDGPUSubtarget::makeLIDRangeMetadata is split out into
AMDGPUSubtarget::getWorkitemIDRange(Kernel, FuncID) so that both the range
metadata annotation and the DAG known-bits query share it.  The returned
pair follows the range-metadata convention [Lo, Hi), i.e. the second member
is one more than the largest possible value.

NOTE(review): this patch text was recovered from a whitespace-collapsed copy.
Line structure and indentation were reconstructed following LLVM style; if a
context line fails to match, apply with `patch -l`.

Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4094,6 +4094,23 @@
       Known.Zero.setHighBits(Size - Subtarget->getWavefrontSizeLog2());
       break;
     }
+    case Intrinsic::amdgcn_workitem_id_x:
+    case Intrinsic::amdgcn_workitem_id_y:
+    case Intrinsic::amdgcn_workitem_id_z:
+    case Intrinsic::r600_read_tidig_x:
+    case Intrinsic::r600_read_tidig_y:
+    case Intrinsic::r600_read_tidig_z:
+    case Intrinsic::r600_read_local_size_x:
+    case Intrinsic::r600_read_local_size_y:
+    case Intrinsic::r600_read_local_size_z: {
+      unsigned Min, Max;
+
+      // Returns 1 more than the maximum value.
+      std::tie(Min, Max) = Subtarget->getWorkitemIDRange(
+          *DAG.getMachineFunction().getFunction(), IID);
+      Known.Zero.setHighBits(countLeadingZeros(Max - 1));
+      break;
+    }
     default:
       break;
     }
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -627,6 +627,10 @@
   /// size, register usage, and/or lds usage.
   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
 
+  /// Return min, max value range for a workitemid.* intrinsic call.
+  std::pair<unsigned, unsigned> getWorkitemIDRange(const Function &Kernel,
+                                                   unsigned FuncID) const;
+
   /// Creates value range metadata on an workitemid.* inrinsic call or load.
   bool makeLIDRangeMetadata(Instruction *I) const;
 };
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -288,53 +288,49 @@
   return Requested;
 }
 
-bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
-  Function *Kernel = I->getParent()->getParent();
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getWorkitemIDRange(
+    const Function &Kernel, unsigned FuncID) const {
   unsigned MinSize = 0;
-  unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
+  unsigned MaxSize = getFlatWorkGroupSizes(Kernel).second;
   bool IdQuery = false;
 
   // If reqd_work_group_size is present it narrows value down.
-  if (auto *CI = dyn_cast<CallInst>(I)) {
-    const Function *F = CI->getCalledFunction();
-    if (F) {
-      unsigned Dim = UINT_MAX;
-      switch (F->getIntrinsicID()) {
-      case Intrinsic::amdgcn_workitem_id_x:
-      case Intrinsic::r600_read_tidig_x:
-        IdQuery = true;
-        LLVM_FALLTHROUGH;
-      case Intrinsic::r600_read_local_size_x:
-        Dim = 0;
-        break;
-      case Intrinsic::amdgcn_workitem_id_y:
-      case Intrinsic::r600_read_tidig_y:
-        IdQuery = true;
-        LLVM_FALLTHROUGH;
-      case Intrinsic::r600_read_local_size_y:
-        Dim = 1;
-        break;
-      case Intrinsic::amdgcn_workitem_id_z:
-      case Intrinsic::r600_read_tidig_z:
-        IdQuery = true;
-        LLVM_FALLTHROUGH;
-      case Intrinsic::r600_read_local_size_z:
-        Dim = 2;
-        break;
-      default:
-        break;
-      }
-      if (Dim <= 3) {
-        if (auto Node = Kernel->getMetadata("reqd_work_group_size"))
-          if (Node->getNumOperands() == 3)
-            MinSize = MaxSize = mdconst::extract<ConstantInt>(
-                                  Node->getOperand(Dim))->getZExtValue();
-      }
-    }
+  unsigned Dim = UINT_MAX;
+  switch (FuncID) {
+  case Intrinsic::amdgcn_workitem_id_x:
+  case Intrinsic::r600_read_tidig_x:
+    IdQuery = true;
+    LLVM_FALLTHROUGH;
+  case Intrinsic::r600_read_local_size_x:
+    Dim = 0;
+    break;
+  case Intrinsic::amdgcn_workitem_id_y:
+  case Intrinsic::r600_read_tidig_y:
+    IdQuery = true;
+    LLVM_FALLTHROUGH;
+  case Intrinsic::r600_read_local_size_y:
+    Dim = 1;
+    break;
+  case Intrinsic::amdgcn_workitem_id_z:
+  case Intrinsic::r600_read_tidig_z:
+    IdQuery = true;
+    LLVM_FALLTHROUGH;
+  case Intrinsic::r600_read_local_size_z:
+    Dim = 2;
+    break;
+  default:
+    break;
+  }
+
+  if (Dim < 3) {
+    if (auto Node = Kernel.getMetadata("reqd_work_group_size"))
+      if (Node->getNumOperands() == 3)
+        MinSize = MaxSize = mdconst::extract<ConstantInt>(
+            Node->getOperand(Dim))->getZExtValue();
   }
 
   if (!MaxSize)
-    return false;
+    return std::make_pair(MinSize, MaxSize);
 
   // Range metadata is [Lo, Hi). For ID query we need to pass max size
   // as Hi. For size query we need to pass Hi + 1.
@@ -343,6 +339,32 @@
   else
     ++MaxSize;
 
+  return std::make_pair(MinSize, MaxSize);
+}
+
+bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
+  unsigned MinSize = 0, MaxSize = 0;
+
+  unsigned IntrinID = 0;
+  if (auto *CI = dyn_cast<CallInst>(I)) {
+    if (const Function *F = CI->getCalledFunction()) {
+      IntrinID = F->getIntrinsicID();
+      std::tie(MinSize, MaxSize) = getWorkitemIDRange(
+          *I->getParent()->getParent(), IntrinID);
+    }
+  } else {
+    // Used when emitting loads from dispatch packet.
+    MaxSize = getFlatWorkGroupSizes(*I->getParent()->getParent()).second;
+
+    // Range metadata is [Lo, Hi). For ID query we need to pass max size
+    // as Hi. For size query we need to pass Hi + 1.
+    // This is a load, which is only used for sizes.
+    ++MaxSize;
+  }
+
+  if (!MaxSize)
+    return false;
+
   MDBuilder MDB(I->getContext());
   MDNode *MaxWorkGroupSizeRange = MDB.createRange(APInt(32, MinSize),
                                                   APInt(32, MaxSize));