// Patch summary: caches the number of EUs (SIMDs) per CU in the subtarget.
//
// AMDGPUSubtarget.h:
//   * adds the data member `unsigned EUsPerCU = 4;` directly before the
//     existing `MaxWavesPerEU` member;
//   * adds the const accessor `getEUsPerCU()`, which simply returns that
//     member (placed after the getter that returns `LocalMemorySize`).
//
// AMDGPUSubtarget.cpp:
//   * in a constructor body (only its tail is visible in the hunk), assigns
//     `EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(this);` right after
//     `MaxWavesPerEU` is initialised from `AMDGPU::IsaInfo` in the same way.
//
// NOTE(review): the hunk headers carry no function/class context, so which
// subtarget class owns this constructor cannot be told from the patch alone.
// Presumably any subtarget constructor that does not perform this assignment
// keeps the in-class default of 4 -- confirm that this is intended.
//
// NOTE(review): the patch text below has lost its line breaks (every hunk
// line sits on one physical line), so the leading ' ' / '+' hunk markers no
// longer start a line. Restore the original newlines before feeding it to
// `git apply` / `patch`.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -61,6 +61,7 @@ bool HasFminFmaxLegacy = true; bool EnablePromoteAlloca = false; bool HasTrigReducedRange = false; + unsigned EUsPerCU = 4; unsigned MaxWavesPerEU = 10; unsigned LocalMemorySize = 0; char WavefrontSizeLog2 = 0; @@ -209,6 +210,11 @@ return LocalMemorySize; } + /// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the + /// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs. + /// CU mode into account. + unsigned getEUsPerCU() const { return EUsPerCU; } + Align getAlignmentForImplicitArgPtr() const { return isAmdHsaOS() ? Align(8) : Align(4); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -173,6 +173,7 @@ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) { // clang-format on MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this); + EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(this); CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering())); Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));