diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -268,6 +268,13 @@
 }
 
 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
+  // Returns the number of EUs (SIMDs) in the functional block that the waves
+  // of a workgroup must share -- that block is what "per CU" means here. For
+  // gfx10 in CU mode it is the CU itself, which contains two SIMDs.
+  if (isGFX10(*STI) && STI->getFeatureBits().test(FeatureCuMode))
+    return 2;
+  // Before gfx10, a CU contains four SIMDs. For gfx10 in WGP mode the shared
+  // block is the WGP: two CUs of two SIMDs each, so four SIMDs in total.
   return 4;
 }
 