AMDGPU: cap the addressable VGPR count at 128 on GFX11+.

getAddressableNumVGPRs() returns 128 when the hidden option
-amdgpu-limit-to-128-vgprs is set, or, if the option was not given on the
command line, when the subtarget is GFX11 or newer. The GFX11 check lines of
attr-amdgpu-flat-work-group-size-vgpr-limit.ll that expected 256 are updated
to 128.

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -33,6 +33,11 @@
                             llvm::cl::desc("AMDHSA Code Object Version"),
                             llvm::cl::init(4));
 
+// TODO-GFX11: Remove this when full 16-bit codegen is implemented.
+static llvm::cl::opt<bool>
+    LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden,
+                    llvm::cl::desc("Never use more than 128 VGPRs"));
+
 namespace {
 
 /// \returns Bit mask for given bit \p Shift and bit \p Width.
@@ -795,6 +800,15 @@
 }
 
 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
+  if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs
+                                          : isGFX11Plus(*STI)) {
+    // GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions
+    // such that values 128..255 no longer mean v128..v255, they mean
+    // v0.hi..v127.hi instead. Until the compiler understands this, it is not
+    // safe to use v128..v255.
+    // TODO-GFX11: Remove this when full 16-bit codegen is implemented.
+    return 128;
+  }
   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
     return 512;
   return 256;
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
@@ -536,10 +536,10 @@
 ; GFX10WGP-WAVE64: NumVgprs: 256
 ; GFX10CU-WAVE32: NumVgprs: 256
 ; GFX10CU-WAVE64: NumVgprs: 256
-; GFX11WGP-WAVE32: NumVgprs: 256
-; GFX11WGP-WAVE64: NumVgprs: 256
-; GFX11CU-WAVE32: NumVgprs: 256
-; GFX11CU-WAVE64: NumVgprs: 256
+; GFX11WGP-WAVE32: NumVgprs: 128
+; GFX11WGP-WAVE64: NumVgprs: 128
+; GFX11CU-WAVE32: NumVgprs: 128
+; GFX11CU-WAVE64: NumVgprs: 128
 define amdgpu_kernel void @f256() #256 {
   call void @use256vgprs()
   ret void
@@ -555,8 +555,8 @@
 ; GFX10WGP-WAVE64: NumVgprs: 256
 ; GFX10CU-WAVE32: NumVgprs: 128
 ; GFX10CU-WAVE64: NumVgprs: 128
-; GFX11WGP-WAVE32: NumVgprs: 256
-; GFX11WGP-WAVE64: NumVgprs: 256
+; GFX11WGP-WAVE32: NumVgprs: 128
+; GFX11WGP-WAVE64: NumVgprs: 128
 ; GFX11CU-WAVE32: NumVgprs: 128
 ; GFX11CU-WAVE64: NumVgprs: 128
 define amdgpu_kernel void @f512() #512 {