diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -155,6 +155,12 @@
 
   bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
 
+  // Returns true when the subtarget has True16 instructions and their use
+  // has been enabled with the -amdgpu-true16 option.
+  // TODO: Remove and use hasTrue16BitInsts() instead once True16 is
+  // fully supported.
+  bool useTrue16BitInsts() const;
+
   bool hasMadMixInsts() const {
     return HasMadMixInsts;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -58,6 +58,10 @@
   cl::desc("Number of addresses from which to enable MIMG NSA."),
   cl::init(3), cl::Hidden);
 
+static cl::opt<bool> EnableTrue16Codegen("amdgpu-true16",
+                                         cl::desc("Use true 16-bit registers"),
+                                         cl::init(false), cl::ReallyHidden);
+
 GCNSubtarget::~GCNSubtarget() = default;
 
 GCNSubtarget &
@@ -166,6 +170,10 @@
 
 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {}
 
+bool AMDGPUSubtarget::useTrue16BitInsts() const {
+  return hasTrue16BitInsts() && EnableTrue16Codegen;
+}
+
 GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                            const GCNTargetMachine &TM)
     : // clang-format off
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -146,8 +146,13 @@
   addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
 
   if (Subtarget->has16BitInsts()) {
-    addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
-    addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
+    if (Subtarget->useTrue16BitInsts()) {
+      addRegisterClass(MVT::i16, &AMDGPU::VGPR_16RegClass);
+      addRegisterClass(MVT::f16, &AMDGPU::VGPR_16RegClass);
+    } else {
+      addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
+      addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
+    }
 
     // Unless there are also VOP3P operations, not operations are really legal.
     addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);