diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -375,6 +375,12 @@ "True 16-bit operand instructions" >; +def FeatureTrue16Codegen : SubtargetFeature<"true16-codegen", + "EnableTrue16Codegen", + "true", + "Use true 16-bit registers" +>; + def FeatureVOP3P : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -49,6 +49,7 @@ bool GCN3Encoding = false; bool Has16BitInsts = false; bool HasTrue16BitInsts = false; + bool EnableTrue16Codegen = false; bool HasMadMixInsts = false; bool HasMadMacF32Insts = false; bool HasDsSrc2Insts = false; @@ -155,6 +156,10 @@ bool hasTrue16BitInsts() const { return HasTrue16BitInsts; } + // True iff hasTrue16BitInsts() && EnableTrue16Codegen. TODO: Remove and use + // hasTrue16BitInsts() instead once True16 is fully supported. 
+ bool useTrue16BitInsts() const; + bool hasMadMixInsts() const { return HasMadMixInsts; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -166,6 +166,10 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {} +bool AMDGPUSubtarget::useTrue16BitInsts() const { + return hasTrue16BitInsts() && EnableTrue16Codegen; +} + GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM) : // clang-format off diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -146,8 +146,13 @@ addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024)); if (Subtarget->has16BitInsts()) { - addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass); - addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass); + if (Subtarget->useTrue16BitInsts()) { + addRegisterClass(MVT::i16, &AMDGPU::VGPR_16RegClass); + addRegisterClass(MVT::f16, &AMDGPU::VGPR_16RegClass); + } else { + addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass); + } // Unless there are also VOP3P operations, not operations are really legal. addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);