// Patch summary (leading text before the first file header; not part of any hunk).
//
// Adds an opt-in "real-true16" subtarget feature to the AMDGPU backend:
//   * AMDGPU.td: new `FeatureRealTrue16Insts` SubtargetFeature named "real-true16",
//     tied to the `EnableRealTrue16Insts` field with value "true".
//   * AMDGPUSubtarget.h: new `EnableRealTrue16Insts` member (initialised to false),
//     a doc comment on `hasTrue16BitInsts()`, and the declaration of
//     `useRealTrue16Insts()`.
//   * AMDGPUSubtarget.cpp: `useRealTrue16Insts()` returns
//     `hasTrue16BitInsts() && EnableRealTrue16Insts`, so the feature flag has no
//     effect unless `hasTrue16BitInsts()` is also true.
//   * SIISelLowering.cpp: inside the `has16BitInsts()` branch, MVT::i16 and MVT::f16
//     are registered with `VGPR_16RegClass` when `useRealTrue16Insts()` is true and
//     with `SReg_32RegClass` (the previous behaviour) otherwise.
//
// NOTE(review): in this copy the patch's line breaks are collapsed, so several
// hunks share one physical line; the original line structure must be restored
// before the patch can be applied.
// NOTE(review): the context comment "not operations are really legal" in the
// SIISelLowering.cpp hunk looks like a typo for "no operations"; it is an
// unchanged context line, so fix it in a separate change rather than here.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -375,6 +375,12 @@ "True 16-bit operand instructions" >; +def FeatureRealTrue16Insts : SubtargetFeature<"real-true16", + "EnableRealTrue16Insts", + "true", + "Use true 16-bit registers" +>; + def FeatureVOP3P : SubtargetFeature<"vop3p", "HasVOP3PInsts", "true", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -49,6 +49,7 @@ bool GCN3Encoding = false; bool Has16BitInsts = false; bool HasTrue16BitInsts = false; + bool EnableRealTrue16Insts = false; bool HasMadMixInsts = false; bool HasMadMacF32Insts = false; bool HasDsSrc2Insts = false; @@ -153,8 +154,17 @@ return Has16BitInsts; } + /// Return true if the subtarget supports True16 instructions. bool hasTrue16BitInsts() const { return HasTrue16BitInsts; } + /// Return true if real (non-fake) variants of True16 instructions using + /// 16-bit registers should be code-generated. Fake True16 instructions are + /// identical to non-fake ones except that they take 32-bit registers as + /// operands and always use their low halves. + // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully + // supported and the support for fake True16 instructions is removed. 
+ bool useRealTrue16Insts() const; + bool hasMadMixInsts() const { return HasMadMixInsts; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -167,6 +167,10 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {} +bool AMDGPUSubtarget::useRealTrue16Insts() const { + return hasTrue16BitInsts() && EnableRealTrue16Insts; +} + GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM) : // clang-format off diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -148,8 +148,13 @@ addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024)); if (Subtarget->has16BitInsts()) { - addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass); - addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass); + if (Subtarget->useRealTrue16Insts()) { + addRegisterClass(MVT::i16, &AMDGPU::VGPR_16RegClass); + addRegisterClass(MVT::f16, &AMDGPU::VGPR_16RegClass); + } else { + addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass); + } // Unless there are also VOP3P operations, not operations are really legal. addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);