diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -273,6 +273,10 @@
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
   }
 
+  bool isRegOrImmWithIntT16InputMods() const {
+    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
+  }
+
   bool isRegOrImmWithInt32InputMods() const {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
@@ -293,6 +297,10 @@
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
   }
 
+  bool isRegOrImmWithFPT16InputMods() const {
+    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
+  }
+
   bool isRegOrImmWithFP32InputMods() const {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
   }
@@ -512,7 +520,15 @@
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
   }
 
+  bool isVCSrcTB16() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
+  }
+
   bool isVCSrcTB16_Lo128() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
+  }
+
+  bool isVCSrcFake16B16_Lo128() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
   }
@@ -532,7 +548,15 @@
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
+  bool isVCSrcTF16() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
+  }
+
   bool isVCSrcTF16_Lo128() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
+  }
+
+  bool isVCSrcFake16F16_Lo128() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
   }
@@ -552,10 +576,16 @@
     return isVCSrcF64() || isLiteralImm(MVT::i64);
   }
 
+  bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
+
   bool isVSrcTB16_Lo128() const {
     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
   }
 
+  bool isVSrcFake16B16_Lo128() const {
+    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
+  }
+
   bool isVSrcB16() const {
     return isVCSrcB16() || isLiteralImm(MVT::i16);
   }
@@ -588,10 +618,16 @@
     return isVCSrcF64() || isLiteralImm(MVT::f64);
   }
 
+  bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
+
   bool isVSrcTF16_Lo128() const {
     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }
 
+  bool isVSrcFake16F16_Lo128() const {
+    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
+  }
+
   bool isVSrcF16() const {
     return isVCSrcF16() || isLiteralImm(MVT::f16);
   }
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -114,6 +114,7 @@
   MCOperand createRegOperand(unsigned int RegId) const;
   MCOperand createRegOperand(unsigned RegClassID, unsigned Val) const;
   MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const;
+  MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const;
 
   MCOperand errOperand(unsigned V, const Twine& ErrMsg) const;
 
@@ -234,6 +235,10 @@
                         bool MandatoryLiteral = false,
                         unsigned ImmWidth = 0) const;
 
+  MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
+                               bool MandatoryLiteral = false,
+                               unsigned ImmWidth = 0) const;
+
   MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
   MCOperand decodeSpecialReg32(unsigned Val) const;
   MCOperand decodeSpecialReg64(unsigned Val) const;
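Note: the new 16-bit operand predicates above correspond to two source encodings that the disassembler hunks below take apart. A minimal standalone sketch of the bit layout (names are illustrative, not from the patch; only the shifts and masks come from the decoders):

    #include <cassert>

    // 9-bit VOP1/2/C true16 src operand (the "_Lo128" flavor):
    //   Imm{8}   - VGPR flag; if clear, Imm{7:0} encodes an SGPR/constant
    //   Imm{7}   - selects the high 16-bit half of the VGPR
    //   Imm{6:0} - VGPR index, restricted to v0-v127
    // 10-bit VOP3 true16 src operand: the VGPR flag stays in Imm{8}, the
    // high-half bit moves to Imm{9}, and the index widens to Imm{7:0}.
    int main() {
      unsigned Imm = (1u << 8) | (1u << 7) | 5; // v5.h in the 9-bit form
      bool IsVGPR = Imm & (1u << 8);
      bool IsHi = Imm & (1u << 7);
      unsigned RegIdx = Imm & 0x7f;
      assert(IsVGPR && IsHi && RegIdx == 5);
      return 0;
    }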
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -260,6 +260,61 @@
 DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
 DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
 
+static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
+                                               uint64_t /*Addr*/,
+                                               const MCDisassembler *Decoder) {
+  assert(isUInt<10>(Imm) && "10-bit encoding expected");
+  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
+
+  bool IsHi = Imm & (1 << 9);
+  unsigned RegIdx = Imm & 0xff;
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+}
+
+static DecodeStatus
+DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
+                                 const MCDisassembler *Decoder) {
+  assert(isUInt<8>(Imm) && "8-bit encoding expected");
+
+  bool IsHi = Imm & (1 << 7);
+  unsigned RegIdx = Imm & 0x7f;
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+}
+
+static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
+                                                uint64_t /*Addr*/,
+                                                const MCDisassembler *Decoder) {
+  assert(isUInt<9>(Imm) && "9-bit encoding expected");
+
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  bool IsVGPR = Imm & (1 << 8);
+  if (IsVGPR) {
+    bool IsHi = Imm & (1 << 7);
+    unsigned RegIdx = Imm & 0x7f;
+    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+  }
+  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+                                                   Imm & 0xFF, false, 16));
+}
+
+static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
+                                          uint64_t /*Addr*/,
+                                          const MCDisassembler *Decoder) {
+  assert(isUInt<10>(Imm) && "10-bit encoding expected");
+
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  bool IsVGPR = Imm & (1 << 8);
+  if (IsVGPR) {
+    bool IsHi = Imm & (1 << 9);
+    unsigned RegIdx = Imm & 0xff;
+    return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+  }
+  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+                                                   Imm & 0xFF, false, 16));
+}
+
 static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
@@ -1139,6 +1194,13 @@
   return createRegOperand(SRegClassID, Val >> shift);
 }
 
+MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
+                                                  bool IsHi) const {
+  unsigned RCID =
+      IsHi ? AMDGPU::VGPR_HI16RegClassID : AMDGPU::VGPR_LO16RegClassID;
+  return createRegOperand(RCID, RegIdx);
+}
+
 // Decode Literals for insts which always have a literal in the encoding
 MCOperand
 AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
@@ -1395,6 +1457,18 @@
     return createRegOperand(IsAGPR ? getAgprClassId(Width)
                                    : getVgprClassId(Width), Val - VGPR_MIN);
   }
+  return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth);
+}
+
+MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
+                                                 unsigned Val,
+                                                 bool MandatoryLiteral,
+                                                 unsigned ImmWidth) const {
+  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
+  // decoded earlier.
+  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
+  using namespace AMDGPU::EncValues;
+
   if (Val <= SGPR_MAX) {
     // "SGPR_MIN <= Val" is always true and causes compilation warning.
     static_assert(SGPR_MIN == 0);
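Note: decodeSrcOp now resolves only encodings with Val{8} set (VGPR, AGPR, or a true16 VGPR half) and delegates everything else to the new decodeNonVGPRSrcOp, which asserts that Val{8} is clear. A compilable sketch of that partition; the enum values are copied from the SIDefines.h hunk below (SGPR_MAX shown with its GFX10+ value), and classify() is a hypothetical stand-in for the two functions:

    #include <cassert>

    // Mirrors AMDGPU::EncValues; see the SIDefines.h hunk below.
    enum : unsigned { SGPR_MAX = 105, LITERAL_CONST = 255, IS_VGPR = 256 };

    enum class SrcKind { SGPR, OtherNonVGPR, VGPR };

    static SrcKind classify(unsigned Val) {
      if (Val & IS_VGPR)   // Val{8}: handled in decodeSrcOp itself
        return SrcKind::VGPR;
      if (Val <= SGPR_MAX) // SGPR_MIN == 0, so no lower-bound check
        return SrcKind::SGPR;
      return SrcKind::OtherNonVGPR; // ttmps, inline constants, literal, ...
    }

    int main() {
      assert(classify(3) == SrcKind::SGPR);
      assert(classify(LITERAL_CONST) == SrcKind::OtherNonVGPR);
      assert(classify(IS_VGPR | 7) == SrcKind::VGPR);
      return 0;
    }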
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -49,6 +49,14 @@
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const;
 
+  void getMachineOpValueT16(const MCInst &MI, unsigned OpNo, APInt &Op,
+                            SmallVectorImpl<MCFixup> &Fixups,
+                            const MCSubtargetInfo &STI) const;
+
+  void getMachineOpValueT16Lo128(const MCInst &MI, unsigned OpNo, APInt &Op,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
   /// Use a fixup to encode the simm16 field for SOPP branch
   ///  instructions.
   void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
@@ -547,6 +555,28 @@
   getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
 }
 
+void AMDGPUMCCodeEmitter::getMachineOpValueT16(
+    const MCInst &MI, unsigned OpNo, APInt &Op,
+    SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+  llvm_unreachable("TODO: Implement getMachineOpValueT16().");
+}
+
+void AMDGPUMCCodeEmitter::getMachineOpValueT16Lo128(
+    const MCInst &MI, unsigned OpNo, APInt &Op,
+    SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg()) {
+    uint16_t Encoding = MRI.getEncodingValue(MO.getReg());
+    unsigned RegIdx = Encoding & AMDGPU::EncValues::REG_IDX_MASK;
+    bool IsHi = Encoding & AMDGPU::EncValues::IS_HI;
+    bool IsVGPR = Encoding & AMDGPU::EncValues::IS_VGPR;
+    assert((!IsVGPR || isUInt<7>(RegIdx)) && "VGPR0-VGPR127 expected!");
+    Op = (IsVGPR ? 0x100 : 0) | (IsHi ? 0x80 : 0) | RegIdx;
+    return;
+  }
+  getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+}
+
 void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
     const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op,
     SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -314,6 +314,7 @@
 namespace EncValues { // Encoding values of enum9/8/7 operands
 
 enum : unsigned {
+  REG_IDX_MASK = 255,
   SGPR_MIN = 0,
   SGPR_MAX_SI = 101,
   SGPR_MAX_GFX10 = 105,
@@ -329,7 +330,8 @@
   LITERAL_CONST = 255,
   VGPR_MIN = 256,
   VGPR_MAX = 511,
-  IS_VGPR = 256 // Indicates VGPR or AGPR
+  IS_VGPR = 256, // Indicates VGPR or AGPR
+  IS_HI = 512,   // High 16-bit register.
 };
 
 } // namespace EncValues
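Note: getMachineOpValueT16Lo128 is the encoder-side inverse of DecodeVGPR_16_Lo128RegisterClass; the register's encoding value carries IS_VGPR, IS_HI, and the index, and the emitter repacks them into nine bits. A worked round-trip using the REG_IDX_MASK/IS_VGPR/IS_HI values added to SIDefines.h above (the sample encoding for v127.h is an assumption for illustration):

    #include <cassert>
    #include <cstdint>

    // Values from the SIDefines.h hunk above.
    enum : unsigned { REG_IDX_MASK = 255, IS_VGPR = 256, IS_HI = 512 };

    int main() {
      // Assumed encoding value for the high half of v127 under this scheme.
      uint16_t Encoding = IS_VGPR | IS_HI | 127;
      unsigned RegIdx = Encoding & REG_IDX_MASK;
      bool Hi = Encoding & IS_HI;
      bool Vgpr = Encoding & IS_VGPR;
      // The packing performed by getMachineOpValueT16Lo128:
      unsigned Op = (Vgpr ? 0x100 : 0) | (Hi ? 0x80 : 0) | RegIdx;
      assert(Op == 0x1ff); // VGPR flag | high-half bit | index 127
      return 0;
    }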
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -304,6 +304,16 @@
 
 class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
 
+def VOPDstOperand_t16 : VOPDstOperand<VGPR_16> {
+  let EncoderMethod = "getMachineOpValueT16";
+  let DecoderMethod = "DecodeVGPR_16RegisterClass";
+}
+
+def VOPDstOperand_t16Lo128 : VOPDstOperand<VGPR_16_Lo128> {
+  let EncoderMethod = "getMachineOpValueT16Lo128";
+  let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
+}
+
 class VINTRPe <bits<2> op> : Enc32 {
   bits<8> vdst;
   bits<8> vsrc;
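Note: destinations reuse the same two layouts as sources, minus the VGPR flag: VOPDstOperand_t16 is decoded by DecodeVGPR_16RegisterClass from a 10-bit field where Imm{9} selects the high half and Imm{8} must be clear, while VOPDstOperand_t16Lo128 uses the 8-bit form with the half-select in Imm{7}. A small self-check of those two layouts (register choices are arbitrary examples):

    #include <cassert>

    int main() {
      // 10-bit dst form (DecodeVGPR_16RegisterClass): v200.h, hi bit in Imm{9}.
      unsigned Imm = (1u << 9) | 200;
      assert((Imm & (1u << 8)) == 0);             // Imm{8} must be unused
      assert((Imm & 0xff) == 200 && (Imm & (1u << 9)));
      // 8-bit dst form (DecodeVGPR_16_Lo128RegisterClass): v100.h.
      unsigned Lo = (1u << 7) | 100;
      assert((Lo & 0x7f) == 100 && (Lo & (1u << 7)));
      return 0;
    }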
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1170,6 +1170,10 @@
 }
 
 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
+def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
+  let Name = "RegOrImmWithFPT16InputMods";
+  let PredicateMethod = "isRegOrImmWithFPT16InputMods";
+}
 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -1187,6 +1191,7 @@
 }
 
 def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
+def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
@@ -1202,6 +1207,10 @@
   let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
   let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
 }
+def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
+  let Name = "RegOrImmWithIntT16InputMods";
+  let PredicateMethod = "isRegOrImmWithIntT16InputMods";
+}
 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
 def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
@@ -1209,6 +1218,7 @@
 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
   let PrintMethod = "printOperandAndIntInputMods";
 }
+def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
 def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
@@ -1463,15 +1473,18 @@
 
 // Returns the register class to use for the destination of VOP[123C]
 // instructions for the given VT.
-class getVALUDstForVT<ValueType VT> {
+class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
+  defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
+                                   VOPDstOperand_t16Lo128),
+                              VOPDstOperand<VGPR_32>);
   RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                           !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                             !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
-                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
+                              !if(!eq(VT.Size, 16), op16,
                                 VOPDstS64orS32)))); // else VT == i1
 }
 
-class getVALUDstForVT_t16<ValueType VT> {
+class getVALUDstForVT_fake16<ValueType VT> {
   RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                           !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                             !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
@@ -1489,7 +1502,7 @@
 
 // Returns the register class to use for source 0 of VOP[12C]
 // instructions for the given VT.
-class getVOPSrc0ForVT<ValueType VT, bit IsTrue16> {
+class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
   bit isFP = isFloatType<VT>.ret;
 
   RegisterOperand ret =
@@ -1498,7 +1511,7 @@
       VSrc_f64,
       !if(!eq(VT.Value, f16.Value),
         !if(IsTrue16,
-          VSrcT_f16_Lo128,
+          !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128),
          VSrc_f16
        ),
        !if(!eq(VT.Value, v2f16.Value),
@@ -1514,7 +1527,7 @@
       VSrc_b64,
       !if(!eq(VT.Value, i16.Value),
        !if(IsTrue16,
-         VSrcT_b16_Lo128,
+         !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128),
          VSrc_b16
        ),
        !if(!eq(VT.Value, v2i16.Value),
@@ -1539,13 +1552,17 @@
                           VGPR_32))));
 }
 
-class getVregSrcForVT_t16<ValueType VT> {
+class getVregSrcForVT_t16<ValueType VT, bit IsFake16 = 1> {
   RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
                         !if(!eq(VT.Size, 96), VReg_96,
                           !if(!eq(VT.Size, 64), VReg_64,
                             !if(!eq(VT.Size, 48), VReg_64,
-                              !if(!eq(VT.Size, 16), VGPR_32_Lo128,
+                              !if(!eq(VT.Size, 16),
+                                !if(IsFake16, VGPR_32_Lo128, VGPR_16_Lo128),
                                 VGPR_32)))));
+
+  RegisterOperand op = !if (!and(!eq(VT.Size, 16), !not(IsFake16)),
+                            VGPRSrc_16_Lo128, RegisterOperand<ret>);
 }
 
 class getSDWASrcForVT <ValueType VT> {
@@ -1557,7 +1574,7 @@
 
 // Returns the register class to use for sources of VOP3 instructions for the
 // given VT.
-class getVOP3SrcForVT<ValueType VT> {
+class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
   bit isFP = isFloatType<VT>.ret;
   RegisterOperand ret =
   !if(!eq(VT.Size, 128),
@@ -1574,7 +1591,7 @@
        SSrc_i1,
        !if(isFP,
          !if(!eq(VT.Value, f16.Value),
-           VSrc_f16,
+           !if(IsTrue16, VSrcT_f16, VSrc_f16),
            !if(!eq(VT.Value, v2f16.Value),
              VSrc_v2f16,
              !if(!eq(VT.Value, v4f16.Value),
@@ -1584,7 +1601,7 @@
          )
        ),
        !if(!eq(VT.Value, i16.Value),
-         VSrc_b16,
+         !if(IsTrue16, VSrcT_b16, VSrc_b16),
          !if(!eq(VT.Value, v2i16.Value),
            VSrc_v2b16,
            VSrc_b32
@@ -1631,18 +1648,15 @@
 }
 
 // Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT> {
+class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
   bit isFP = isFloatType<VT>.ret;
   bit isPacked = isPackedType<VT>.ret;
   Operand ret =  !if(!eq(VT.Size, 64),
                     !if(isFP, FP64InputMods, Int64InputMods),
-                    !if(isFP,
-                      !if(!eq(VT.Value, f16.Value),
-                        FP16InputMods,
-                        FP32InputMods
-                      ),
-                      Int32InputMods)
-                    );
+                    !if(!eq(VT.Size, 16),
+                      !if(isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods),
+                                !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
+                      !if(isFP, FP32InputMods, Int32InputMods)));
 }
 
 class getOpSelMod <ValueType VT> {
@@ -2457,7 +2471,7 @@
   let IsTrue16 = 1;
   let IsRealTrue16 = 1;
   // Most DstVT are 16-bit, but not all.
-  let DstRC = getVALUDstForVT_t16<DstVT>.ret;
+  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
   let DstRC64 = getVALUDstForVT<DstVT>.ret;
   let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
   let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
@@ -2471,7 +2485,7 @@
 class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
   let IsTrue16 = 1;
   // Most DstVT are 16-bit, but not all
-  let DstRC = getVALUDstForVT_t16<DstVT>.ret;
+  let DstRC = getVALUDstForVT_fake16<DstVT>.ret;
   let DstRC64 = getVALUDstForVT<DstVT>.ret;
   let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
   let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1130,6 +1130,30 @@
   : RegOrImmOperand <RegisterClassName, OperandTypePrefix # "_FP16">;
 
+class RegOrB16T <string RegisterClassName, string OperandTypePrefix>
+  : RegOrImmOperand <RegisterClassName,
+                     OperandTypePrefix # "_INT16"> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
+
+class RegOrF16T <string RegisterClassName, string OperandTypePrefix>
+  : RegOrImmOperand <RegisterClassName,
+                     OperandTypePrefix # "_FP16"> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
+
+class RegOrB16_Lo128T <string RegisterClassName, string OperandTypePrefix>
+  : RegOrImmOperand <RegisterClassName,
+                     OperandTypePrefix # "_INT16"> {
+  let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
+class RegOrF16_Lo128T <string RegisterClassName, string OperandTypePrefix>
+  : RegOrImmOperand <RegisterClassName,
+                     OperandTypePrefix # "_FP16"> {
+  let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
 class RegOrB32 <string RegisterClassName, string OperandTypePrefix>
   : RegOrImmOperand <RegisterClassName, OperandTypePrefix # "_INT32">;
 
@@ -1185,6 +1209,7 @@
   : RegOrImmOperand <RegisterClassName, OperandTypePrefix # "_FP16_DEFERRED">;
 
+
 //===----------------------------------------------------------------------===//
 // SSrc_* Operands with an SGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
@@ -1206,8 +1231,30 @@
 // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
 
+// The current default use case for VOP3, and temporarily the future default.
 def VSrc_b16 : RegOrB16 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_f16 : RegOrF16 <"VS_32", "OPERAND_REG_IMM">;
+
+// True16 VOP3 operands.
+def VSrcT_b16 : RegOrB16T <"VS_16", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrcT16";
+}
+def VSrcT_f16 : RegOrF16T <"VS_16", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrcT16";
+}
+
+// True16 VOP1/2/C operands.
+def VSrcT_b16_Lo128 : RegOrB16_Lo128T <"VS_16_Lo128", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
+}
+def VSrcT_f16_Lo128 : RegOrF16_Lo128T <"VS_16_Lo128", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
+}
+
+// The current default use case for fake VOP1/2/C, and temporarily the
+// future default.
+def VSrcFake16_b16_Lo128 : RegOrB16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
+def VSrcFake16_f16_Lo128 : RegOrF16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
+
 def VSrc_b32 : RegOrB32 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_f32 : RegOrF32 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_v2b16 : RegOrV2B16 <"VS_32", "OPERAND_REG_IMM">;
@@ -1217,9 +1264,6 @@
 def VSrc_v2b32 : RegOrV2B32 <"VS_64", "OPERAND_REG_IMM">;
 def VSrc_v2f32 : RegOrV2F32 <"VS_64", "OPERAND_REG_IMM">;
 
-def VSrcT_b16_Lo128 : RegOrB16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
-def VSrcT_f16_Lo128 : RegOrF16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
-
 //===----------------------------------------------------------------------===//
 // VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
 // with FMAMK/FMAAK
@@ -1228,8 +1272,8 @@
 def VSrc_f16_Deferred : RegOrF16_Deferred<"VS_32", "OPERAND_REG_IMM">;
 def VSrc_f32_Deferred : RegOrF32_Deferred<"VS_32", "OPERAND_REG_IMM">;
 
-def VSrcT_f16_Lo128_Deferred : RegOrF16_Lo128_Deferred<"VS_32_Lo128",
-                                                       "OPERAND_REG_IMM">;
+def VSrcFake16_f16_Lo128_Deferred : RegOrF16_Lo128_Deferred<"VS_32_Lo128",
+                                                            "OPERAND_REG_IMM">;
 
 //===----------------------------------------------------------------------===//
 // VRegSrc_* Operands with a VGPR
@@ -1269,6 +1313,11 @@
   let DecoderMethod = "DecodeVGPR_32RegisterClass";
 }
 
+def VGPRSrc_16_Lo128 : RegisterOperand<VGPR_16_Lo128> {
+  let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
+  let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
 //===----------------------------------------------------------------------===//
 // ASrc_* Operands with an AccVGPR
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1178,6 +1178,10 @@
 /// Is Reg - scalar register
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
 
+/// \returns true if \p Reg occupies the high 16 bits of a 32-bit register;
+/// the IS_HI bit of the encoding value indicates the high half.
+bool isHi(unsigned Reg, const MCRegisterInfo &MRI);
+
 /// If \p Reg is a pseudo reg, return the correct hardware register given
 /// \p STI otherwise return \p Reg.
 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
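Note: the isHi helper declared above (implemented in the next hunk) just tests the IS_HI bit of a register's encoding value, matching the enum added to SIDefines.h. A self-contained sketch; isHiRaw is a hypothetical stand-in that takes the raw encoding instead of going through MCRegisterInfo:

    #include <cassert>

    enum : unsigned { IS_HI = 512 }; // from the SIDefines.h hunk above

    // Hypothetical raw-encoding counterpart of AMDGPU::isHi(), which applies
    // the same mask to MRI.getEncodingValue(Reg).
    static bool isHiRaw(unsigned Encoding) { return Encoding & IS_HI; }

    int main() {
      assert(isHiRaw(IS_HI | 3)); // an assumed encoding of v3.h
      assert(!isHiRaw(3));        // ... and of v3.l
      return 0;
    }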
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2157,6 +2157,10 @@
          Reg == AMDGPU::SCC;
 }
 
+bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
+  return MRI.getEncodingValue(Reg) & AMDGPU::EncValues::IS_HI;
+}
+
 #define MAP_REG2REG \
   using namespace AMDGPU; \
   switch(Reg) { \
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -381,7 +381,7 @@
 def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
   let IsTrue16 = 1;
   let DstRC = VOPDstOperand<VGPR_32_Lo128>;
-  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
+  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
 }
 def VOP_MADAK_F32 : VOP_MADAK <f32>;
@@ -406,7 +406,7 @@
 def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
   let IsTrue16 = 1;
   let DstRC = VOPDstOperand<VGPR_32_Lo128>;
-  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
+  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
 }
 def VOP_MADMK_F32 : VOP_MADMK <f32>;