diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -273,6 +273,10 @@
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
   }
 
+  bool isRegOrImmWithIntT16InputMods() const {
+    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
+  }
+
   bool isRegOrImmWithInt32InputMods() const {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
   }
@@ -293,6 +297,10 @@
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
   }
 
+  bool isRegOrImmWithFPT16InputMods() const {
+    return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
+  }
+
   bool isRegOrImmWithFP32InputMods() const {
     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
   }
@@ -512,7 +520,15 @@
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
   }
 
+  bool isVCSrcTB16() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
+  }
+
   bool isVCSrcTB16_Lo128() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
+  }
+
+  bool isVCSrcFake16B16_Lo128() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
   }
@@ -532,7 +548,15 @@
     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
   }
 
+  bool isVCSrcTF16() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
+  }
+
   bool isVCSrcTF16_Lo128() const {
+    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
+  }
+
+  bool isVCSrcFake16F16_Lo128() const {
     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
   }
@@ -552,10 +576,16 @@
     return isVCSrcF64() || isLiteralImm(MVT::i64);
   }
 
+  bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
+
   bool isVSrcTB16_Lo128() const {
     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
   }
 
+  bool isVSrcFake16B16_Lo128() const {
+    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
+  }
+
   bool isVSrcB16() const {
     return isVCSrcB16() || isLiteralImm(MVT::i16);
   }
@@ -588,10 +618,16 @@
     return isVCSrcF64() || isLiteralImm(MVT::f64);
   }
 
+  bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
+
   bool isVSrcTF16_Lo128() const {
     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
   }
 
+  bool isVSrcFake16F16_Lo128() const {
+    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
+  }
+
   bool isVSrcF16() const {
     return isVCSrcF16() || isLiteralImm(MVT::f16);
   }
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -234,6 +234,12 @@
                         bool MandatoryLiteral = false,
                         unsigned ImmWidth = 0) const;
 
+  MCOperand decodeVGPR_16(unsigned Val) const;
+  MCOperand decodeVGPR_16_Lo128(unsigned Val) const;
+  MCOperand decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
+                               bool MandatoryLiteral = false,
+                               unsigned ImmWidth = 0) const;
+
   MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
   MCOperand decodeSpecialReg32(unsigned Val) const;
   MCOperand decodeSpecialReg64(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -260,6 +260,76 @@
 DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
 DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
 
+inline MCOperand AMDGPUDisassembler::decodeVGPR_16(unsigned Val) const {
+  // Move the suffix bit from pos 9 to pos 0.
+  return createRegOperand(AMDGPU::VGPR_16RegClassID,
+                          ((Val & 255) << 1) | (Val >> 9));
+}
+
+static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
+                                               uint64_t /*Addr*/,
+                                               const MCDisassembler *Decoder) {
+  // Imm{0-7} is 8-bit VGPR number like for VGPR_32 and Imm{9} is
+  // opsel_lo for dst and acts like a True16 modifier (.h or .l).
+  // Imm{8} is not used.
+  assert(isUInt<10>(Imm) && "10-bit encoding expected");
+  assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
+
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->decodeVGPR_16(Imm));
+}
+
+inline MCOperand AMDGPUDisassembler::decodeVGPR_16_Lo128(unsigned Val) const {
+  // Move the suffix bit from pos 7 to pos 0.
+  assert(isUInt<8>(Val));
+  return createRegOperand(AMDGPU::VGPR_16_Lo128RegClassID,
+                          ((Val & 127) << 1) | (Val >> 7));
+}
+
+static DecodeStatus
+DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
+                                 const MCDisassembler *Decoder) {
+  // This uses 8-bit encoding but instead of being 8-bit VGPR number
+  // like for VGPR_32 this is 7-bit VGPR number and Imm{7} is
+  // True16 modifier (.h or .l). Used on instructions without opsel.
+  assert(isUInt<8>(Imm) && "8-bit encoding expected");
+
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst, DAsm->decodeVGPR_16_Lo128(Imm));
+}
+
+static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
+                                                uint64_t /*Addr*/,
+                                                const MCDisassembler *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  assert(isUInt<9>(Imm) && "9-bit encoding expected");
+
+  if (Imm & AMDGPU::EncValues::IS_VGPR) {
+    // When Imm{8} is set (IS_VGPR), Imm{0-7} corresponds to vgpr number.
+    // Here Imm{0-6} is vgpr number Imm{7} is True16 modifier (.h or .l).
+    // Note: instructions that use this don't have opsel.
+    return addOperand(Inst, DAsm->decodeVGPR_16_Lo128(Imm & 0xFF));
+  }
+  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+                                                   Imm & 0xFF, false, 16));
+}
+
+static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
+                                          uint64_t /*Addr*/,
+                                          const MCDisassembler *Decoder) {
+  const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  assert(isUInt<10>(Imm) && "10-bit encoding expected");
+
+  if (Imm & AMDGPU::EncValues::IS_VGPR) {
+    // Imm{0-8} is standard 9-bit encoding for Src operand. Imm{9} is opsel and
+    // act as True16 modifier (.h or .l). Set Imm{8} to 0 to use decodeVGPR_16
+    // helper function.
+    return addOperand(Inst, DAsm->decodeVGPR_16(Imm & 0x2FF));
+  }
+  return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+                                                   Imm & 0xFF, false, 16));
+}
+
 static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
                                          uint64_t Addr,
                                          const MCDisassembler *Decoder) {
@@ -1406,6 +1476,52 @@
     return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
   }
 
+  if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
+    return decodeIntImmed(Val);
+
+  if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
+    return decodeFPImmed(ImmWidth, Val);
+
+  if (Val == LITERAL_CONST) {
+    if (MandatoryLiteral)
+      // Keep a sentinel value for deferred setting.
+      return MCOperand::createImm(LITERAL_CONST);
+    else
+      return decodeLiteralConstant();
+  }
+
+  switch (Width) {
+  case OPW32:
+  case OPW16:
+  case OPWV216:
+    return decodeSpecialReg32(Val);
+  case OPW64:
+  case OPWV232:
+    return decodeSpecialReg64(Val);
+  default:
+    llvm_unreachable("unexpected immediate type");
+  }
+}
+
+MCOperand AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width,
+                                                 unsigned Val,
+                                                 bool MandatoryLiteral,
+                                                 unsigned ImmWidth) const {
+  // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
+  // decoded earlier.
+  assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
+  using namespace AMDGPU::EncValues;
+
+  if (Val <= SGPR_MAX) {
+    // "SGPR_MIN <= Val" is always true and causes compilation warning.
+    static_assert(SGPR_MIN == 0);
+    return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
+  }
+
+  int TTmpIdx = getTTmpIdx(Val);
+  if (TTmpIdx >= 0)
+    return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
+
   if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
     return decodeIntImmed(Val);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -49,6 +49,14 @@
                          SmallVectorImpl<MCFixup> &Fixups,
                          const MCSubtargetInfo &STI) const;
 
+  void getMachineOpValueT16(const MCInst &MI, unsigned OpNo, APInt &Op,
+                            SmallVectorImpl<MCFixup> &Fixups,
+                            const MCSubtargetInfo &STI) const;
+
+  void getMachineOpValueT16Lo128(const MCInst &MI, unsigned OpNo, APInt &Op,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
   /// Use a fixup to encode the simm16 field for SOPP branch
   ///  instructions.
   void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
@@ -547,6 +555,72 @@
   getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
 }
 
+void AMDGPUMCCodeEmitter::getMachineOpValueT16(
+    const MCInst &MI, unsigned OpNo, APInt &Op,
+    SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg()) {
+    Op = MRI.getEncodingValue(MO.getReg());
+    return;
+  }
+
+  getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+
+  // VGPRs include the suffix/op_sel bit in the register encoding, but
+  // immediates and SGPRs include it in src_modifiers. Therefore, copy the
+  // op_sel bit from the src operands into src_modifier operands if Op is
+  // src_modifiers and the corresponding src is a VGPR.
+  unsigned OpSelBits = 0;
+  int SrcMOIdx = -1;
+  assert(OpNo < INT_MAX);
+  if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                              AMDGPU::OpName::src0_modifiers)) {
+    SrcMOIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+    int VDstMOIdx =
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst);
+    if (VDstMOIdx != -1) {
+      auto DstVal = MRI.getEncodingValue(MI.getOperand(VDstMOIdx).getReg());
+      if (AMDGPU::isHi(DstVal, MRI))
+        OpSelBits |= SISrcMods::DST_OP_SEL;
+    }
+  } else if ((int)OpNo == AMDGPU::getNamedOperandIdx(
+                              MI.getOpcode(), AMDGPU::OpName::src1_modifiers))
+    SrcMOIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
+  else if ((int)OpNo == AMDGPU::getNamedOperandIdx(
+                            MI.getOpcode(), AMDGPU::OpName::src2_modifiers))
+    SrcMOIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src2);
+  if (SrcMOIdx == -1)
+    return;
+
+  const MCOperand &SrcMO = MI.getOperand(SrcMOIdx);
+  if (!SrcMO.isReg())
+    return;
+
+  auto SrcReg = SrcMO.getReg();
+  if (AMDGPU::isSGPR(SrcReg, &MRI))
+    return;
+
+  if (AMDGPU::isHi(SrcReg, MRI))
+    OpSelBits |= SISrcMods::OP_SEL_0;
+  Op |= OpSelBits;
+}
+
+void AMDGPUMCCodeEmitter::getMachineOpValueT16Lo128(
+    const MCInst &MI, unsigned OpNo, APInt &Op,
+    SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg()) {
+    auto Encoding = MRI.getEncodingValue(MO.getReg());
+    if ((Encoding & (1 << 9))) { // isVGPR
+      assert((Encoding & (1 << 8)) == 0 && "Did not expect VGPR RegNo > 127");
+      Encoding = ((Encoding & 1) << 8) | Encoding;
+    }
+    Op = Encoding;
+    return;
+  }
+  getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+}
+
 void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
     const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op,
     SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -299,6 +299,16 @@
 
 class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
 
+def VOPDstOperand_t16 : VOPDstOperand<VGPR_16> {
+  let EncoderMethod = "getMachineOpValueT16";
+  let DecoderMethod = "DecodeVGPR_16RegisterClass";
+}
+
+def VOPDstOperand_t16Lo128 : VOPDstOperand<VGPR_16_Lo128> {
+  let EncoderMethod = "getMachineOpValueT16Lo128";
+  let DecoderMethod = "DecodeVGPR_16_Lo128RegisterClass";
+}
+
 class VINTRPe <bits<2> op> : Enc32 {
   bits<8> vdst;
   bits<8> vsrc;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1170,6 +1170,10 @@
 }
 def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
+def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
+  let Name = "RegOrImmWithFPT16InputMods";
+  let PredicateMethod = "isRegOrImmWithFPT16InputMods";
+}
 def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
 def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
@@ -1187,6 +1191,7 @@
 }
 
 def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
+def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
 def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
 def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
@@ -1202,6 +1207,10 @@
   let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
   let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
 }
+def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
+  let Name = "RegOrImmWithIntT16InputMods";
+  let PredicateMethod = "isRegOrImmWithIntT16InputMods";
+}
 def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
 def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
 def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;
@@ -1209,6 +1218,7 @@
 class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
   let PrintMethod = "printOperandAndIntInputMods";
 }
+def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
 def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
 def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
 def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;
@@ -1454,15 +1464,18 @@
 
 // Returns the register class to use for the destination of VOP[123C]
 // instructions for the given VT.
-class getVALUDstForVT<ValueType VT> {
+class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
+  defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
+                                   VOPDstOperand_t16Lo128),
+                              VOPDstOperand<VGPR_32>);
   RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                           !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                             !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
-                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
+                              !if(!eq(VT.Size, 16), op16,
                                 VOPDstS64orS32)))); // else VT == i1
 }
 
-class getVALUDstForVT_t16<ValueType VT> {
+class getVALUDstForVT_not16<ValueType VT> {
   RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                           !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                             !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
@@ -1480,7 +1493,7 @@
 
 // Returns the register class to use for source 0 of VOP[12C]
 // instructions for the given VT.
-class getVOPSrc0ForVT<ValueType VT, bit IsTrue16> {
+class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
   bit isFP = isFloatType<VT>.ret;
 
   RegisterOperand ret =
@@ -1489,7 +1502,7 @@
           VSrc_f64,
           !if(!eq(VT.Value, f16.Value),
             !if(IsTrue16,
-              VSrcT_f16_Lo128,
+              !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128),
               VSrc_f16
             ),
             !if(!eq(VT.Value, v2f16.Value),
@@ -1505,7 +1518,7 @@
           VSrc_b64,
           !if(!eq(VT.Value, i16.Value),
             !if(IsTrue16,
-              VSrcT_b16_Lo128,
+              !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128),
              VSrc_b16
            ),
            !if(!eq(VT.Value, v2i16.Value),
@@ -1530,12 +1543,13 @@
                             VGPR_32))));
 }
 
-class getVregSrcForVT_t16<ValueType VT> {
+class getVregSrcForVT_t16<ValueType VT, bit IsFake16 = 1> {
   RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
                         !if(!eq(VT.Size, 96), VReg_96,
                           !if(!eq(VT.Size, 64), VReg_64,
                             !if(!eq(VT.Size, 48), VReg_64,
-                              !if(!eq(VT.Size, 16), VGPR_32_Lo128,
+                              !if(!eq(VT.Size, 16),
+                                !if(IsFake16, VGPR_32_Lo128, VGPR_16_Lo128),
                                 VGPR_32)))));
 }
@@ -1548,7 +1562,7 @@
 
 // Returns the register class to use for sources of VOP3 instructions for the
 // given VT.
-class getVOP3SrcForVT<ValueType VT> {
+class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
   bit isFP = isFloatType<VT>.ret;
   RegisterOperand ret =
   !if(!eq(VT.Size, 128),
@@ -1565,7 +1579,7 @@
         SSrc_i1,
         !if(isFP,
           !if(!eq(VT.Value, f16.Value),
-            VSrc_f16,
+            !if(IsTrue16, VSrcT_f16, VSrc_f16),
             !if(!eq(VT.Value, v2f16.Value),
               VSrc_v2f16,
               !if(!eq(VT.Value, v4f16.Value),
@@ -1575,7 +1589,7 @@
             )
           ),
           !if(!eq(VT.Value, i16.Value),
-            VSrc_b16,
+            !if(IsTrue16, VSrcT_b16, VSrc_b16),
             !if(!eq(VT.Value, v2i16.Value),
               VSrc_v2b16,
               VSrc_b32
@@ -1622,18 +1636,15 @@
 }
 
 // Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT> {
+class getSrcMod <ValueType VT, bit IsTrue16 = 0> {
   bit isFP = isFloatType<VT>.ret;
   bit isPacked = isPackedType<VT>.ret;
   Operand ret =  !if(!eq(VT.Size, 64),
                      !if(isFP, FP64InputMods, Int64InputMods),
-                     !if(isFP,
-                       !if(!eq(VT.Value, f16.Value),
-                          FP16InputMods,
-                          FP32InputMods
-                        ),
-                        Int32InputMods)
-                     );
+                     !if(!eq(VT.Size, 16),
+                       !if(isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods),
+                                 !if(IsTrue16, IntT16InputMods, IntOpSelMods)),
+                       !if(isFP, FP32InputMods, Int32InputMods)));
 }
 
 class getOpSelMod <ValueType VT> {
@@ -2447,7 +2458,7 @@
 class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
   let IsTrue16 = 1;
   // Most DstVT are 16-bit, but not all
-  let DstRC = getVALUDstForVT_t16<DstVT>.ret;
+  let DstRC = getVALUDstForVT_not16<DstVT>.ret;
   let DstRC64 = getVALUDstForVT<DstVT>.ret;
   let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
   let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
@@ -2462,7 +2473,7 @@
   let IsTrue16 = 1;
   let IsFake16 = 1;
   // Most DstVT are 16-bit, but not all
-  let DstRC = getVALUDstForVT_t16<DstVT>.ret;
+  let DstRC = getVALUDstForVT_not16<DstVT>.ret;
   let DstRC64 = getVALUDstForVT<DstVT>.ret;
   let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
   let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1130,6 +1130,30 @@
   : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16",
                      !subst("_f16", "F16", NAME), "_Imm16">;
 
+class RegOrB16T <string RegisterClass, string OperandTypePrefix>
+  : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT16",
+                     !subst("_b16", "B16", NAME), "_Imm16"> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
+
+class RegOrF16T <string RegisterClass, string OperandTypePrefix>
+  : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16",
+                     !subst("_f16", "F16", NAME), "_Imm16"> {
+  let EncoderMethod = "getMachineOpValueT16";
+}
+
+class RegOrB16_Lo128T <string RegisterClass, string OperandTypePrefix>
+  : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT16",
+                     !subst("_b16_Lo128", "B16_Lo128", NAME), "_Imm16"> {
+  let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
+class RegOrF16_Lo128T <string RegisterClass, string OperandTypePrefix>
+  : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16",
+                     !subst("_f16_Lo128", "F16_Lo128", NAME), "_Imm16"> {
+  let EncoderMethod = "getMachineOpValueT16Lo128";
+}
+
 class RegOrB32 <string RegisterClass, string OperandTypePrefix>
   : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT32",
                      !subst("_b32", "B32", NAME), "_Imm32">;
@@ -1185,6 +1209,7 @@
   : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16_DEFERRED",
                      !subst("_f16_Lo128_Deferred", "F16", NAME), "_Deferred_Imm16">;
 
+
 //===----------------------------------------------------------------------===//
 // SSrc_* Operands with an SGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
@@ -1208,6 +1233,24 @@
 
 def VSrc_b16 : RegOrB16 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_f16 : RegOrF16 <"VS_32", "OPERAND_REG_IMM">;
+
+def VSrcT_b16 : RegOrB16T <"VS_16", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrcT16";
+}
+def VSrcT_f16 : RegOrF16T <"VS_16", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrcT16";
+}
+
+def VSrcT_b16_Lo128 : RegOrB16_Lo128T <"VS_16_Lo128", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
+}
+def VSrcT_f16_Lo128 : RegOrF16_Lo128T <"VS_16_Lo128", "OPERAND_REG_IMM"> {
+  let DecoderMethod = "decodeOperand_VSrcT16_Lo128";
+}
+
+def VSrcFake16_b16_Lo128 : RegOrB16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
+def VSrcFake16_f16_Lo128 : RegOrF16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
+
 def VSrc_b32 : RegOrB32 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_f32 : RegOrF32 <"VS_32", "OPERAND_REG_IMM">;
 def VSrc_v2b16 : RegOrV2B16 <"VS_32", "OPERAND_REG_IMM">;
@@ -1217,9 +1260,6 @@
 def VSrc_v2b32 : RegOrV2B32 <"VS_64", "OPERAND_REG_IMM">;
 def VSrc_v2f32 : RegOrV2F32 <"VS_64", "OPERAND_REG_IMM">;
 
-def VSrcT_b16_Lo128 : RegOrB16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
-def VSrcT_f16_Lo128 : RegOrF16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
-
 //===----------------------------------------------------------------------===//
 // VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
 // with FMAMK/FMAAK
@@ -1228,8 +1268,8 @@
 def VSrc_f16_Deferred : RegOrF16_Deferred<"VS_32", "OPERAND_REG_IMM">;
 def VSrc_f32_Deferred : RegOrF32_Deferred<"VS_32", "OPERAND_REG_IMM">;
 
-def VSrcT_f16_Lo128_Deferred : RegOrF16_Lo128_Deferred<"VS_32_Lo128",
-                                                       "OPERAND_REG_IMM">;
+def VSrcFake16_f16_Lo128_Deferred : RegOrF16_Lo128_Deferred<"VS_32_Lo128",
+                                                            "OPERAND_REG_IMM">;
 
 //===----------------------------------------------------------------------===//
 // VRegSrc_* Operands with a VGPR
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1173,6 +1173,10 @@
 /// Is Reg - scalar register
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
 
+/// \returns if \p Reg occupies the high 16-bits of a 32-bit register.
+/// The bit indicating isHi is the LSB of the encoding.
+bool isHi(unsigned Reg, const MCRegisterInfo &MRI);
+
 /// If \p Reg is a pseudo reg, return the correct hardware register given
 /// \p STI otherwise return \p Reg.
 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2120,6 +2120,10 @@
          Reg == AMDGPU::SCC;
 }
 
+bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
+  return MRI.getEncodingValue(Reg) & 1;
+}
+
 #define MAP_REG2REG \
   using namespace AMDGPU; \
   switch(Reg) { \
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -381,7 +381,7 @@
 def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
   let IsTrue16 = 1;
   let DstRC = VOPDstOperand<VGPR_32_Lo128>;
-  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
+  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, VGPR_32_Lo128:$src1, ImmOpType:$imm);
 }
 def VOP_MADAK_F32 : VOP_MADAK <f32>;
@@ -406,7 +406,7 @@
 def VOP_MADMK_F16_t16 : VOP_MADMK <f16> {
   let IsTrue16 = 1;
   let DstRC = VOPDstOperand<VGPR_32_Lo128>;
-  let Ins32 = (ins VSrcT_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
+  let Ins32 = (ins VSrcFake16_f16_Lo128_Deferred:$src0, ImmOpType:$imm, VGPR_32_Lo128:$src1);
 }
 def VOP_MADMK_F32 : VOP_MADMK <f32>;
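
Note on the bit layout handled by this patch: for True16 operands the VGPR_16 register-class index keeps the .h/.l suffix in bit 0 (see decodeVGPR_16 and isHi above), while the instruction field keeps it in bit 9 of the 10-bit VOP3 src/dst encoding (op_sel) or in bit 7 of the 8-bit Lo128 encoding. The standalone C++ sketch below is not part of the patch and uses made-up helper names; only the two bit swizzles mirror the decoders above.

// Standalone illustration of the True16 decode swizzles in this patch.
// Helper names are hypothetical; only the bit manipulation matches
// decodeVGPR_16 / decodeVGPR_16_Lo128.
#include <cassert>
#include <cstdio>

// Register-class index: (vgpr# << 1) | isHi, i.e. the .h/.l bit is the LSB.
unsigned regIdxFromVOP3Field(unsigned Val) {  // 10-bit field, suffix in bit 9
  return ((Val & 255) << 1) | (Val >> 9);
}

unsigned regIdxFromLo128Field(unsigned Val) { // 8-bit field, suffix in bit 7
  return ((Val & 127) << 1) | (Val >> 7);
}

int main() {
  // v5.h in the 10-bit VOP3 form: vgpr# = 5 in bits 0-7, op_sel in bit 9.
  unsigned EncVOP3 = 5 | (1u << 9);
  assert(regIdxFromVOP3Field(EncVOP3) == ((5u << 1) | 1)); // index 11 -> v5.h

  // v5.h in the 8-bit Lo128 form: vgpr# = 5 in bits 0-6, suffix in bit 7.
  unsigned EncLo128 = 5 | (1u << 7);
  assert(regIdxFromLo128Field(EncLo128) == ((5u << 1) | 1));

  printf("both encodings map to register-class index %u (v5.h)\n",
         regIdxFromVOP3Field(EncVOP3));
  return 0;
}

The encoder side (getMachineOpValueT16 and getMachineOpValueT16Lo128) performs the inverse mapping, and additionally copies the .h/.l choice into the op_sel bits of src_modifiers when the source is an SGPR or an immediate, since only VGPR encodings carry the suffix bit directly.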