diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -418,7 +418,7 @@ def : MnemonicAlias<"v_accvgpr_read", "v_accvgpr_read_b32">; def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">; -multiclass VOP3P_Real_vi op> { +multiclass VOP3P_Real_vi op> { def _vi : VOP3P_Real(NAME), SIEncodingFamily.VI>, VOP3Pe (NAME).Pfl> { let AssemblerPredicate = HasVOP3PInsts; @@ -426,7 +426,7 @@ } } -multiclass VOP3P_Real_MAI op> { +multiclass VOP3P_Real_MAI op> { def _vi : VOP3P_Real(NAME), SIEncodingFamily.VI>, VOP3Pe_MAI (NAME).Pfl> { let AssemblerPredicate = HasMAIInsts; @@ -434,32 +434,32 @@ } } -defm V_PK_MAD_I16 : VOP3P_Real_vi <0x380>; -defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x381>; -defm V_PK_ADD_I16 : VOP3P_Real_vi <0x382>; -defm V_PK_SUB_I16 : VOP3P_Real_vi <0x383>; -defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x384>; -defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x385>; -defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x386>; -defm V_PK_MAX_I16 : VOP3P_Real_vi <0x387>; -defm V_PK_MIN_I16 : VOP3P_Real_vi <0x388>; -defm V_PK_MAD_U16 : VOP3P_Real_vi <0x389>; - -defm V_PK_ADD_U16 : VOP3P_Real_vi <0x38a>; -defm V_PK_SUB_U16 : VOP3P_Real_vi <0x38b>; -defm V_PK_MAX_U16 : VOP3P_Real_vi <0x38c>; -defm V_PK_MIN_U16 : VOP3P_Real_vi <0x38d>; -defm V_PK_FMA_F16 : VOP3P_Real_vi <0x38e>; -defm V_PK_ADD_F16 : VOP3P_Real_vi <0x38f>; -defm V_PK_MUL_F16 : VOP3P_Real_vi <0x390>; -defm V_PK_MIN_F16 : VOP3P_Real_vi <0x391>; -defm V_PK_MAX_F16 : VOP3P_Real_vi <0x392>; +defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>; +defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>; +defm V_PK_ADD_I16 : VOP3P_Real_vi <0x02>; +defm V_PK_SUB_I16 : VOP3P_Real_vi <0x03>; +defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x04>; +defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x05>; +defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x06>; +defm V_PK_MAX_I16 : VOP3P_Real_vi <0x07>; +defm V_PK_MIN_I16 : VOP3P_Real_vi <0x08>; +defm V_PK_MAD_U16 : VOP3P_Real_vi <0x09>; + +defm V_PK_ADD_U16 : VOP3P_Real_vi <0x0a>; +defm V_PK_SUB_U16 : VOP3P_Real_vi <0x0b>; +defm V_PK_MAX_U16 : VOP3P_Real_vi <0x0c>; +defm V_PK_MIN_U16 : VOP3P_Real_vi <0x0d>; +defm V_PK_FMA_F16 : VOP3P_Real_vi <0x0e>; +defm V_PK_ADD_F16 : VOP3P_Real_vi <0x0f>; +defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>; +defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>; +defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>; let SubtargetPredicate = HasMadMixInsts in { -defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x3a0>; -defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x3a1>; -defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x3a2>; +defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>; +defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>; +defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>; } let SubtargetPredicate = HasFmaMixInsts in { @@ -467,54 +467,54 @@ // The mad_mix instructions were renamed and their behaviors changed, // but the opcode stayed the same so we need to put these in a // different DecoderNamespace to avoid the ambiguity. -defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x3a0>; -defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x3a1>; -defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x3a2>; +defm V_FMA_MIX_F32 : VOP3P_Real_vi <0x20>; +defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>; +defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>; } } let SubtargetPredicate = HasDot2Insts in { -defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x3a3>; -defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x3a6>; -defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x3a7>; -defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x3a9>; -defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x3ab>; +defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>; +defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>; +defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>; +defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>; +defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>; } // End SubtargetPredicate = HasDot2Insts let SubtargetPredicate = HasDot1Insts in { -defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x3a8>; -defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x3aa>; +defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>; +defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>; } // End SubtargetPredicate = HasDot1Insts let SubtargetPredicate = HasMAIInsts in { -defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x3d8>; -defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x3d9>; -defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x3c0>; -defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x3c1>; -defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x3c2>; -defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x3c4>; -defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x3c5>; -defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x3c8>; -defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x3c9>; -defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x3ca>; -defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI <0x3cc>; -defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x3cd>; -defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x3d0>; -defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x3d1>; -defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x3d2>; -defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x3d4>; -defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x3d5>; -defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x3e8>; -defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x3e9>; -defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x3eb>; -defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x3ec>; -defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x3ed>; +defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>; +defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>; +defm V_MFMA_F32_32X32X1F32 : VOP3P_Real_MAI <0x40>; +defm V_MFMA_F32_16X16X1F32 : VOP3P_Real_MAI <0x41>; +defm V_MFMA_F32_4X4X1F32 : VOP3P_Real_MAI <0x42>; +defm V_MFMA_F32_32X32X2F32 : VOP3P_Real_MAI <0x44>; +defm V_MFMA_F32_16X16X4F32 : VOP3P_Real_MAI <0x45>; +defm V_MFMA_F32_32X32X4F16 : VOP3P_Real_MAI <0x48>; +defm V_MFMA_F32_16X16X4F16 : VOP3P_Real_MAI <0x49>; +defm V_MFMA_F32_4X4X4F16 : VOP3P_Real_MAI <0x4a>; +defm V_MFMA_F32_32X32X8F16 : VOP3P_Real_MAI <0x4c>; +defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MAI <0x4d>; +defm V_MFMA_I32_32X32X4I8 : VOP3P_Real_MAI <0x50>; +defm V_MFMA_I32_16X16X4I8 : VOP3P_Real_MAI <0x51>; +defm V_MFMA_I32_4X4X4I8 : VOP3P_Real_MAI <0x52>; +defm V_MFMA_I32_32X32X8I8 : VOP3P_Real_MAI <0x54>; +defm V_MFMA_I32_16X16X16I8 : VOP3P_Real_MAI <0x55>; +defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MAI <0x68>; +defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MAI <0x69>; +defm V_MFMA_F32_4X4X2BF16 : VOP3P_Real_MAI <0x6b>; +defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MAI <0x6c>; +defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MAI <0x6d>; } // End SubtargetPredicate = HasMAIInsts @@ -523,48 +523,48 @@ //===----------------------------------------------------------------------===// let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in { - multiclass VOP3P_Real_gfx10 op> { + multiclass VOP3P_Real_gfx10 op> { def _gfx10 : VOP3P_Real(NAME), SIEncodingFamily.GFX10>, VOP3Pe_gfx10 (NAME).Pfl>; } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" -defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x000>; -defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x001>; -defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x002>; -defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x003>; -defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x004>; -defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x005>; -defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x006>; -defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x007>; -defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x008>; -defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x009>; -defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x00a>; -defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x00b>; -defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x00c>; -defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x00d>; -defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x00e>; -defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x00f>; -defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x010>; -defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x011>; -defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x012>; -defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x020>; -defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x021>; -defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x022>; +defm V_PK_MAD_I16 : VOP3P_Real_gfx10<0x00>; +defm V_PK_MUL_LO_U16 : VOP3P_Real_gfx10<0x01>; +defm V_PK_ADD_I16 : VOP3P_Real_gfx10<0x02>; +defm V_PK_SUB_I16 : VOP3P_Real_gfx10<0x03>; +defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10<0x04>; +defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10<0x05>; +defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10<0x06>; +defm V_PK_MAX_I16 : VOP3P_Real_gfx10<0x07>; +defm V_PK_MIN_I16 : VOP3P_Real_gfx10<0x08>; +defm V_PK_MAD_U16 : VOP3P_Real_gfx10<0x09>; +defm V_PK_ADD_U16 : VOP3P_Real_gfx10<0x0a>; +defm V_PK_SUB_U16 : VOP3P_Real_gfx10<0x0b>; +defm V_PK_MAX_U16 : VOP3P_Real_gfx10<0x0c>; +defm V_PK_MIN_U16 : VOP3P_Real_gfx10<0x0d>; +defm V_PK_FMA_F16 : VOP3P_Real_gfx10<0x0e>; +defm V_PK_ADD_F16 : VOP3P_Real_gfx10<0x0f>; +defm V_PK_MUL_F16 : VOP3P_Real_gfx10<0x10>; +defm V_PK_MIN_F16 : VOP3P_Real_gfx10<0x11>; +defm V_PK_MAX_F16 : VOP3P_Real_gfx10<0x12>; +defm V_FMA_MIX_F32 : VOP3P_Real_gfx10<0x20>; +defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10<0x21>; +defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>; let SubtargetPredicate = HasDot2Insts in { -defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x013>; -defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x014>; -defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x015>; -defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x017>; -defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x019>; +defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>; +defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>; +defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>; +defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>; +defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>; } // End SubtargetPredicate = HasDot2Insts let SubtargetPredicate = HasDot1Insts in { -defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x016>; -defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x018>; +defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>; +defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>; } // End SubtargetPredicate = HasDot1Insts diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -296,7 +296,7 @@ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); } -class VOP3Pe op, VOPProfile P> : Enc64 { +class VOP3Pe op, VOPProfile P> : Enc64 { bits<8> vdst; // neg, neg_hi, op_sel put in srcN_modifiers bits<4> src0_modifiers; @@ -320,8 +320,8 @@ let Inst{15} = !if(P.HasClamp, clamp{0}, 0); - let Inst{25-16} = op; - let Inst{31-26} = 0x34; //encoding + let Inst{22-16} = op; + let Inst{31-23} = 0x1a7; //encoding let Inst{40-32} = !if(P.HasSrc0, src0, 0); let Inst{49-41} = !if(P.HasSrc1, src1, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); @@ -332,7 +332,7 @@ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) } -class VOP3Pe_MAI op, VOPProfile P> : Enc64 { +class VOP3Pe_MAI op, VOPProfile P> : Enc64 { bits<8> vdst; bits<10> src0; bits<10> src1; @@ -349,8 +349,8 @@ let Inst{15} = !if(P.HasClamp, clamp{0}, 0); - let Inst{25-16} = op; - let Inst{31-26} = 0x34; //encoding + let Inst{22-16} = op; + let Inst{31-23} = 0x1a7; //encoding let Inst{40-32} = !if(P.HasSrc0, src0{8-0}, 0); let Inst{49-41} = !if(P.HasSrc1, src1{8-0}, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); @@ -362,8 +362,8 @@ } -class VOP3Pe_gfx10 op, VOPProfile P> : VOP3Pe { - let Inst{31-26} = 0x33; //encoding +class VOP3Pe_gfx10 op, VOPProfile P> : VOP3Pe { + let Inst{31-23} = 0x198; //encoding } class VOP3be_gfx6_gfx7 op, VOPProfile p> : VOP3be

{