Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -599,9 +599,11 @@ case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10: + case AMDGPU::V_CNDMASK_B32_dpp8_gfx10: case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10: @@ -665,6 +667,7 @@ switch (MI->getOpcode()) { default: break; + case AMDGPU::V_CNDMASK_B32_sdwa_gfx10: case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10: case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10: Index: lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- lib/Target/AMDGPU/VOP2Instructions.td +++ lib/Target/AMDGPU/VOP2Instructions.td @@ -956,13 +956,15 @@ } // End DecoderNamespace = "SDWA10" //===------------------------------ VOP2be ------------------------------===// - multiclass VOP2be_Real_gfx10 op, string opName, string asmName> { + multiclass VOP2be_Real_e32_gfx10 op, string opName, string asmName> { def _e32_gfx10 : VOP2_Real(opName#"_e32"), SIEncodingFamily.GFX10>, VOP2e(opName#"_e32").Pfl> { VOP2_Pseudo Ps = !cast(opName#"_e32"); let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); } + } + multiclass VOP2be_Real_e64_gfx10 op, string opName, string asmName> { def _e64_gfx10 : VOP3_Real(opName#"_e64"), SIEncodingFamily.GFX10>, VOP3be_gfx10<{0, 1, 0, 0, op{5-0}}, @@ -970,6 +972,8 @@ VOP3_Pseudo Ps = !cast(opName#"_e64"); let AsmString = asmName # Ps.AsmOperands; } + } + multiclass VOP2be_Real_sdwa_gfx10 op, string opName, string asmName> { foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in def _sdwa_gfx10 : VOP_SDWA10_Real(opName#"_sdwa")>, @@ -978,6 +982,28 @@ let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands); let DecoderNamespace = "SDWA10"; } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w32_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in + def _sdwa_w64_gfx10 : + Base_VOP_SDWA10_Real(opName#"_sdwa")>, + VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { + VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); + let AsmString = asmName # Ps.AsmOperands; + let isAsmParserOnly = 1; + let DecoderNamespace = "SDWA10"; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp_gfx10 op, string opName, string asmName> { foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp_gfx10 : VOP2_DPP16(opName#"_dpp"), asmName> { @@ -986,60 +1012,46 @@ let DecoderNamespace = "SDWA10"; } foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w32_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp_w64_gfx10 : + Base_VOP2_DPP16(opName#"_dpp"), asmName> { + string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; + let AsmString = asmName # AsmDPP; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } + } + multiclass VOP2be_Real_dpp8_gfx10 op, string opName, string asmName> { + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in def _dpp8_gfx10 : VOP2_DPP8(opName#"_e32"), asmName> { string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; let AsmString = asmName # !subst(", vcc", "", AsmDPP8); let DecoderNamespace = "DPP8"; } - - let WaveSizePredicate = isWave32 in { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in - def _sdwa_w32_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # !subst("vcc", "vcc_lo", Ps.AsmOperands); - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w32_gfx10 : - Base_VOP2_DPP16(opName#"_dpp"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP); - let isAsmParserOnly = 1; - } - def _dpp8_w32_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave32 - - let WaveSizePredicate = isWave64 in { - foreach _ = BoolToList(opName#"_e32").Pfl.HasExtSDWA9>.ret in - def _sdwa_w64_gfx10 : - Base_VOP_SDWA10_Real(opName#"_sdwa")>, - VOP2_SDWA9Ae(opName#"_sdwa").Pfl> { - VOP2_SDWA_Pseudo Ps = !cast(opName#"_sdwa"); - let AsmString = asmName # Ps.AsmOperands; - let isAsmParserOnly = 1; - let DecoderNamespace = "SDWA10"; - } - def _dpp_w64_gfx10 : - Base_VOP2_DPP16(opName#"_dpp"), asmName> { - string AsmDPP = !cast(opName#"_e32").Pfl.AsmDPP16; - let AsmString = asmName # AsmDPP; - let isAsmParserOnly = 1; - } - def _dpp8_w64_gfx10 : - VOP2_DPP8(opName#"_e32"), asmName> { - string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; - let AsmString = asmName # AsmDPP8; - let isAsmParserOnly = 1; - } - } // End WaveSizePredicate = isWave64 + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w32_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8); + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave32; + } + foreach _ = BoolToList(opName#"_e32").Pfl.HasExtDPP>.ret in + def _dpp8_w64_gfx10 : + VOP2_DPP8(opName#"_e32"), asmName> { + string AsmDPP8 = !cast(opName#"_e32").Pfl.AsmDPP8; + let AsmString = asmName # AsmDPP8; + let isAsmParserOnly = 1; + let WaveSizePredicate = isWave64; + } } //===----------------------------- VOP3Only -----------------------------===// @@ -1060,8 +1072,19 @@ } } // End AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" -multiclass Base_VOP2_Real_gfx10 op> : - VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10; +multiclass VOP2be_Real_gfx10 op, string opName, string asmName> : + VOP2be_Real_e32_gfx10, + VOP2be_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; + +multiclass VOP2e_Real_gfx10 op, string opName, string asmName> : + VOP2_Real_e32_gfx10, + VOP2_Real_e64_gfx10, + VOP2be_Real_sdwa_gfx10, + VOP2be_Real_dpp_gfx10, + VOP2be_Real_dpp8_gfx10; multiclass VOP2_Real_gfx10 op> : VOP2_Real_e32_gfx10, VOP2_Real_e64_gfx10, @@ -1075,7 +1098,6 @@ VOP2_Real_dpp_gfx10_with_name, VOP2_Real_dpp8_gfx10_with_name; -defm V_CNDMASK_B32 : Base_VOP2_Real_gfx10<0x001>; defm V_XNOR_B32 : VOP2_Real_gfx10<0x01e>; defm V_FMAC_F32 : VOP2_Real_gfx10<0x02b>; defm V_FMAMK_F32 : VOP2Only_Real_MADK_gfx10<0x02c>; @@ -1108,6 +1130,9 @@ defm V_SUBREV_CO_CI_U32 : VOP2be_Real_gfx10<0x02a, "V_SUBBREV_U32", "v_subrev_co_ci_u32">; +defm V_CNDMASK_B32 : + VOP2e_Real_gfx10<0x001, "V_CNDMASK_B32", "v_cndmask_b32">; + // VOP3 only. defm V_BFM_B32 : VOP3Only_Real_gfx10<0x363>; defm V_BCNT_U32_B32 : VOP3Only_Real_gfx10<0x364>; Index: test/MC/AMDGPU/gfx10_asm_dpp8.s =================================================================== --- test/MC/AMDGPU/gfx10_asm_dpp8.s +++ test/MC/AMDGPU/gfx10_asm_dpp8.s @@ -510,6 +510,26 @@ v_ldexp_f16_dpp v5, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 // GFX10: encoding: [0xea,0x04,0x0a,0x76,0x01,0x88,0xc6,0xfa] +v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 +// W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +// W64-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 +// W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +// W32-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v0, v1, v2 dpp8:[0,1,2,3,4,5,6,7] fi:1 +// W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +// W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] + v_add_co_ci_u32_dpp v0, vcc_lo, v0, v0, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: [0xe9,0x00,0x00,0x50,0x00,0x77,0x39,0x05] // W64-ERR: error: instruction not supported on this GPU Index: test/MC/AMDGPU/wave32.s =================================================================== --- test/MC/AMDGPU/wave32.s +++ test/MC/AMDGPU/wave32.s @@ -63,6 +63,30 @@ // GFX1032-ERR: error: instruction not supported on this GPU // GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; encoding: [0x02,0x07,0x02,0x02] +v_cndmask_b32_sdwa v5, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] +// GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] + +v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] +// GFX1064-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06] + +v_cndmask_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] +// GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] + +v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] +// GFX1064-ERR: error: instruction not supported on this GPU + +v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +// GFX1032-ERR: error: instruction not supported on this GPU +// GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00] + v_add_co_u32_e32 v2, vcc_lo, s0, v2 // GFX1032-ERR: error: instruction not supported on this GPU // GFX1064-ERR: error: instruction not supported on this GPU Index: test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt =================================================================== --- test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt +++ test/MC/Disassembler/AMDGPU/gfx10_dasm_dpp16.txt @@ -316,6 +316,14 @@ # GFX10: v_mac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x3e,0x01,0x1b,0x00,0x00] 0xfa,0x04,0x0a,0x3e,0x01,0x1b,0x00,0x00 +# W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05] +0xe9,0x04,0x00,0x02,0x01,0x77,0x39,0x05 + +# W32: v_cndmask_b32_dpp v0, v1, v2, vcc_lo dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +# W64: v_cndmask_b32_dpp v0, v1, v2, vcc dpp8:[0,1,2,3,4,5,6,7] fi:1 ; encoding: [0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa] +0xea,0x04,0x00,0x02,0x01,0x88,0xc6,0xfa + # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0x1b,0x00,0x00] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0x1b,0x00,0x00] 0xfa,0x04,0x0a,0x50,0x01,0x1b,0x00,0x00 Index: test/MC/Disassembler/AMDGPU/wave32.txt =================================================================== --- test/MC/Disassembler/AMDGPU/wave32.txt +++ test/MC/Disassembler/AMDGPU/wave32.txt @@ -37,6 +37,14 @@ # GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ; 0x02,0x07,0x02,0x02 +# GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +# GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD +0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06 + +# GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +# GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00 + # GFX1032: v_add_co_u32_e64 v2, vcc_lo, s0, v2 # GFX1064: v_add_co_u32_e64 v2, vcc, s0, v2 0x02,0x6a,0x0f,0xd7,0x00,0x04,0x02,0x00