Index: llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -890,6 +890,10 @@ Opc == AMDGPU::V_MAC_F32_e32)) return false; + // FIXME: has SDWA but requires handling of implicit VCC use + if (Opc == AMDGPU::V_CNDMASK_B32_e32) + return false; + return true; } Index: llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td +++ llvm/trunk/lib/Target/AMDGPU/VOP2Instructions.td @@ -168,6 +168,10 @@ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in { def _e32 : VOP2_Pseudo , Commutable_REV; + + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } } def _e64 : VOP3_Pseudo .ret>, @@ -294,12 +298,30 @@ let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above. let Asm32 = "$vdst, $src0, $src1, vcc"; let Asm64 = "$vdst, $src0, $src1, $src2"; + let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; + let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst); // Suppress src2 implied by type since the 32-bit encoding uses an // implicit VCC use. 
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); + + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); + + let InsDPP = (ins DstRCDPP:$old, + Src0DPP:$src0, + Src1DPP:$src1, + dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, + bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); + let HasExt = 1; + let HasSDWA9 = 1; } def VOP_READLANE : VOPProfile<[i32, i32, i32]> { @@ -820,7 +842,7 @@ def _dpp : VOP2_DPP(NAME#"_e32")>; } -defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>; +defm V_CNDMASK_B32 : VOP2_Real_e32e64_vi <0x0>; defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>; defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>; defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>; Index: llvm/trunk/test/MC/AMDGPU/vop_dpp.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop_dpp.s +++ llvm/trunk/test/MC/AMDGPU/vop_dpp.s @@ -588,6 +588,14 @@ // GFX9: v_subbrev_co_u32_dpp v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x06,0x02,0x3c,0x02,0x01,0x09,0xa1] v_subbrev_co_u32 v1, vcc, v2, v3, vcc row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 +// NOSICI: error +// VI9: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00] +v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 + +// NOSICI: error +// VI9: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00] +v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 + //===----------------------------------------------------------------------===// // Check that immideates and scalar regs are not supported //===----------------------------------------------------------------------===// Index: 
llvm/trunk/test/MC/AMDGPU/vop_sdwa.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop_sdwa.s +++ llvm/trunk/test/MC/AMDGPU/vop_sdwa.s @@ -581,6 +581,20 @@ // GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +// NOSICI: error +// GFX89: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06] +v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD + +// NOSICI: error +// NOVI: error +// GFX9: v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06] +v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD + +// NOSICI: error +// NOVI: error +// GFX9: v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e] +v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD + //===----------------------------------------------------------------------===// // Check VOPC opcodes //===----------------------------------------------------------------------===// Index: llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/dpp_vi.txt @@ -91,4 +91,10 @@ 0xfa 0xe4 0x98 0x2c 0x4c 0x4e 0x00 0xff # VI: v_mac_f16_dpp v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 ; encoding: 
[0xfa,0x06,0x02,0x46,0x02,0x01,0x09,0xa1] -0xfa 0x06 0x02 0x46 0x02 0x01 0x09 0xa1 \ No newline at end of file +0xfa 0x06 0x02 0x46 0x02 0x01 0x09 0xa1 + +# VI: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00] +0xfa,0x04,0x0a,0x00,0x01,0xe4,0x00,0x00 + +# VI: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00] +0xfa,0x04,0x0a,0x00,0x01,0x0f,0x01,0x00 Index: llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt +++ llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_gfx9.txt @@ -393,6 +393,15 @@ # GFX9: v_subbrev_co_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] 0xf9 0x06 0x02 0x3c 0x02 0x06 0x05 0x02 +# GFX9: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06] +0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06 + +# GFX9: v_cndmask_b32_sdwa v5, -1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06] +0xf9,0x04,0x0a,0x00,0xc1,0x06,0x86,0x06 + +# GFX9: v_cndmask_b32_sdwa v5, v1, sext(v2), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e] +0xf9,0x04,0x0a,0x00,0x01,0x06,0x06,0x0e + #-----------------------------------------------------------------------------# # VOPC #-----------------------------------------------------------------------------# Index: llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt =================================================================== --- llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt +++ 
llvm/trunk/test/MC/Disassembler/AMDGPU/sdwa_vi.txt @@ -365,3 +365,6 @@ # VI: v_subbrev_u32_sdwa v1, vcc, v2, v3, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x3c,0x02,0x06,0x05,0x02] 0xf9 0x06 0x02 0x3c 0x02 0x06 0x05 0x02 + +# VI: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06] +0xf9,0x04,0x0a,0x00,0x01,0x00,0x06,0x06