Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -724,7 +724,9 @@ OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); - void cvtSDWA(MCInst &Inst, const OperandVector &Operands, bool IsVOP1); + void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); + void cvtSDWA(MCInst &Inst, const OperandVector &Operands, + uint64_t BasicInstType); }; struct OptionalOperand { @@ -2677,15 +2679,19 @@ } void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { - cvtSDWA(Inst, Operands, true); + cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); } void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { - cvtSDWA(Inst, Operands, false); + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); +} + +void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOPC); } void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, - bool IsVOP1) { + uint64_t BasicInstType) { OptionalImmIndexMap OptionalIdx; unsigned I = 1; @@ -2697,7 +2703,12 @@ for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); // Add the register arguments - if (Op.isRegOrImmWithInputMods()) { + if (BasicInstType == SIInstrFlags::VOPC && + Op.isReg() && + Op.Reg.RegNo == AMDGPU::VCC) { + // VOPC sdwa use "vcc" token as dst. Skip it. 
+ continue; + } else if (Op.isRegOrImmWithInputMods()) { Op.addRegOrImmWithInputModsOperands(Inst, 2); } else if (Op.isImm()) { // Handle optional arguments @@ -2713,15 +2724,27 @@ // V_NOP_sdwa has no optional sdwa arguments return; } - if (IsVOP1) { + switch (BasicInstType) { + case SIInstrFlags::VOP1: { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); - } else { // VOP2 + break; + } + case SIInstrFlags::VOP2: { addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6); + break; + } + case SIInstrFlags::VOPC: { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6); + break; + } + default: + llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); } } Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td @@ -1253,36 +1253,48 @@ } class getInsSDWA { + bit HasFloatModifiers, ValueType DstVT> { - dag ret = !if (!eq(NumSrcArgs, 0), - // VOP1 without input operands (V_NOP) - (ins), - !if (!eq(NumSrcArgs, 1), - !if (HasFloatModifiers, - // VOP1_SDWA with float modifiers - (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel) - /* else */, - // VOP1_SDWA with sext modifier - (ins IntInputMods:$src0_imodifiers, Src0RC:$src0, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel) - /* endif */) + dag ret = !if(!eq(NumSrcArgs, 0), + // VOP1 without input operands (V_NOP) + (ins), + !if(!eq(NumSrcArgs, 1), + !if(HasFloatModifiers, + // VOP1_SDWA with float modifiers + (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel) + /* else */, + // VOP1_SDWA with sext modifier + (ins IntInputMods:$src0_imodifiers, Src0RC:$src0, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel) + /* endif */) /* NumSrcArgs == 2 */, - !if (HasFloatModifiers, - // VOP2_SDWA with float modifiers - (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0, - FPInputMods:$src1_fmodifiers, Src1RC:$src1, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel) - /* else */, - // VOP2_DPP with sext modifier - (ins IntInputMods:$src0_imodifiers, Src0RC:$src0, - IntInputMods:$src1_imodifiers, Src1RC:$src1, - clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, - src0_sel:$src0_sel, src1_sel:$src1_sel) + !if(HasFloatModifiers, + !if(!eq(DstVT.Size, 1), + // VOPC_SDWA with float modifiers + (ins 
FPInputMods:$src0_fmodifiers, Src0RC:$src0, + FPInputMods:$src1_fmodifiers, Src1RC:$src1, + clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA or VOPC_SDWA with float modifiers + (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0, + FPInputMods:$src1_fmodifiers, Src1RC:$src1, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel) + ), + /* else */ + !if(!eq(DstVT.Size, 1), + // VOPC_SDWA with sext modifiers + (ins IntInputMods:$src0_imodifiers, Src0RC:$src0, + IntInputMods:$src1_imodifiers, Src1RC:$src1, + clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA or VOPC_SDWA with sext modifier + (ins IntInputMods:$src0_imodifiers, Src0RC:$src0, + IntInputMods:$src1_imodifiers, Src1RC:$src1, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel) + ) /* endif */))); } @@ -1290,7 +1302,7 @@ class getOutsExt { dag ret = !if(HasDst, !if(!eq(DstVT.Size, 1), - (outs DstRCDPP:$sdst), // sdst for VOPC + (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions (outs DstRCDPP:$vdst)), (outs)); // V_NOP } @@ -1344,7 +1356,7 @@ ValueType DstVT = i32> { string dst = !if(HasDst, !if(!eq(DstVT.Size, 1), - "$sdst", // use $sdst for VOPC + " vcc", // use vcc token as dst for VOPC instructions "$vdst"), ""); string src0 = !if(HasFloatModifiers, "$src0_fmodifiers", "$src0_imodifiers"); @@ -1360,7 +1372,10 @@ "", !if(!eq(NumSrcArgs, 1), " $dst_sel $dst_unused $src0_sel", - " $dst_sel $dst_unused $src0_sel $src1_sel" + !if(!eq(DstVT.Size, 1), + " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC + " $dst_sel $dst_unused $src0_sel $src1_sel" + ) ) ); string ret = dst#args#sdwa; @@ -1425,7 +1440,7 @@ field dag Ins64 = getIns64.ret; field dag InsDPP = getInsDPP.ret; - field dag InsSDWA = getInsSDWA.ret; + field dag InsSDWA = getInsSDWA.ret; field string Asm32 = getAsm32.ret; field string Asm64 = getAsm64.ret; @@ -1538,6 
+1553,11 @@ class VOPC_Class_Profile : VOPC_Profile { let Ins64 = (ins FPInputMods:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); let Asm64 = "$sdst, $src0_modifiers, $src1"; + let InsSDWA = (ins FPInputMods:$src0_fmodifiers, Src0RC64:$src0, + IntInputMods:$src1_imodifiers, Src1RC64:$src1, + clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); + let AsmSDWA = " vcc, $src0_fmodifiers, $src1_imodifiers$clamp $src0_sel $src1_sel"; + } def VOPC_I1_F32_F32 : VOPC_Profile; @@ -2247,6 +2267,18 @@ let isCodeGenOnly = 1; } +class VOPC_SDWA : + VOPC_SDWAe , + VOP_SDWA , + SDWADisableFields

{ + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let hasSideEffects = DefExec; + let AsmMatchConverter = "cvtSdwaVOPC"; + let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]); + let DecoderNamespace = "SDWA"; + let DisableDecoder = DisableVIDecoder; +} + multiclass VOPC_m pattern, string opName, bit DefExec, VOPProfile p, list sched, @@ -2293,6 +2325,8 @@ defm _e64 : VOP3_C_m ; + + def _sdwa : VOPC_SDWA ; } // Special case for class instructions which only have modifiers on @@ -2305,6 +2339,11 @@ defm _e64 : VOP3_C_m , VOP3DisableModFields<1, 0, 0>; + + def _sdwa : VOPC_SDWA { + let src1_fmodifiers = 0; + let src1_imodifiers = ?; + } } multiclass VOPCInst : VOPCX ; -multiclass VOP3_Helper pat, int NumSrcArgs, bit HasMods, - bit VOP3Only = 0> : VOP3_m < - op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods, VOP3Only ->; multiclass VOPC_CLASS_F32 : VOPCClassInst ; @@ -2385,6 +2419,13 @@ multiclass VOPCX_CLASS_F64 : VOPCClassInst ; + +multiclass VOP3_Helper pat, int NumSrcArgs, bit HasMods, + bit VOP3Only = 0> : VOP3_m < + op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods, VOP3Only +>; + multiclass VOP3Inst : VOP3_Helper < Index: llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td +++ llvm/trunk/lib/Target/AMDGPU/VIInstrFormats.td @@ -282,6 +282,10 @@ let Inst{16-9} = src1; let Inst{24-17} = op; let Inst{31-25} = 0x3e; // encoding + + // VOPC disallows dst_sel and dst_unused as they have no effect on destination + let Inst{42-40} = 0x6; + let Inst{44-43} = 0x2; } class EXPe_vi : EXPe { Index: llvm/trunk/test/MC/AMDGPU/vop_sdwa.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/vop_sdwa.s +++ llvm/trunk/test/MC/AMDGPU/vop_sdwa.s @@ -112,6 +112,10 @@ // VI: v_and_b32_sdwa v0, sext(v0), sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 ; encoding: 
[0xf9,0x00,0x00,0x26,0x00,0x06,0x0e,0x0a] v_and_b32 v0, sext(v0), sext(v0) dst_unused:UNUSED_PAD src1_sel:BYTE_2 +// NOSICI: error: +// VI: v_cmp_class_f32 vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x16,0x12,0x0c] +v_cmp_class_f32 vcc, -v1, sext(v2) src0_sel:BYTE_2 src1_sel:WORD_0 + //===----------------------------------------------------------------------===// // Check VOP1 opcodes //===----------------------------------------------------------------------===// @@ -496,3 +500,62 @@ // VI: v_ldexp_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 ; encoding: [0xf9,0x06,0x02,0x66,0x02,0x06,0x05,0x02] v_ldexp_f16 v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_2 +//===----------------------------------------------------------------------===// +// Check VOPC opcodes +//===----------------------------------------------------------------------===// + +// NOSICI: error: +// VI: v_cmp_eq_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x84,0x7c,0x01,0x16,0x02,0x04] +v_cmp_eq_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmp_nle_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7c,0x01,0x16,0x02,0x04] +v_cmp_nle_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmpx_gt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa8,0x7c,0x01,0x16,0x02,0x04] +v_cmpx_gt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmpx_nlt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xbc,0x7c,0x01,0x16,0x02,0x04] +v_cmpx_nlt_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmp_lt_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x82,0x7d,0x01,0x16,0x02,0x04] +v_cmp_lt_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: 
v_cmp_t_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x8e,0x7d,0x01,0x16,0x02,0x04] +v_cmp_t_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmpx_eq_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xa4,0x7d,0x01,0x16,0x02,0x04] +v_cmpx_eq_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmpx_ne_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xaa,0x7d,0x01,0x16,0x02,0x04] +v_cmpx_ne_i32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmp_f_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x90,0x7d,0x01,0x16,0x02,0x04] +v_cmp_f_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmp_gt_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x98,0x7d,0x01,0x16,0x02,0x04] +v_cmp_gt_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmpx_le_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xb6,0x7d,0x01,0x16,0x02,0x04] +v_cmpx_le_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmpx_ne_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0xba,0x7d,0x01,0x16,0x02,0x04] +v_cmpx_ne_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmp_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x20,0x7c,0x01,0x16,0x02,0x04] +v_cmp_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +// NOSICI: error: +// VI: v_cmpx_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 ; encoding: [0xf9,0x04,0x22,0x7c,0x01,0x16,0x02,0x04] +v_cmpx_class_f32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0