Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -4261,12 +4261,17 @@ const OperandVector &Operands, bool IsPacked) { OptionalImmIndexMap OptIdx; + int Opc = Inst.getOpcode(); cvtVOP3(Inst, Operands, OptIdx); + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { + assert(!IsPacked); + Inst.addOperand(Inst.getOperand(0)); + } + // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 // instruction, and then figure out where to actually put the modifiers - int Opc = Inst.getOpcode(); addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); Index: lib/Target/AMDGPU/VOP3PInstructions.td =================================================================== --- lib/Target/AMDGPU/VOP3PInstructions.td +++ lib/Target/AMDGPU/VOP3PInstructions.td @@ -18,20 +18,33 @@ // Non-packed instructions that use the VOP3P encoding. // VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed. -class VOP3_VOP3PInst : +class VOP3_VOP3PInst : VOP3P_Pseudo { // These operands are only sort of f16 operands. Depending on // op_sel_hi, these may be interpreted as f32. The inline immediate // values are really f16 converted to f32, so we treat these as f16 // operands. let InOperandList = - (ins - FP16InputMods:$src0_modifiers, VCSrc_f16:$src0, - FP16InputMods:$src1_modifiers, VCSrc_f16:$src1, - FP16InputMods:$src2_modifiers, VCSrc_f16:$src2, - clampmod:$clamp, - op_sel:$op_sel, - op_sel_hi:$op_sel_hi); + !if(UseTiedOutput, + (ins + FP16InputMods:$src0_modifiers, VCSrc_f16:$src0, + FP16InputMods:$src1_modifiers, VCSrc_f16:$src1, + FP16InputMods:$src2_modifiers, VCSrc_f16:$src2, + clampmod:$clamp, + VGPR_32:$vdst_in, + op_sel:$op_sel, + op_sel_hi:$op_sel_hi), + (ins + FP16InputMods:$src0_modifiers, VCSrc_f16:$src0, + FP16InputMods:$src1_modifiers, VCSrc_f16:$src1, + FP16InputMods:$src2_modifiers, VCSrc_f16:$src2, + clampmod:$clamp, + op_sel:$op_sel, + op_sel_hi:$op_sel_hi)); + + let Constraints = !if(UseTiedOutput, "$vdst = $vdst_in", ""); + let DisableEncoding = !if(UseTiedOutput, "$vdst_in", ""); let AsmOperands = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp"; } @@ -70,8 +83,8 @@ def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile>; // Clamp modifier is applied after conversion to f16. -def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile>; -def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile>; +def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile, 1>; +def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile, 1>; } let Predicates = [HasMadMix] in { @@ -83,7 +96,8 @@ (V_MAD_MIXLO_F16 $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, - 0) + 0, + (i32 (IMPLICIT_DEF))) >; } // End Predicates = [HasMadMix]