Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -51,10 +51,6 @@ GIComplexOperandMatcher, GIComplexPatternEquiv; -def gi_vop3pmods0 : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - def gi_vop3opselmods : GIComplexOperandMatcher, GIComplexPatternEquiv; Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -263,8 +263,6 @@ SDValue &Clamp, SDValue &Omod) const; bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods, - SDValue &Clamp) const; bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods, @@ -2590,17 +2588,6 @@ return true; } -bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, - SDValue &SrcMods, - SDValue &Clamp) const { - SDLoc SL(In); - - // FIXME: Handle clamp and op_sel - Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); - - return SelectVOP3PMods(In, Src, SrcMods); -} - bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const { Src = In; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -153,9 +153,6 @@ InstructionSelector::ComplexRendererFns selectVOP3PMods(MachineOperand &Root) const; - InstructionSelector::ComplexRendererFns - selectVOP3PMods0(MachineOperand &Root) const; - InstructionSelector::ComplexRendererFns selectVOP3OpSelMods0(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns Index: 
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2422,23 +2422,6 @@ }}; } -InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectVOP3PMods0(MachineOperand &Root) const { - MachineRegisterInfo &MRI - = Root.getParent()->getParent()->getParent()->getRegInfo(); - - Register Src; - unsigned Mods; - std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI); - - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); }, - [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } , // src_mods - // FIXME: Handle clamp and op_sel - [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } - }}; -} - InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectVOP3Mods_nnan(MachineOperand &Root) const { Register Src; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1302,7 +1302,6 @@ def VOP3OMods : ComplexPattern; def VOP3PMods : ComplexPattern; -def VOP3PMods0 : ComplexPattern; def VOP3OpSel : ComplexPattern; def VOP3OpSel0 : ComplexPattern; @@ -1704,7 +1703,7 @@ !if (HasClamp, (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, - clampmod:$clamp, + clampmod0:$clamp, op_sel:$op_sel, op_sel_hi:$op_sel_hi, neg_lo:$neg_lo, neg_hi:$neg_hi), (ins Src0Mod:$src0_modifiers, Src0RC:$src0, @@ -1716,7 +1715,7 @@ (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Src1Mod:$src1_modifiers, Src1RC:$src1, Src2Mod:$src2_modifiers, Src2RC:$src2, - clampmod:$clamp, + clampmod0:$clamp, op_sel:$op_sel, op_sel_hi:$op_sel_hi, neg_lo:$neg_lo, neg_hi:$neg_hi), (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Index: llvm/lib/Target/AMDGPU/VOP3Instructions.td 
=================================================================== --- llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -34,18 +34,16 @@ class getVOP3PModPat { list ret3 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp), - (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))), - (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)), - (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))]; + (DivergentFragOrOp.ret (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)), + (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))]; list ret2 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)), - (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))), - (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))]; + (DivergentFragOrOp.ret (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers)), + (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))]; list ret1 = [(set P.DstVT:$vdst, - (DivergentFragOrOp.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))]; + (DivergentFragOrOp.ret (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))))]; list ret = !if(!eq(P.NumSrcArgs, 3), ret3, !if(!eq(P.NumSrcArgs, 2), ret2, Index: llvm/lib/Target/AMDGPU/VOP3PInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -29,9 +29,14 @@ !con( (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0, FP16InputMods:$src1_modifiers, VCSrc_f16:$src1, - FP16InputMods:$src2_modifiers, VCSrc_f16:$src2, - clampmod:$clamp), - !if(UseTiedOutput, (ins VGPR_32:$vdst_in), (ins))), + FP16InputMods:$src2_modifiers, VCSrc_f16:$src2), + // FIXME: clampmod0 
misbehaves with the non-default vdst_in + // following it. For now work around this by requiring clamp + // in tied patterns. This should use undef_tied_input, but it + // seems underdeveloped and doesn't apply the right register + // class constraints. + !if(UseTiedOutput, (ins clampmod:$clamp, VGPR_32:$vdst_in), + (ins clampmod0:$clamp))), (ins op_sel:$op_sel, op_sel_hi:$op_sel_hi)); let Constraints = !if(UseTiedOutput, "$vdst = $vdst_in", ""); @@ -75,8 +80,8 @@ // The constant will be emitted as a mov, and folded later. // TODO: We could directly encode the immediate now def : GCNPat< - (add (v2i16 (VOP3PMods0 v2i16:$src0, i32:$src0_modifiers, i1:$clamp)), NegSubInlineConstV216:$src1), - (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1, $clamp) + (add (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)), NegSubInlineConstV216:$src1), + (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1) >; multiclass MadFmaMixPats { let SubtargetPredicate = dot_inst.SubtargetPredicate in def : GCNPat < - (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)), + (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)), (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)), (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), timm:$clamp), (dot_inst $src0_modifiers, VSrc_v2f16:$src0,