Index: lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/SOPInstructions.td
+++ lib/Target/AMDGPU/SOPInstructions.td
@@ -442,19 +442,19 @@
   [(set i32:$sdst, (UniformBinFrag<shl> i32:$src0, i32:$src1))]
 >;
 def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
-  [(set i64:$sdst, (shl i64:$src0, i32:$src1))]
+  [(set i64:$sdst, (UniformBinFrag<shl> i64:$src0, i32:$src1))]
 >;
 def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
   [(set i32:$sdst, (UniformBinFrag<srl> i32:$src0, i32:$src1))]
 >;
 def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
-  [(set i64:$sdst, (srl i64:$src0, i32:$src1))]
+  [(set i64:$sdst, (UniformBinFrag<srl> i64:$src0, i32:$src1))]
 >;
 def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
   [(set i32:$sdst, (UniformBinFrag<sra> i32:$src0, i32:$src1))]
 >;
 def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
-  [(set i64:$sdst, (sra i64:$src0, i32:$src1))]
+  [(set i64:$sdst, (UniformBinFrag<sra> i64:$src0, i32:$src1))]
 >;
 } // End Defs = [SCC]
Index: lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP3Instructions.td
+++ lib/Target/AMDGPU/VOP3Instructions.td
@@ -17,16 +17,16 @@
     (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp));
 
   list<dag> ret3 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT src0),
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
           (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
           (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
 
   list<dag> ret2 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT src0),
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT src0),
           (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
 
   list<dag> ret1 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT src0)))];
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT src0)))];
 
   list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
                   !if(!eq(P.NumSrcArgs, 2), ret2,
@@ -35,18 +35,18 @@
 class getVOP3PModPat<VOPProfile P, SDPatternOperator node> {
   list<dag> ret3 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
                                     (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
           (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers)),
           (P.Src2VT (VOP3PMods P.Src2VT:$src2, i32:$src2_modifiers))))];
 
   list<dag> ret2 = [(set P.DstVT:$vdst,
-    (node !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+    (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
                           (P.Src0VT (VOP3PMods P.Src0VT:$src0, i32:$src0_modifiers))),
           (P.Src1VT (VOP3PMods P.Src1VT:$src1, i32:$src1_modifiers))))];
 
   list<dag> ret1 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3PMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
 
   list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
                   !if(!eq(P.NumSrcArgs, 2), ret2,
@@ -55,18 +55,18 @@
 class getVOP3OpSelPat<VOPProfile P, SDPatternOperator node> {
   list<dag> ret3 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
                                     (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
           (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers)),
           (P.Src2VT (VOP3OpSel P.Src2VT:$src2, i32:$src2_modifiers))))];
 
   list<dag> ret2 = [(set P.DstVT:$vdst,
-    (node !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+    (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
                           (P.Src0VT (VOP3OpSel P.Src0VT:$src0, i32:$src0_modifiers))),
           (P.Src1VT (VOP3OpSel P.Src1VT:$src1, i32:$src1_modifiers))))];
 
   list<dag> ret1 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSel0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
 
   list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
                   !if(!eq(P.NumSrcArgs, 2), ret2,
@@ -75,18 +75,18 @@
 class getVOP3OpSelModPat<VOPProfile P, SDPatternOperator node> {
   list<dag> ret3 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT !if(P.HasClamp, (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp),
                                     (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
           (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers)),
           (P.Src2VT (VOP3OpSelMods P.Src2VT:$src2, i32:$src2_modifiers))))];
 
   list<dag> ret2 = [(set P.DstVT:$vdst,
-    (node !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
+    (DivergentFragOrOp<node, P>.ret !if(P.HasClamp, (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp)),
                           (P.Src0VT (VOP3OpSelMods P.Src0VT:$src0, i32:$src0_modifiers))),
           (P.Src1VT (VOP3OpSelMods P.Src1VT:$src1, i32:$src1_modifiers))))];
 
   list<dag> ret1 = [(set P.DstVT:$vdst,
-    (node (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
+    (DivergentFragOrOp<node, P>.ret (P.Src0VT (VOP3OpSelMods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp))))];
 
   list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
                   !if(!eq(P.NumSrcArgs, 2), ret2,
@@ -94,9 +94,9 @@
 }
 
 class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
-  list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
-  list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
-  list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))];
+  list<dag> ret3 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
+  list<dag> ret2 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0, P.Src1VT:$src1))];
+  list<dag> ret1 = [(set P.DstVT:$vdst, (DivergentFragOrOp<node, P>.ret P.Src0VT:$src0))];
   list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
                   !if(!eq(P.NumSrcArgs, 2), ret2,
                   ret1));
@@ -381,20 +381,36 @@
 let SchedRW = [Write64Bit] in {
 // These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>>;
-def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>>;
-def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>>;
+let SubtargetPredicate = isSICI, Predicates = [isSICI] in {
+def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP_PAT_GEN<VOP3_Profile<VOP_I64_I64_I32>>, shl>;
+def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP_PAT_GEN<VOP3_Profile<VOP_I64_I64_I32>>, srl>;
+def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP_PAT_GEN<VOP3_Profile<VOP_I64_I64_I32>>, sra>;
 def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
-} // End SubtargetPredicate = isSICI
+} // End SubtargetPredicate = isSICI, Predicates = [isSICI]
 
-let SubtargetPredicate = isVI in {
-def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
-def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
-def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
-} // End SubtargetPredicate = isVI
+let SubtargetPredicate = isVI, Predicates = [isVI] in {
+def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP_PAT_GEN<VOP3_Profile<VOP_I64_I32_I64>>, shl>;
+def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP_PAT_GEN<VOP3_Profile<VOP_I64_I32_I64>>, srl>;
+def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP_PAT_GEN<VOP3_Profile<VOP_I64_I32_I64>>, sra>;
+} // End SubtargetPredicate = isVI, Predicates = [isVI]
 } // End SchedRW = [Write64Bit]
 
+let Predicates = [isVI] in {
+def : AMDGPUPat <
+  (shl i64:$x, i32:$y),
+  (V_LSHLREV_B64 $y, $x)
+>;
+def : AMDGPUPat <
+  (srl i64:$x, i32:$y),
+  (V_LSHRREV_B64 $y, $x)
+>;
+def : AMDGPUPat <
+  (sra i64:$x, i32:$y),
+  (V_ASHRREV_I64 $y, $x)
+>;
+}
+
+
 let SubtargetPredicate = isCIVI in {
 
 let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in {
@@ -564,6 +580,17 @@
             VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
 }
 
+multiclass VOP3_Real_e64only_si <bits<9> op> {
+  def _si :
+    VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+    VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
+    // Hack to stop printing _e64
+    VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME);
+    let OutOperandList = (outs VReg_64:$vdst);
+    let AsmString = ps.Mnemonic # " " # ps.AsmOperands;
+  }
+}
+
 } // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
 
 defm V_MAD_LEGACY_F32 : VOP3_Real_si <0x140>;
@@ -599,9 +626,9 @@
 defm V_CVT_PK_U8_F32 : VOP3_Real_si <0x15e>;
 defm V_DIV_FIXUP_F32 : VOP3_Real_si <0x15f>;
 defm V_DIV_FIXUP_F64 : VOP3_Real_si <0x160>;
-defm V_LSHL_B64 : VOP3_Real_si <0x161>;
-defm V_LSHR_B64 : VOP3_Real_si <0x162>;
-defm V_ASHR_I64 : VOP3_Real_si <0x163>;
+defm V_LSHL_B64 : VOP3_Real_e64only_si <0x161>;
+defm V_LSHR_B64 : VOP3_Real_e64only_si <0x162>;
+defm V_ASHR_I64 : VOP3_Real_e64only_si <0x163>;
 defm V_ADD_F64 : VOP3_Real_si <0x164>;
 defm V_MUL_F64 : VOP3_Real_si <0x165>;
 defm V_MIN_F64 : VOP3_Real_si <0x166>;
@@ -670,6 +697,17 @@
             VOP3Interp_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
 }
 
+multiclass VOP3_Real_e64only_vi <bits<10> op> {
+  def _vi :
+    VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+    VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
+    // Hack to stop printing _e64
+    VOP3_Pseudo ps = !cast<VOP3_Pseudo>(NAME);
+    let OutOperandList = (outs VReg_64:$vdst);
+    let AsmString = ps.Mnemonic # " " # ps.AsmOperands;
+  }
+}
+
 } // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
 
 let AssemblerPredicates = [isVIOnly], DecoderNamespace = "VI" in {
@@ -816,9 +854,9 @@
 defm V_READLANE_B32 : VOP3_Real_vi <0x289>;
 defm V_WRITELANE_B32 : VOP3_Real_vi <0x28a>;
 
-defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
-defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
-defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
+defm V_LSHLREV_B64 : VOP3_Real_e64only_vi <0x28f>;
+defm V_LSHRREV_B64 : VOP3_Real_e64only_vi <0x290>;
+defm V_ASHRREV_I64 : VOP3_Real_e64only_vi <0x291>;
 defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>;
 
 defm V_LSHL_ADD_U32 : VOP3_Real_vi <0x1fd>;
Index: lib/Target/AMDGPU/VOPInstructions.td
===================================================================
--- lib/Target/AMDGPU/VOPInstructions.td
+++ lib/Target/AMDGPU/VOPInstructions.td
@@ -572,6 +572,11 @@
   list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []);
 }
 
+class DivergentFragOrOp<SDPatternOperator Op, VOPProfile P> {
+  SDPatternOperator ret = !if(!eq(P.NeedPatGen,PatGenMode.Pattern),
+    !if(!isa<SDNode>(Op), getDivergentFrag<Op>.ret, Op), Op);
+}
+
 include "VOPCInstructions.td"
 include "VOP1Instructions.td"
 include "VOP2Instructions.td"
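
Note on the mechanism (editorial addition, not part of the patch): the scalar patterns above lean on UniformBinFrag, and DivergentFragOrOp defers to getDivergentFrag, both of which live in AMDGPUInstructions.td rather than in this diff. A minimal sketch of those helpers, assuming the shape they had in the tree around this change (the exact spelling is reconstructed here, so treat it as illustrative):

    // Matches Op only when SelectionDAG has marked the node uniform
    // (the same value in every lane), so it can be selected to a scalar
    // SOP instruction executed once per wave.
    class UniformBinFrag<SDPatternOperator Op> : PatFrag <
      (ops node:$src0, node:$src1),
      (Op $src0, $src1),
      [{ return !N->isDivergent(); }]
    >;

    // The mirror image: matches only divergent nodes, which must go to
    // a per-lane VALU instruction. getDivergentFrag<Op>.ret yields a
    // fragment equivalent to DivergentBinFrag<Op>.
    class DivergentBinFrag<SDPatternOperator Op> : PatFrag <
      (ops node:$src0, node:$src1),
      (Op $src0, $src1),
      [{ return N->isDivergent(); }]
    >;

The net effect: for a profile wrapped in VOP_PAT_GEN (NeedPatGen == PatGenMode.Pattern), DivergentFragOrOp swaps a plain SDNode such as shl, srl, or sra for its divergent fragment, so V_LSHL_B64 and friends only match divergent 64-bit shifts while the S_LSHL_B64 pattern keeps the uniform ones on the SALU; profiles that do not opt in get the original operator back unchanged.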