diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7735,39 +7735,43 @@ // multiclass avx2_pmovmask { + Intrinsic IntSt128, Intrinsic IntSt256, + X86SchedWriteMaskMove schedX, + X86SchedWriteMaskMove schedY> { def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, - VEX_4V, Sched<[WriteVecMaskedLoad]>; + VEX_4V, Sched<[schedX.RM]>; def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>; + VEX_4V, VEX_L, Sched<[schedY.RM]>; def mr : AVX28I<0x8e, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, - VEX_4V, Sched<[WriteVecMaskedStore]>; + VEX_4V, Sched<[schedX.MR]>; def Ymr : AVX28I<0x8e, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, - VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>; + VEX_4V, VEX_L, Sched<[schedY.MR]>; } defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", int_x86_avx2_maskload_d, int_x86_avx2_maskload_d_256, int_x86_avx2_maskstore_d, - int_x86_avx2_maskstore_d_256>; + int_x86_avx2_maskstore_d_256, + WriteVecMaskMove32, WriteVecMaskMove32Y>; defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskload_q, int_x86_avx2_maskload_q_256, int_x86_avx2_maskstore_q, - int_x86_avx2_maskstore_q_256>, VEX_W; + int_x86_avx2_maskstore_q_256, + WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W; multiclass maskmov_lowering { diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -352,8 +352,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -392,8 +392,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -367,8 +367,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -343,8 +343,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -343,8 +343,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -341,8 +341,10 @@ def WriteVecStoreY : SchedWrite; def WriteVecStoreNT : SchedWrite; def WriteVecStoreNTY : SchedWrite; -def WriteVecMaskedStore : SchedWrite; -def WriteVecMaskedStoreY : SchedWrite; +def WriteVecMaskedStore32 : SchedWrite; +def WriteVecMaskedStore64 : SchedWrite; +def WriteVecMaskedStore32Y : SchedWrite; +def WriteVecMaskedStore64Y : SchedWrite; def WriteVecMove : SchedWrite; def WriteVecMoveX : SchedWrite; def WriteVecMoveY : SchedWrite; @@ -550,6 +552,14 @@ : X86SchedWriteMaskMove; def WriteFMaskMove64Y : X86SchedWriteMaskMove; +def WriteVecMaskMove32 + : X86SchedWriteMaskMove; +def WriteVecMaskMove64 + : X86SchedWriteMaskMove; +def WriteVecMaskMove32Y + : X86SchedWriteMaskMove; +def WriteVecMaskMove64Y + : X86SchedWriteMaskMove; // Vector width wrappers. def SchedWriteFAdd diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -369,8 +369,10 @@ defm : X86WriteResUnsupported; def : WriteRes; defm : X86WriteResUnsupported; -def : WriteRes; -defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td --- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td @@ -1093,8 +1093,10 @@ defm : PdWriteRes; defm : PdWriteRes; -defm : PdWriteRes; -defm : PdWriteRes; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; defm : PdWriteRes; defm : PdWriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -670,8 +670,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -311,8 +311,10 @@ def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes; -def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -388,8 +388,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -370,8 +370,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes;