Index: lib/Target/X86/X86Instr3DNow.td =================================================================== --- lib/Target/X86/X86Instr3DNow.td +++ lib/Target/X86/X86Instr3DNow.td @@ -38,7 +38,7 @@ [(set VR64:$dst, (!cast( !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, (bitconvert (load_mmx addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass I3DNow_conv_rm_int opc, string Mn, @@ -51,7 +51,7 @@ [(set VR64:$dst, (!cast( !strconcat("int_x86_3dnow", Ver, "_", Mn)) (bitconvert (load_mmx addr:$src))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -524,7 +524,7 @@ (From.VT (bitconvert (From.LdFrag addr:$src2))), (iPTR imm))>, AVX512AIi8Base, EVEX_4V, EVEX_CD8, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -783,7 +783,7 @@ (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>, - Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; + Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } //===----------------------------------------------------------------------===// @@ -1742,7 +1742,7 @@ OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>, - EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -1758,7 +1758,7 @@ (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } 
multiclass avx512_perm_i_sizes opc, string OpcodeStr, @@ -1860,7 +1860,7 @@ OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, (bitconvert (_.LdFrag addr:$src3)))), 1>, - EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_perm_t_mb opc, string OpcodeStr, @@ -1874,7 +1874,7 @@ (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_perm_t_sizes opc, string OpcodeStr, @@ -1955,19 +1955,19 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX5128I, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; def rmkz : AVX5128I, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; + Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; } } } @@ -1980,7 +1980,7 @@ "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbkz : AVX5128I, EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; + Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; def rmb : AVX5128I, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2067,7 +2067,7 @@ "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, 
imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), @@ -2094,7 +2094,7 @@ "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; + Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), @@ -2123,7 +2123,7 @@ (_.ScalarLdFrag addr:$src2), imm:$cc))]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2150,7 +2150,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, - EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCommutable = IsCommutable in def rrk : AVX512BI, - EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_icmp_packed_rmb opc, string OpcodeStr, PatFrag OpNode, @@ -2180,7 +2180,7 @@ "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>, - EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512BI, EVEX_4V, EVEX_K, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_icmp_packed_vl opc, string OpcodeStr, PatFrag OpNode, @@ -2293,7 +2293,7 @@ (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), cond)))]>, - EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCommutable = 1 in def rrik : AVX512AIi8, - EVEX_4V, EVEX_K, 
Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { @@ -2333,7 +2333,7 @@ (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|", "$dst, $src1, $src2, $cc}"), []>, - EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; + EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; def rrik_alt : AVX512AIi8, - EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>, + EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; } @@ -2380,7 +2380,7 @@ (X86VBroadcast (_.ScalarLdFrag addr:$src2)), cond)))]>, - EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmibk : AVX512AIi8, - EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in { @@ -2403,7 +2403,7 @@ !strconcat("vpcmp", Suffix, "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|", "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>, - EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>, + EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; def rmibk_alt : AVX512AIi8, - EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>, + EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; } @@ -2546,7 +2546,7 @@ (X86cmpm (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc)>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2557,7 +2557,7 @@ (X86cmpm (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), imm:$cc)>, - EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, @@ -2573,7 +2573,7 @@ (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, - Sched<[sched.Folded, ReadAfterLd]>, + Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, @@ -2582,7 +2582,7 @@ "vcmp"#_.Suffix, "$cc, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $cc">, - EVEX_B, Sched<[sched.Folded, ReadAfterLd]>, + EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; } } @@ -2694,7 +2694,7 @@ [(set _.KRC:$dst, (OpNode _.ScalarIntMemCPat:$src1, (i32 imm:$src2)))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512, - EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2734,7 +2734,7 @@ [(set _.KRC:$dst,(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2)))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; def rmk : AVX512, - EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmb : AVX512, - EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; def rmbk : AVX512, - EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -4644,7 +4644,7 @@ (_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))))>, AVX512BIBase, EVEX_4V, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_binop_rmb opc, string OpcodeStr, SDNode OpNode, @@ -4659,7 +4659,7 @@ (X86VBroadcast (_.ScalarLdFrag addr:$src2))))>, AVX512BIBase, EVEX_4V, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_binop_rm_vl 
opc, string OpcodeStr, SDNode OpNode, @@ -4775,7 +4775,7 @@ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Src.LdFrag addr:$src2))))>, AVX512BIBase, EVEX_4V, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb : AVX512_maskable, AVX512BIBase, EVEX_4V, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, @@ -4859,7 +4859,7 @@ (_Src.VT (X86VBroadcast (_Src.ScalarLdFrag addr:$src2))))))>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_packs_rm opc, string OpcodeStr, @@ -4880,7 +4880,7 @@ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Src.LdFrag addr:$src2))))>, EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_packs_all_i32_i16 opc, string OpcodeStr, @@ -5072,7 +5072,7 @@ (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))))))>, AVX512BIBase, EVEX_4V, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // OpNodeMsk is the OpNode to use where element size is important. 
So use @@ -5096,7 +5096,7 @@ (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))))))))>, AVX512BIBase, EVEX_4V, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_logic_rmb_vl opc, string OpcodeStr, @@ -5157,7 +5157,7 @@ (_.VT (VecNode _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT)))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), @@ -5171,7 +5171,7 @@ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } } @@ -5202,7 +5202,7 @@ "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.ScalarIntMemCPat:$src2))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), @@ -5217,7 +5217,7 @@ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } defm rrb_Int : AVX512_maskable_scalar, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, @@ -5320,7 +5320,7 @@ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, - EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable, EVEX_4V, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } } @@ -5563,7 +5563,7 @@ (ins _.RC:$src1, _.MemOp:$src2), 
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, - EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable, - EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5588,7 +5588,7 @@ "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5646,7 +5646,7 @@ (bitconvert (_.LdFrag addr:$src2))))), _.ImmAllZerosV)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // Patterns for compare with 0 that just use the same source twice. @@ -5671,7 +5671,7 @@ (_.ScalarLdFrag addr:$src2))), _.ImmAllZerosV)>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // Use 512bit version to implement 128/256 bit in case NoVLX. 
@@ -5839,7 +5839,7 @@ "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>, AVX512BIBase, - EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5993,7 +5993,7 @@ (_.VT (OpNode _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))>, AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6007,7 +6007,7 @@ (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))>, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_var_shift_sizes opc, string OpcodeStr, SDNode OpNode, @@ -6335,7 +6335,7 @@ _.RC:$src1, (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmb: AVX512_maskable, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_permil_vec_common OpcVar, @@ -6448,7 +6448,7 @@ (OpNode _.RC:$src1, (_.VT (bitconvert (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>, - Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>, EVEX_4V; + Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V; } // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in @@ -6524,7 +6524,7 @@ (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>, - AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable_3src, - AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, 
sched.ReadAfterFold]>; } } @@ -6598,7 +6598,7 @@ (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>, - AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable_3src, AVX512FMA3Base, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6675,7 +6675,7 @@ (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>, - AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; // Pattern is 312 order so that the load is in a different place from the // 213 and 231 patterns this helps tablegen's duplicate pattern detection. @@ -6685,7 +6685,7 @@ "$src2, ${src3}"##_.BroadcastStr, (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), _.RC:$src1, _.RC:$src2)), 1, 0>, - AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6750,7 +6750,7 @@ defm m_Int: AVX512_maskable_3src_scalar, - AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>; + AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>; defm rb_Int: AVX512_maskable_3src_scalar, Sched<[SchedWriteFMA.Scl.Folded, ReadAfterLd]>; + [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>; def rb : AVX512FMA3S, - AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable_3src, - AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } } // Constraints = "$src1 = $dst" @@ -7120,7 +7120,7 @@ def rm : SI, - EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, 
Sched<[sched.Folded, sched.ReadAfterFold]>; } // hasSideEffects = 0 let isCodeGenOnly = 1 in { def rr_Int : SI, - EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; }//isCodeGenOnly = 1 } @@ -7265,7 +7265,7 @@ !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>, - EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; + EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", (!cast(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">; @@ -7412,7 +7412,7 @@ def rm : AVX512, - EVEX, Sched<[sched.Folded, ReadAfterLd]>; + EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>; } def rr_Int : AVX512, - EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; + EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; def : InstAlias(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">; @@ -7501,7 +7501,7 @@ (_Src.VT _Src.ScalarIntMemCPat:$src2), (i32 FROUND_CURRENT)))>, EVEX_4V, VEX_LIG, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr : I, - EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8510,7 +8510,7 @@ "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>, EVEX_4V, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8539,13 +8539,13 @@ (ins _.MemOp:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable, - EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8599,7 +8599,7 @@ "$src2, $src1", "$src1, $src2", (OpNode (_.VT 
_.RC:$src1), _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8635,7 +8635,7 @@ (OpNode (_.VT (bitconvert (_.LdFrag addr:$src))), (i32 FROUND_CURRENT))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb : AVX512_maskable, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } multiclass avx512_fp28_p_round opc, string OpcodeStr, X86VectorVTInfo _, @@ -8710,13 +8710,13 @@ (ins _.MemOp:$src), OpcodeStr, "$src", "$src", (fsqrt (_.VT (bitconvert (_.LdFrag addr:$src))))>, EVEX, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb: AVX512_maskable, - EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8771,7 +8771,7 @@ (X86fsqrtRnds (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rb_Int : AVX512_maskable_scalar, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -8842,7 +8842,7 @@ "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { def r : I, Sched<[sched.Folded, ReadAfterLd]>; + []>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -9763,7 +9763,7 @@ (_.VT (X86expand (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass expand_by_vec_width_lowering { @@ -9825,14 +9825,14 @@ OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 
imm:$src2))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -9888,7 +9888,7 @@ (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), (i32 imm:$src3))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -9920,7 +9920,7 @@ (SrcInfo.VT (bitconvert (SrcInfo.LdFrag addr:$src2))), (i8 imm:$src3)))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -9939,7 +9939,7 @@ (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (i8 imm:$src3))>, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) @@ -9961,7 +9961,7 @@ (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))), (i32 imm:$src3))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10327,7 +10327,7 @@ (CastInfo.VT (X86Shuf128 _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)), (i8 imm:$src3)))))>, - Sched<[sched.Folded, ReadAfterLd]>, + Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride; defm rmbi : AVX512_maskable, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10415,7 +10415,7 @@ (_.VT (X86VAlign _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)), (i8 imm:$src3)))>, - Sched<[sched.Folded, ReadAfterLd]>, + Sched<[sched.Folded, sched.ReadAfterFold]>, EVEX2VEXOverride<"VPALIGNRrmi">; defm rmbi : AVX512_maskable, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -10919,7 +10919,7 @@ OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set _.RC:$dst, (_.VT 
(OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>, - EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>; + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, @@ -10992,7 +10992,7 @@ [(set _.RC:$dst,(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i8 imm:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_shift_packed_all opc, SDNode OpNode, Format MRMr, @@ -11032,7 +11032,7 @@ (OpNode (_src.VT _src.RC:$src1), (_src.VT (bitconvert (_src.LdFrag addr:$src2))))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass avx512_psadbw_packed_all opc, SDNode OpNode, @@ -11134,7 +11134,7 @@ (_.VT (bitconvert (_.LdFrag addr:$src3))), (i8 imm:$src4)), 1, 0>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src, EVEX_B, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. 
@@ -11360,7 +11360,7 @@ (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))), (i32 imm:$src4), (i32 FROUND_CURRENT))>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmbi : AVX512_maskable_3src, - EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; } // Constraints = "$src1 = $dst" } @@ -11413,7 +11413,7 @@ (_src3VT.VT _src3VT.RC:$src3), (i32 imm:$src4), (i32 FROUND_NO_EXC))>, - EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>; defm rmi : AVX512_maskable_3src_scalar, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -11647,7 +11647,7 @@ (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>, AVX512FMA3Base, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -11663,7 +11663,7 @@ (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass VBMI2_shift_var_rm_common Op, string OpStr, SDNode OpNode, @@ -11751,7 +11751,7 @@ (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; defm mb : AVX512_maskable_3src, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, - T8PD, Sched<[sched.Folded, ReadAfterLd]>; + T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass VNNI_common Op, string OpStr, SDNode OpNode, @@ -11807,7 +11807,7 @@ (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass VPSHUFBITQMB_common { @@ -11855,7 +11855,7 @@ (OpNode (VTI.VT VTI.RC:$src1), (bitconvert (BcstVTI.VT (X86VBroadcast 
(loadi64 addr:$src2)))), (i8 imm:$src3))>, EVEX_B, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass GF2P8AFFINE_avx512_common Op, string OpStr, SDNode OpNode, Index: lib/Target/X86/X86InstrArithmetic.td =================================================================== --- lib/Target/X86/X86InstrArithmetic.td +++ lib/Target/X86/X86InstrArithmetic.td @@ -46,11 +46,11 @@ // (and possibly third) value from a register. // This is used for instructions that put the memory operands before other // uses. -class SchedLoadReg : Sched<[SW, +class SchedLoadReg : Sched<[Sched.Folded, // Memory operand. ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // Register reads (implicit or explicit). - ReadAfterLd, ReadAfterLd]>; + Sched.ReadAfterFold, Sched.ReadAfterFold]>; // Extra precision multiplication @@ -89,20 +89,20 @@ // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)]>, SchedLoadReg; + (implicit EFLAGS)]>, SchedLoadReg; // AX,DX = AX*[mem16] let mayLoad = 1, hasSideEffects = 0 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), - "mul{w}\t$src", []>, OpSize16, SchedLoadReg; + "mul{w}\t$src", []>, OpSize16, SchedLoadReg; // EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), - "mul{l}\t$src", []>, OpSize32, SchedLoadReg; + "mul{l}\t$src", []>, OpSize32, SchedLoadReg; // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", []>, SchedLoadReg, + "mul{q}\t$src", []>, SchedLoadReg, Requires<[In64BitMode]>; } @@ -128,19 +128,19 @@ // AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", []>, SchedLoadReg; + "imul{b}\t$src", []>, 
SchedLoadReg; // AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", []>, OpSize16, SchedLoadReg; + "imul{w}\t$src", []>, OpSize16, SchedLoadReg; // EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", []>, OpSize32, SchedLoadReg; + "imul{l}\t$src", []>, OpSize32, SchedLoadReg; // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", []>, SchedLoadReg, + "imul{q}\t$src", []>, SchedLoadReg, Requires<[In64BitMode]>; } } // hasSideEffects @@ -176,19 +176,19 @@ "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>, - Sched<[WriteIMul16Reg.Folded, ReadAfterLd]>, TB, OpSize16; + Sched<[WriteIMul16Reg.Folded, WriteIMul16Reg.ReadAfterFold]>, TB, OpSize16; def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>, - Sched<[WriteIMul32Reg.Folded, ReadAfterLd]>, TB, OpSize32; + Sched<[WriteIMul32Reg.Folded, WriteIMul32Reg.ReadAfterFold]>, TB, OpSize32; def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>, - Sched<[WriteIMul64Reg.Folded, ReadAfterLd]>, TB; + Sched<[WriteIMul64Reg.Folded, WriteIMul64Reg.ReadAfterFold]>, TB; } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] @@ -295,17 +295,17 @@ let mayLoad = 1 in { let Defs = [AL,AH,EFLAGS], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", []>, SchedLoadReg; + "div{b}\t$src", []>, SchedLoadReg; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, 
(outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", []>, OpSize16, SchedLoadReg; + "div{w}\t$src", []>, OpSize16, SchedLoadReg; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", []>, SchedLoadReg, OpSize32; + "div{l}\t$src", []>, SchedLoadReg, OpSize32; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", []>, SchedLoadReg, + "div{q}\t$src", []>, SchedLoadReg, Requires<[In64BitMode]>; } @@ -327,16 +327,16 @@ let mayLoad = 1 in { let Defs = [AL,AH,EFLAGS], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", []>, SchedLoadReg; + "idiv{b}\t$src", []>, SchedLoadReg; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", []>, OpSize16, SchedLoadReg; + "idiv{w}\t$src", []>, OpSize16, SchedLoadReg; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", []>, OpSize32, SchedLoadReg; + "idiv{l}\t$src", []>, OpSize32, SchedLoadReg; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", []>, SchedLoadReg, + "idiv{q}\t$src", []>, SchedLoadReg, Requires<[In64BitMode]>; } } // hasSideEffects = 0 @@ -691,7 +691,7 @@ : ITy, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; // BinOpRM_F - Instructions like "cmp reg, [mem]". 
class BinOpRM_F opcode, string mnemonic, X86TypeInfo typeinfo, @@ -806,8 +806,8 @@ : BinOpMR, - Sched<[WriteALULd, ReadDefault, ReadDefault, ReadDefault, - ReadDefault, ReadDefault, ReadAfterLd]>; + Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>; // BinOpMI - Instructions like "add [mem], imm". class BinOpMI opcode, string mnemonic, X86TypeInfo typeinfo, @@ -839,7 +839,7 @@ : BinOpMI, - Sched<[WriteALULd]>; + Sched<[WriteALU.Folded]>; // BinOpMI8 - Instructions like "add [mem], imm8". class BinOpMI8, - Sched<[WriteALULd]>; + Sched<[WriteALU.Folded]>; // BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def EFLAGS. class BinOpAI opcode, string mnemonic, X86TypeInfo typeinfo, @@ -1259,7 +1259,7 @@ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))]>, - Sched<[WriteALULd, ReadAfterLd]>; + Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; } // Complexity is reduced to give and with immediate a chance to match first. 
@@ -1328,7 +1328,7 @@ "adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS; } // SchedRW - let mayLoad = 1, SchedRW = [WriteADCLd, ReadAfterLd] in { + let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD; @@ -1344,5 +1344,5 @@ def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS; - } // mayLoad = 1, SchedRW = [WriteADCLd] + } // mayLoad, SchedRW } Index: lib/Target/X86/X86InstrCMovSetCC.td =================================================================== --- lib/Target/X86/X86InstrCMovSetCC.td +++ lib/Target/X86/X86InstrCMovSetCC.td @@ -38,7 +38,7 @@ } let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - SchedRW = [Sched.Folded, ReadAfterLd] in { + SchedRW = [Sched.Folded, Sched.ReadAfterFold] in { def NAME#16rm : I, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } multiclass fma3p_rm_231 opc, string OpcodeStr, RegisterClass RC, @@ -71,7 +71,7 @@ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op RC:$src2, (MemFrag addr:$src3), RC:$src1)))]>, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } multiclass fma3p_rm_132 opc, string OpcodeStr, RegisterClass RC, @@ -93,7 +93,7 @@ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (VT (Op (MemFrag addr:$src3), RC:$src1, RC:$src2)))]>, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in @@ -192,7 +192,7 @@ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + 
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } multiclass fma3s_rm_231 opc, string OpcodeStr, @@ -212,7 +212,7 @@ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src2, (load addr:$src3), RC:$src1))]>, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } multiclass fma3s_rm_132 opc, string OpcodeStr, @@ -234,7 +234,7 @@ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, (OpNode (load addr:$src3), RC:$src1, RC:$src2))]>, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in @@ -279,7 +279,7 @@ (ins RC:$src1, RC:$src2, memopr:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - []>, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + []>, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; } // The FMA 213 form is created for lowering of scalar FMA intrinscis @@ -402,19 +402,19 @@ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (OpNode RC:$src1, RC:$src2, (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; def mr : FMA4S, VEX_LIG, - Sched<[sched.Folded, ReadAfterLd, + Sched<[sched.Folded, sched.ReadAfterFold, // x86memop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // RC:$src3 - ReadAfterLd]>; + sched.ReadAfterFold]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : FMA4S, VEX_W, VEX_LIG, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; let mayLoad = 1 in def mr_Int : FMA4S_Int, - VEX_LIG, Sched<[sched.Folded, ReadAfterLd, + VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold, // memop:$src2 
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // VR128::$src3 - ReadAfterLd]>; + sched.ReadAfterFold]>; def rr_Int_REV : FMA4S_Int, VEX_W, - Sched<[sched.XMM.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold, sched.XMM.ReadAfterFold]>; def mr : FMA4, - Sched<[sched.XMM.Folded, ReadAfterLd, + Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold, // f128mem:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // VR128::$src3 - ReadAfterLd]>; + sched.XMM.ReadAfterFold]>; let isCommutable = 1 in def Yrr : FMA4, VEX_W, VEX_L, - Sched<[sched.YMM.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold, sched.YMM.ReadAfterFold]>; def Ymr : FMA4, VEX_L, - Sched<[sched.YMM.Folded, ReadAfterLd, + Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold, // f256mem:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // VR256::$src3 - ReadAfterLd]>; + sched.YMM.ReadAfterFold]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_REV : FMA4; + Sched.ReadAfterFold]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { @@ -2449,7 +2449,7 @@ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // RC:$src2 - ReadAfterLd]>; + Sched.ReadAfterFold]>; } let Predicates = [HasBMI2], Defs = [EFLAGS] in { @@ -2585,7 +2585,7 @@ def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>, - VEX_4V, Sched<[WriteALULd, ReadAfterLd]>; + VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>; } let Predicates = [HasBMI2] in { Index: lib/Target/X86/X86InstrMMX.td =================================================================== --- lib/Target/X86/X86InstrMMX.td +++ lib/Target/X86/X86InstrMMX.td @@ -47,7 +47,7 @@ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId 
VR64:$src1, (bitconvert (load_mmx addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass MMXI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, @@ -64,7 +64,7 @@ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; def ri : MMXIi8, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -122,7 +122,7 @@ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass sse12_cvt_pint opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -553,7 +553,7 @@ [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, (i32 (anyext (loadi16 addr:$src2))), imm:$src3))]>, - Sched<[WriteVecInsertLd, ReadAfterLd]>; + Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } } Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -35,7 +35,7 @@ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], d>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class @@ -57,7 +57,7 @@ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], d>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -81,7 +81,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, 
$src2}")), [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -103,7 +103,7 @@ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), pat_rm, d>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -652,7 +652,7 @@ (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "s", asm_opr), [], SSEPackedSingle>, PS, - Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; + Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; def PDrm : PI, PD, - Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; + Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } multiclass sse12_mov_hilo_packedopc, SDPatternOperator pdnode, @@ -861,7 +861,7 @@ def rm : SI, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // hasSideEffects = 0 } @@ -1018,7 +1018,7 @@ !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - []>, Sched<[sched.Folded, ReadAfterLd]>; + []>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -1172,7 +1172,7 @@ (ins FR32:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>; + Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; } def : Pat<(f32 (fpround FR64:$src)), @@ -1203,7 +1203,7 @@ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))]>, XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>, - Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>; + Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; let Constraints = "$src1 = $dst" in { def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, 
VR128:$src2), @@ -1217,7 +1217,7 @@ [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))]>, XD, Requires<[UseSSE2]>, - Sched<[WriteCvtSD2SS.Folded, ReadAfterLd]>; + Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; } } // isCodeGenOnly = 1 @@ -1234,7 +1234,7 @@ (ins FR64:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, VEX_LIG, VEX_WIG, - Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>, + Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>, Requires<[UseAVX, OptForSize]>; } @@ -1281,7 +1281,7 @@ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, VEX_WIG, Requires<[HasAVX]>, - Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>; + Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1293,7 +1293,7 @@ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", []>, XS, Requires<[UseSSE2]>, - Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>; + Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>; } } // isCodeGenOnly = 1 @@ -1812,7 +1812,7 @@ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1, hasSideEffects = 0 in { @@ -1822,7 +1822,7 @@ let mayLoad = 1 in def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>, - Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable; + Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; } } @@ -1864,7 +1864,7 @@ (ins VR128:$src1, memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, mem_cpat:$src, imm:$cc))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let isCodeGenOnly = 1 in { @@ -1906,7 +1906,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), (ld_frag addr:$src2)))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -1924,7 +1924,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), mem_cpat:$src2))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Defs = [EFLAGS] in { @@ -1989,7 +1989,7 @@ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1, hasSideEffects = 0 in { @@ -1999,7 +1999,7 @@ let mayLoad = 1 in def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), - asm_alt, [], d>, Sched<[sched.Folded, ReadAfterLd]>, + asm_alt, [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; } } @@ -2095,7 +2095,7 @@ (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), (i8 imm:$src3))))], d>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, @@ -2151,7 +2151,7 @@ asm, [(set RC:$dst, (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], d>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Predicates = [HasAVX, NoVLX] in { @@ -2290,7 +2290,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } // ExeDomain = SSEPackedInt @@ -2727,7 +2727,7 @@ let mayLoad = 1 in def m_Int : I, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -2781,7 +2781,7 @@ let mayLoad = 1 in def m : I, Sched<[sched.Folded, ReadAfterLd]>; + [], d>, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, ExeDomain = d in { def r_Int : I, Sched<[sched.Folded, ReadAfterLd]>; + []>, Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -3350,7 +3350,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } // ExeDomain = SSEPackedInt @@ -3458,7 +3458,7 @@ !strconcat(OpcodeStr, 
"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT (bitconvert (ld_frag addr:$src2))))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; def ri : PDIi8, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass sse4_pack opc, string OpcodeStr, ValueType OutVT, @@ -3688,7 +3688,7 @@ [(set RC:$dst, (OutVT (OpNode (ArgVT RC:$src1), (bitconvert (ld_frag addr:$src2)))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { @@ -3760,7 +3760,7 @@ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (bitconvert (ld_frag addr:$src2)))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { @@ -3868,7 +3868,7 @@ [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), imm:$src3))]>, - Sched<[WriteVecInsertLd, ReadAfterLd]>; + Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } // Extract @@ -4457,7 +4457,7 @@ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (X86Addsub RC:$src1, (ld_frag addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Predicates = [HasAVX] in { @@ -4508,7 +4508,7 @@ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, @@ -4526,7 +4526,7 @@ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, 
$dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Predicates = [HasAVX] in { @@ -4658,7 +4658,7 @@ [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. @@ -4681,7 +4681,7 @@ [(set VR128:$dst, (IntId128 VR128:$src1, (bitconvert (ld_frag addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass SS3I_binop_rm_int_y opc, string OpcodeStr, @@ -4698,7 +4698,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in { @@ -4849,7 +4849,7 @@ [(set RC:$dst, (VT (X86PAlignr RC:$src1, (bitconvert (memop_frag addr:$src2)), (i8 imm:$src3))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -5302,8 +5302,8 @@ !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), - imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>; + (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), imm:$src3))]>, + Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } let Predicates = [HasAVX, NoBWI] in @@ -5328,8 +5328,8 @@ !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), - imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>; + (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), imm:$src3)))]>, + Sched<[WriteVecInsert.Folded, 
WriteVecInsert.ReadAfterFold]>; } let Predicates = [HasAVX, NoDQI] in @@ -5354,8 +5354,8 @@ !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), - imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>; + (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), imm:$src3)))]>, + Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>; } let Predicates = [HasAVX, NoDQI] in @@ -5387,7 +5387,7 @@ (X86insertps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, - Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>; + Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>; } let ExeDomain = SSEPackedSingle in { @@ -5450,7 +5450,7 @@ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3), !strconcat(OpcodeStr, "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, Sched<[sched.Folded, ReadAfterLd]>; + []>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in { @@ -5465,7 +5465,7 @@ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3), !strconcat(OpcodeStr, "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, Sched<[sched.Folded, ReadAfterLd]>; + []>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, hasSideEffects = 0 } @@ -5483,7 +5483,7 @@ (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, Sched<[sched.Folded, ReadAfterLd]>; + []>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 let ExeDomain = SSEPackedDouble, hasSideEffects = 0 in { @@ -5498,7 +5498,7 @@ (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2), !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, Sched<[sched.Folded, ReadAfterLd]>; + 
[]>, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, hasSideEffects = 0 } @@ -5526,7 +5526,7 @@ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in { @@ -5549,7 +5549,7 @@ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, (OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 } @@ -5850,7 +5850,7 @@ def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>, - Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>, + Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>, VEX, VEX_WIG; def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), @@ -5860,7 +5860,7 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>, - Sched<[SchedWriteVecTest.YMM.Folded, ReadAfterLd]>, + Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>, VEX, VEX_L, VEX_WIG; } @@ -5872,7 +5872,7 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "ptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, - Sched<[SchedWriteVecTest.XMM.Folded, ReadAfterLd]>; + Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>; } // The bit test instructions below are AVX only @@ -5886,7 +5886,7 @@ def rm : SS48I, - Sched<[sched.Folded, 
ReadAfterLd]>, VEX; + Sched<[sched.Folded, sched.ReadAfterFold]>, VEX; } let Defs = [EFLAGS], Predicates = [HasAVX] in { @@ -5988,7 +5988,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Predicates = [HasAVX, NoVLX] in { @@ -6126,7 +6126,7 @@ [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } /// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate @@ -6154,7 +6154,7 @@ [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } def BlendCommuteImm2 : SDNodeXForm, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } // Pattern to commute if load is in first source. 
@@ -6327,12 +6327,12 @@ [(set RC:$dst, (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), RC:$src3))], SSEPackedInt>, TAPD, VEX_4V, - Sched<[sched.Folded, ReadAfterLd, + Sched<[sched.Folded, sched.ReadAfterFold, // x86memop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // RC::$src3 - ReadAfterLd]>; + sched.ReadAfterFold]>; } let Predicates = [HasAVX] in { @@ -6491,7 +6491,7 @@ [(set VR128:$dst, (IntId VR128:$src1, (bitconvert (mem_frag addr:$src2)), XMM0))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } } @@ -6602,7 +6602,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Predicates = [HasAVX] in @@ -6632,7 +6632,7 @@ def rm :SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, Sched<[WritePCmpIStrM.Folded, ReadAfterLd]>; + []>, Sched<[WritePCmpIStrM.Folded, WritePCmpIStrM.ReadAfterFold]>; } let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { @@ -6650,7 +6650,7 @@ def rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, u8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, Sched<[WritePCmpEStrM.Folded, ReadAfterLd]>; + []>, Sched<[WritePCmpEStrM.Folded, WritePCmpEStrM.ReadAfterFold]>; } let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { @@ -6668,7 +6668,7 @@ def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, Sched<[WritePCmpIStrI.Folded, ReadAfterLd]>; + []>, Sched<[WritePCmpIStrI.Folded, WritePCmpIStrI.ReadAfterFold]>; } let Defs = [ECX, EFLAGS], hasSideEffects = 0 in { @@ -6686,7 +6686,7 @@ def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, 
i128mem:$src3, u8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, Sched<[WritePCmpEStrI.Folded, ReadAfterLd]>; + []>, Sched<[WritePCmpEStrI.Folded, WritePCmpEStrI.ReadAfterFold]>; } let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { @@ -6716,7 +6716,7 @@ SS42FI, - Sched<[WriteCRC32.Folded, ReadAfterLd]>; + Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>; let Constraints = "$src1 = $dst" in { def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, @@ -6771,7 +6771,7 @@ (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)), (set VR128:$dst, (IntId VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { @@ -6789,7 +6789,8 @@ (int_x86_sha1rnds4 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$src3)))]>, TA, - Sched<[SchedWriteVecIMul.XMM.Folded, ReadAfterLd]>; + Sched<[SchedWriteVecIMul.XMM.Folded, + SchedWriteVecIMul.XMM.ReadAfterFold]>; defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte, SchedWriteVecIMul.XMM>; @@ -6832,7 +6833,7 @@ def rm : AES8I, - Sched<[WriteAESDecEnc.Folded, ReadAfterLd]>; + Sched<[WriteAESDecEnc.Folded, WriteAESDecEnc.ReadAfterFold]>; } } @@ -6954,7 +6955,7 @@ [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2), imm:$src3))]>, - Sched<[WriteCLMul.Folded, ReadAfterLd]>; + Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; } // Constraints = "$src1 = $dst" def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1, @@ -6990,7 +6991,7 @@ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set RC:$dst, (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>, - Sched<[WriteCLMul.Folded, ReadAfterLd]>; + Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>; // We can commute a load in the first operand by swapping the sources and // rotating the immediate. 
@@ -7198,7 +7199,7 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f128mem:$src2, u8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, Sched<[WriteFShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L; + []>, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; } // To create a 256-bit all ones value, we should produce VCMPTRUEPS @@ -7334,7 +7335,7 @@ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V, - Sched<[varsched.Folded, ReadAfterLd]>; + Sched<[varsched.Folded, varsched.ReadAfterFold]>; def ri : AVXAIi8, VEX_4V, VEX_L, - Sched<[WriteFShuffle256Ld, ReadAfterLd]>; + Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>; } // Immediate transform to help with commuting. @@ -7535,7 +7536,7 @@ [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>, - Sched<[sched.Folded, ReadAfterLd]>, VEX_4V; + Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V; // Pattern to commute if load is in first source.
def : Pat<(OpVT (OpNode (bitconvert (memop_frag addr:$src2)), @@ -7800,7 +7801,7 @@ [(set VR256:$dst, (OpVT (X86VPermv VR256:$src1, (bitconvert (mem_frag addr:$src2)))))]>, - Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L; + Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX_4V, VEX_L; } } @@ -7828,7 +7829,7 @@ [(set VR256:$dst, (OpVT (X86VPermi (mem_frag addr:$src1), (i8 imm:$src2))))]>, - Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L; + Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L; } } @@ -7853,7 +7854,7 @@ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2), (i8 imm:$src3)))]>, - Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L; + Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; let Predicates = [HasAVX2] in def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2), @@ -7873,7 +7874,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i128mem:$src2, u8imm:$src3), "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L; + []>, Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L; } let Predicates = [HasAVX2, NoVLX] in { @@ -8040,7 +8041,8 @@ [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>, - VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded, ReadAfterLd]>; + VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded, + SchedWriteVarVecShift.XMM.ReadAfterFold]>; def Yrr : AVX28I, - VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, ReadAfterLd]>; + VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, + SchedWriteVarVecShift.YMM.ReadAfterFold]>; } let Predicates = [HasAVX2, NoVLX] in { @@ -8154,7 +8157,7 @@ def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "", [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, (bitconvert (MemOpFrag 
addr:$src2)))))]>, - Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>, T8PD; + Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>, T8PD; } } @@ -8173,7 +8176,7 @@ [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (MemOpFrag addr:$src2)), imm:$src3)))], SSEPackedInt>, - Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>; + Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>; } } Index: lib/Target/X86/X86InstrShiftRotate.td =================================================================== --- lib/Target/X86/X86InstrShiftRotate.td +++ lib/Target/X86/X86InstrShiftRotate.td @@ -850,12 +850,12 @@ def rm : I<0xF7, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - VEX, Sched<[WriteShiftLd, + VEX, Sched<[WriteShift.Folded, // x86memop:$src1 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // RC:$src2 - ReadAfterLd]>; + WriteShift.ReadAfterFold]>; } } Index: lib/Target/X86/X86InstrXOP.td =================================================================== --- lib/Target/X86/X86InstrXOP.td +++ lib/Target/X86/X86InstrXOP.td @@ -18,7 +18,7 @@ def rm : IXOP, XOP, - Sched<[SchedWritePHAdd.XMM.Folded, ReadAfterLd]>; + Sched<[SchedWritePHAdd.XMM.Folded, SchedWritePHAdd.XMM.ReadAfterFold]>; } let ExeDomain = SSEPackedInt in { @@ -49,7 +49,7 @@ def rm : IXOP, XOP, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass xop2op128 opc, string OpcodeStr, Intrinsic Int, @@ -60,7 +60,7 @@ def rm : IXOP, XOP, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } multiclass xop2op256 opc, string OpcodeStr, Intrinsic Int, @@ -71,7 +71,7 @@ def Yrm : IXOP, XOP, VEX_L, - Sched<[sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold]>; } let ExeDomain = SSEPackedSingle in { @@ -106,14 +106,14 @@ [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 
(bitconvert (loadv2i64 addr:$src2))))))]>, - XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd]>; + XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold]>; def mr : IXOP, - XOP, Sched<[sched.Folded, ReadAfterLd]>; + XOP, Sched<[sched.Folded, sched.ReadAfterFold]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : IXOP, - XOP, Sched<[sched.Folded, ReadAfterLd]>; + XOP, Sched<[sched.Folded, sched.ReadAfterFold]>; } let ExeDomain = SSEPackedInt in { @@ -182,7 +182,7 @@ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)), - VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>; + VR128:$src3))]>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; } let ExeDomain = SSEPackedInt in { @@ -262,7 +262,7 @@ (vt128 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))), imm:$cc)))]>, - XOP_4V, Sched<[sched.Folded, ReadAfterLd]>; + XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; let isAsmParserOnly = 1, hasSideEffects = 0 in { def ri_alt : IXOPi8, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>, + []>, XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable; } } @@ -311,7 +311,7 @@ [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>, - XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; def rmr : IXOPi8Reg, - XOP_4V, Sched<[sched.Folded, ReadAfterLd, + XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold, // 128mem:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // VR128:$src3 - ReadAfterLd]>; + sched.ReadAfterFold]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rrr_REV : IXOPi8Reg, - XOP_4V, VEX_W, Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + XOP_4V, VEX_W, Sched<[sched.Folded, 
sched.ReadAfterFold, sched.ReadAfterFold]>; def rmr : IXOPi8Reg, - XOP_4V, Sched<[sched.Folded, ReadAfterLd, + XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold, // x86memop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // RC::$src3 - ReadAfterLd]>; + sched.ReadAfterFold]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rrr_REV : IXOPi8Reg, VEX_W, - Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>; + Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>; def mr : IXOP5, - Sched<[sched.Folded, ReadAfterLd, + Sched<[sched.Folded, sched.ReadAfterFold, // fpmemop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // RC:$src3 - ReadAfterLd]>; + sched.ReadAfterFold]>; // For disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : IXOP5; +// Vector loads are 5/5/6 cycles, so ReadAfterVec*Ld registers needn't be available +// until 5/5/6 cycles after the memory operand. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear // as two micro-ops when queued in the reservation station. Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -81,6 +81,12 @@ // cycles after the memory operand. def : ReadAdvance; +// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available +// until 5/6/7 cycles after the memory operand. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear // as two micro-ops when queued in the reservation station. 
Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -71,6 +71,12 @@ // cycles after the memory operand. def : ReadAdvance; +// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available +// until 5/6/7 cycles after the memory operand. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear // as two micro-ops when queued in the reservation station. Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -75,6 +75,12 @@ // cycles after the memory operand. def : ReadAdvance; +// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available +// until 5/6/7 cycles after the memory operand. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear // as two micro-ops when queued in the reservation station. Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -71,10 +71,16 @@ let BufferSize=60; } -// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 +// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 // cycles after the memory operand. def : ReadAdvance; +// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available +// until 5/6/7 cycles after the memory operand. 
+def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear // as two micro-ops when queued in the reservation station. Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -14,6 +14,9 @@ // but other register operands don't have to be read until the load is ready. // These operands are marked with ReadAfterLd. def ReadAfterLd : SchedRead; +def ReadAfterVecLd : SchedRead; +def ReadAfterVecXLd : SchedRead; +def ReadAfterVecYLd : SchedRead; // Instructions with both a load and a store folded are modeled as a folded // load + WriteRMW. @@ -37,15 +40,19 @@ class X86FoldableSchedWrite : SchedWrite { // The SchedWrite to use when a load is folded into the instruction. SchedWrite Folded; + // The SchedRead to tag register operands that don't need to be ready + // until the folded load has completed. + SchedRead ReadAfterFold; } // Multiclass that produces a linked pair of SchedWrites. -multiclass X86SchedWritePair { +multiclass X86SchedWritePair { // Register-Memory operation. def Ld : SchedWrite; // Register-Register operation. def NAME : X86FoldableSchedWrite { let Folded = !cast(NAME#"Ld"); + let ReadAfterFold = ReadAfter; } } @@ -213,98 +220,98 @@ def WriteFMoveX : SchedWrite; def WriteFMoveY : SchedWrite; -defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. -defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM). -defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM). -defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM). -defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub. -defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM). 
-defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM). -defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM). -defm WriteFCmp : X86SchedWritePair; // Floating point compare. -defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM). -defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM). -defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM). -defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare. -defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM). -defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM). -defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM). -defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. -defm WriteFMul : X86SchedWritePair; // Floating point multiplication. -defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM). -defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM). -defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (YMM). -defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication. -defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM). -defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM). -defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM). -defm WriteFDiv : X86SchedWritePair; // Floating point division. -defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM). -defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM). -defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM). -defm WriteFDiv64 : X86SchedWritePair; // Floating point double division. -defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM). -defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM). 
-defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM). -defm WriteFSqrt : X86SchedWritePair; // Floating point square root. -defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM). -defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM). -defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM). -defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root. -defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM). -defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM). -defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM). -defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root. -defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. -defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM). -defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM). -defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM). -defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. -defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM). -defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM). -defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM). -defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. -defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM). -defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM). -defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM). -defm WriteDPPD : X86SchedWritePair; // Floating point double dot product. -defm WriteDPPS : X86SchedWritePair; // Floating point single dot product. -defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM). 
-defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM). -defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. -defm WriteFRnd : X86SchedWritePair; // Floating point rounding. -defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM). -defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM). -defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. -defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM). -defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM). -defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions. -defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM). -defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM). -defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. -defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM). -defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM). -defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. -defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM). -defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM). -defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. -defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM). -defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM). -defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. -defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM). -defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (YMZMM). +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. +defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM). 
+defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM). +defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM). +defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub. +defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM). +defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM). +defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM). +defm WriteFCmp : X86SchedWritePair; // Floating point compare. +defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM). +defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM). +defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM). +defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare. +defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM). +defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM). +defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM). +defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM). +defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM). +defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM). +defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication. +defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM). +defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM). +defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM). +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM). +defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM). 
+defm WriteFDivZ : X86SchedWritePair; // Floating point division (ZMM). +defm WriteFDiv64 : X86SchedWritePair; // Floating point double division. +defm WriteFDiv64X : X86SchedWritePair; // Floating point double division (XMM). +defm WriteFDiv64Y : X86SchedWritePair; // Floating point double division (YMM). +defm WriteFDiv64Z : X86SchedWritePair; // Floating point double division (ZMM). +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFSqrtX : X86SchedWritePair; // Floating point square root (XMM). +defm WriteFSqrtY : X86SchedWritePair; // Floating point square root (YMM). +defm WriteFSqrtZ : X86SchedWritePair; // Floating point square root (ZMM). +defm WriteFSqrt64 : X86SchedWritePair; // Floating point double square root. +defm WriteFSqrt64X : X86SchedWritePair; // Floating point double square root (XMM). +defm WriteFSqrt64Y : X86SchedWritePair; // Floating point double square root (YMM). +defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZMM). +defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. +defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM). +defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM). +defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM). +defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. +defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM). +defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM). +defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM). +defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM). +defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM). 
+defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM). +defm WriteDPPD : X86SchedWritePair; // Floating point double dot product. +defm WriteDPPS : X86SchedWritePair; // Floating point single dot product. +defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM). +defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM). +defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. +defm WriteFRnd : X86SchedWritePair; // Floating point rounding. +defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM). +defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM). +defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. +defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM). +defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM). +defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions. +defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM). +defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM). +defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. +defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM). +defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM). +defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. +defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM). +defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM). +defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. +defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM). +defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM). +defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. 
+defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM). +defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM). // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Horizontal Add/Sub (float and integer) -defm WriteFHAdd : X86SchedWritePair; -defm WriteFHAddY : X86SchedWritePair; -defm WriteFHAddZ : X86SchedWritePair; -defm WritePHAdd : X86SchedWritePair; -defm WritePHAddX : X86SchedWritePair; -defm WritePHAddY : X86SchedWritePair; -defm WritePHAddZ : X86SchedWritePair; +defm WriteFHAdd : X86SchedWritePair; +defm WriteFHAddY : X86SchedWritePair; +defm WriteFHAddZ : X86SchedWritePair; +defm WritePHAdd : X86SchedWritePair; +defm WritePHAddX : X86SchedWritePair; +defm WritePHAddY : X86SchedWritePair; +defm WritePHAddZ : X86SchedWritePair; // Vector integer operations. def WriteVecLoad : SchedWrite; @@ -327,54 +334,54 @@ def WriteVecMoveToGpr : SchedWrite; def WriteVecMoveFromGpr : SchedWrite; -defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. -defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM). -defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM). -defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM). -defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. -defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM). -defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM). -defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM). -defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions. -defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM). -defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM). -defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default). 
-defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM). -defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM). -defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM). -defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default). -defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM). -defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM). -defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM). -defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default). -defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM). -defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM). -defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM). -defm WritePMULLD : X86SchedWritePair; // Vector PMULLD. -defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM). -defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM). -defm WriteShuffle : X86SchedWritePair; // Vector shuffles. -defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM). -defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM). -defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM). -defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles. -defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM). -defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM). -defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM). -defm WriteBlend : X86SchedWritePair; // Vector blends. -defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM). -defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM). -defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. -defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM). 
-defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM). -defm WritePSADBW : X86SchedWritePair; // Vector PSADBW. -defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM). -defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM). -defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM). -defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. -defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM). -defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM). -defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS. +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM). +defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM). +defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM). +defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. +defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM). +defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM). +defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM). +defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions. +defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM). +defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM). +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default). +defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM). +defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM). +defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM). +defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default). +defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM). 
+defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM). +defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM). +defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default). +defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM). +defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM). +defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM). +defm WritePMULLD : X86SchedWritePair; // Vector PMULLD. +defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM). +defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM). +defm WriteShuffle : X86SchedWritePair; // Vector shuffles. +defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM). +defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM). +defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM). +defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles. +defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM). +defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM). +defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM). +defm WriteBlend : X86SchedWritePair; // Vector blends. +defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM). +defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM). +defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. +defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM). +defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM). +defm WritePSADBW : X86SchedWritePair; // Vector PSADBW. +defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM). +defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM). +defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM). +defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. +defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM). 
+defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM). +defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS. // Vector insert/extract operations. defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element. @@ -388,39 +395,39 @@ def WriteMMXMOVMSK : SchedWrite; // Conversion between integer and float. -defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer. -defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM). -defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM). -defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM). - -defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer. -defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM). -defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM). -defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM). - -defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double. -defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM). -defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM). -defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM). - -defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float. -defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM). -defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM). -defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM). - -defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion. -defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM). -defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM). -defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM). - -defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion. -defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM). -defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM). 
-defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM). - -defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion. -defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM). -defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM). +defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer. +defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM). +defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM). +defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM). + +defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer. +defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM). +defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM). +defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM). + +defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double. +defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM). +defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM). +defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM). + +defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float. +defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM). +defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM). +defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM). + +defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion. +defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM). +defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM). +defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM). + +defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion. +defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM). +defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM). 
+defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM). + +defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion. +defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM). +defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM). def WriteCvtPS2PH : SchedWrite; // // Float -> Half size conversion. def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM). @@ -430,25 +437,25 @@ def WriteCvtPS2PHZSt : SchedWrite; // // Float -> Half + store size conversion (ZMM). // CRC32 instruction. -defm WriteCRC32 : X86SchedWritePair; +defm WriteCRC32 : X86SchedWritePair; // Strings instructions. // Packed Compare Implicit Length Strings, Return Mask -defm WritePCmpIStrM : X86SchedWritePair; +defm WritePCmpIStrM : X86SchedWritePair; // Packed Compare Explicit Length Strings, Return Mask -defm WritePCmpEStrM : X86SchedWritePair; +defm WritePCmpEStrM : X86SchedWritePair; // Packed Compare Implicit Length Strings, Return Index -defm WritePCmpIStrI : X86SchedWritePair; +defm WritePCmpIStrI : X86SchedWritePair; // Packed Compare Explicit Length Strings, Return Index -defm WritePCmpEStrI : X86SchedWritePair; +defm WritePCmpEStrI : X86SchedWritePair; // AES instructions. -defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption. -defm WriteAESIMC : X86SchedWritePair; // InvMixColumn. -defm WriteAESKeyGen : X86SchedWritePair; // Key Generation. +defm WriteAESDecEnc : X86SchedWritePair; // Decryption, encryption. +defm WriteAESIMC : X86SchedWritePair; // InvMixColumn. +defm WriteAESKeyGen : X86SchedWritePair; // Key Generation. // Carry-less multiplication instructions. -defm WriteCLMul : X86SchedWritePair; +defm WriteCLMul : X86SchedWritePair; // EMMS/FEMMS def WriteEMMS : SchedWrite; @@ -461,13 +468,13 @@ def WriteSystem : SchedWrite; // AVX2. -defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles. 
-defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles. -defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. -defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles. -defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. -defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM). -defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM). +defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles. +defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles. +defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. +defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles. +defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. +defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM). +defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM). // Old microcoded instructions that nobody use. def WriteMicrocoded : SchedWrite; Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -43,6 +43,9 @@ // Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3 // cycles after the memory operand. def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -93,6 +93,12 @@ // cycles after the memory operand. 
def : ReadAdvance; +// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available until 5 +// cycles after the memory operand. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear // as two micro-ops when dispatched by the schedulers. Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -49,6 +49,9 @@ // Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3 // cycles after the memory operand. def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; // Many SchedWrites are defined in pairs with and without a folded load. // Instructions with folded loads are usually micro-fused, so they only appear Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -87,9 +87,14 @@ // Integer division issued on ALU2. def ZnDivider : ProcResource<1>; -// 4 Cycles load-to use Latency is captured +// 4 Cycles integer load-to use Latency is captured def : ReadAdvance; +// 8 Cycles vector load-to use Latency is captured +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + // The Integer PRF for Zen is 168 entries, and it holds the architectural and // speculative version of the 64-bit integer registers. 
// Reference: "Software Optimization Guide for AMD Family 17h Processors" Index: test/tools/llvm-mca/X86/read-after-ld-1.s =================================================================== --- test/tools/llvm-mca/X86/read-after-ld-1.s +++ test/tools/llvm-mca/X86/read-after-ld-1.s @@ -15,39 +15,39 @@ # BDWELL-NEXT: Total Cycles: 17 # BDWELL-NEXT: Total uOps: 3 -# BTVER2-NEXT: Total Cycles: 27 +# BTVER2-NEXT: Total Cycles: 25 # BTVER2-NEXT: Total uOps: 2 -# HASWELL-NEXT: Total Cycles: 20 +# HASWELL-NEXT: Total Cycles: 19 # HASWELL-NEXT: Total uOps: 3 -# SANDY-NEXT: Total Cycles: 21 +# SANDY-NEXT: Total Cycles: 20 # SANDY-NEXT: Total uOps: 3 -# SKYLAKE-NEXT: Total Cycles: 19 +# SKYLAKE-NEXT: Total Cycles: 18 # SKYLAKE-NEXT: Total uOps: 3 -# ZNVER1-NEXT: Total Cycles: 24 +# ZNVER1-NEXT: Total Cycles: 20 # ZNVER1-NEXT: Total uOps: 2 # BTVER2: Dispatch Width: 2 -# BTVER2-NEXT: uOps Per Cycle: 0.07 -# BTVER2-NEXT: IPC: 0.07 +# BTVER2-NEXT: uOps Per Cycle: 0.08 +# BTVER2-NEXT: IPC: 0.08 # BTVER2-NEXT: Block RThroughput: 19.0 # ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.08 -# ZNVER1-NEXT: IPC: 0.08 +# ZNVER1-NEXT: uOps Per Cycle: 0.10 +# ZNVER1-NEXT: IPC: 0.10 # ZNVER1-NEXT: Block RThroughput: 1.0 # SANDY: Dispatch Width: 4 -# SANDY-NEXT: uOps Per Cycle: 0.14 +# SANDY-NEXT: uOps Per Cycle: 0.15 # SANDY-NEXT: IPC: 0.10 # SANDY-NEXT: Block RThroughput: 14.0 # HASWELL: Dispatch Width: 4 -# HASWELL-NEXT: uOps Per Cycle: 0.15 -# HASWELL-NEXT: IPC: 0.10 +# HASWELL-NEXT: uOps Per Cycle: 0.16 +# HASWELL-NEXT: IPC: 0.11 # HASWELL-NEXT: Block RThroughput: 7.0 # BDWELL: Dispatch Width: 4 @@ -56,7 +56,7 @@ # BDWELL-NEXT: Block RThroughput: 5.0 # SKYLAKE: Dispatch Width: 6 -# SKYLAKE-NEXT: uOps Per Cycle: 0.16 +# SKYLAKE-NEXT: uOps Per Cycle: 0.17 # SKYLAKE-NEXT: IPC: 0.11 # SKYLAKE-NEXT: Block RThroughput: 3.0 @@ -66,37 +66,37 @@ # BDWELL-NEXT: Index 0123456789 # BTVER2-NEXT: 0123456789 -# BTVER2-NEXT: Index 0123456789 0123456 +# BTVER2-NEXT: Index 0123456789 01234 
-# HASWELL-NEXT: 0123456789 +# HASWELL-NEXT: 012345678 # HASWELL-NEXT: Index 0123456789 # SANDY-NEXT: 0123456789 -# SANDY-NEXT: Index 0123456789 0 +# SANDY-NEXT: Index 0123456789 -# SKYLAKE-NEXT: 012345678 +# SKYLAKE-NEXT: 01234567 # SKYLAKE-NEXT: Index 0123456789 # ZNVER1-NEXT: 0123456789 -# ZNVER1-NEXT: Index 0123456789 0123 +# ZNVER1-NEXT: Index 0123456789 -# SKYLAKE: [0,0] DeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1 -# SKYLAKE-NEXT: [0,1] D======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# SKYLAKE: [0,0] DeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1 +# SKYLAKE-NEXT: [0,1] D=====eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 # BDWELL: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 # BDWELL-NEXT: [0,1] D======eeeeeeeeER vaddps (%rax), %xmm1, %xmm1 -# HASWELL: [0,0] DeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 -# HASWELL-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# HASWELL: [0,0] DeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 +# HASWELL-NEXT: [0,1] D=======eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 -# SANDY: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 -# SANDY-NEXT: [0,1] D=========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# SANDY: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 +# SANDY-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 -# ZNVER1: [0,0] DeeeeeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1 -# ZNVER1-NEXT: [0,1] D===========eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# ZNVER1: [0,0] DeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 +# ZNVER1-NEXT: [0,1] D=======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 -# BTVER2: [0,0] DeeeeeeeeeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 -# BTVER2-NEXT: [0,1] D================eeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# BTVER2: [0,0] DeeeeeeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 +# BTVER2-NEXT: [0,1] D==============eeeeeeeeER vaddps (%rax), %xmm1, %xmm1 # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions @@ -108,8 +108,8 @@ # ALL-NEXT: 0. 
1 1.0 1.0 0.0 vdivps %xmm0, %xmm1, %xmm1 # BDWELL-NEXT: 1. 1 7.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 -# BTVER2-NEXT: 1. 1 17.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 -# HASWELL-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 -# SANDY-NEXT: 1. 1 10.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 -# SKYLAKE-NEXT: 1. 1 7.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 -# ZNVER1-NEXT: 1. 1 12.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# BTVER2-NEXT: 1. 1 15.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# HASWELL-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# SANDY-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# SKYLAKE-NEXT: 1. 1 6.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 +# ZNVER1-NEXT: 1. 1 8.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1 Index: test/tools/llvm-mca/X86/read-after-ld-2.s =================================================================== --- test/tools/llvm-mca/X86/read-after-ld-2.s +++ test/tools/llvm-mca/X86/read-after-ld-2.s @@ -17,31 +17,31 @@ # ALL: Iterations: 100 # ALL-NEXT: Instructions: 400 -# BDWELL-NEXT: Total Cycles: 208 +# BDWELL-NEXT: Total Cycles: 142 # BDWELL-NEXT: Total uOps: 500 -# HASWELL-NEXT: Total Cycles: 308 +# HASWELL-NEXT: Total Cycles: 143 # HASWELL-NEXT: Total uOps: 500 # SKYLAKE-NEXT: Total Cycles: 803 # SKYLAKE-NEXT: Total uOps: 500 -# ZNVER1-NEXT: Total Cycles: 407 +# ZNVER1-NEXT: Total Cycles: 110 # ZNVER1-NEXT: Total uOps: 400 # ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.98 -# ZNVER1-NEXT: IPC: 0.98 +# ZNVER1-NEXT: uOps Per Cycle: 3.64 +# ZNVER1-NEXT: IPC: 3.64 # ZNVER1-NEXT: Block RThroughput: 1.0 # HASWELL: Dispatch Width: 4 -# HASWELL-NEXT: uOps Per Cycle: 1.62 -# HASWELL-NEXT: IPC: 1.30 +# HASWELL-NEXT: uOps Per Cycle: 3.50 +# HASWELL-NEXT: IPC: 2.80 # HASWELL-NEXT: Block RThroughput: 1.3 # BDWELL: Dispatch Width: 4 -# BDWELL-NEXT: uOps Per Cycle: 2.40 -# BDWELL-NEXT: IPC: 1.92 +# BDWELL-NEXT: uOps Per Cycle: 3.52 +# BDWELL-NEXT: IPC: 2.82 # BDWELL-NEXT: Block RThroughput: 1.3 # SKYLAKE: Dispatch Width: 6 @@ -52,16 +52,16 @@ # 
ALL: Timeline view: # BDWELL-NEXT: 0123456789 -# BDWELL-NEXT: Index 0123456789 01234567 +# BDWELL-NEXT: Index 0123456789 01 -# HASWELL-NEXT: 0123456789 01234567 -# HASWELL-NEXT: Index 0123456789 0123456789 +# HASWELL-NEXT: 0123456789 +# HASWELL-NEXT: Index 0123456789 012 # SKYLAKE-NEXT: 0123456789 0123456789 0123456789 01234 # SKYLAKE-NEXT: Index 0123456789 0123456789 0123456789 0123456789 -# ZNVER1-NEXT: 0123456789 0123456789 -# ZNVER1-NEXT: Index 0123456789 0123456789 0123456 +# ZNVER1-NEXT: 0123456789 +# ZNVER1-NEXT: Index 0123456789 # SKYLAKE: [0,0] DeER . . . . . . . . . . . . . . . addl $1, %edx # SKYLAKE-NEXT: [0,1] DeeeeeeeeER . . . . . . . . . . . . . vpaddd (%r8), %ymm0, %ymm0 @@ -101,128 +101,128 @@ # SKYLAKE-NEXT: [8,3] . . D=eE--------------------------------------------------------------R cmpl %edi, %edx # SKYLAKE-NEXT: [9,0] . . D=eE--------------------------------------------------------------R addl $1, %edx -# ZNVER1: [0,0] DeER . . . . . . . . .. addl $1, %edx -# ZNVER1-NEXT: [0,1] DeeeeeeeeER . . . . . . .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [0,2] DeE-------R . . . . . . .. addq $32, %r8 -# ZNVER1-NEXT: [0,3] D=eE------R . . . . . . .. cmpl %edi, %edx -# ZNVER1-NEXT: [1,0] .DeE------R . . . . . . .. addl $1, %edx -# ZNVER1-NEXT: [1,1] .D===eeeeeeeeER. . . . . . .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [1,2] .DeE----------R. . . . . . .. addq $32, %r8 -# ZNVER1-NEXT: [1,3] .D=eE---------R. . . . . . .. cmpl %edi, %edx -# ZNVER1-NEXT: [2,0] . DeE---------R. . . . . . .. addl $1, %edx -# ZNVER1-NEXT: [2,1] . D======eeeeeeeeER . . . . . .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [2,2] . DeE-------------R . . . . . .. addq $32, %r8 -# ZNVER1-NEXT: [2,3] . D=eE------------R . . . . . .. cmpl %edi, %edx -# ZNVER1-NEXT: [3,0] . DeE------------R . . . . . .. addl $1, %edx -# ZNVER1-NEXT: [3,1] . D=========eeeeeeeeER . . . . .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [3,2] . DeE----------------R . . . . .. 
addq $32, %r8 -# ZNVER1-NEXT: [3,3] . D=eE---------------R . . . . .. cmpl %edi, %edx -# ZNVER1-NEXT: [4,0] . DeE---------------R . . . . .. addl $1, %edx -# ZNVER1-NEXT: [4,1] . D============eeeeeeeeER . . . .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [4,2] . DeE-------------------R . . . .. addq $32, %r8 -# ZNVER1-NEXT: [4,3] . D=eE------------------R . . . .. cmpl %edi, %edx -# ZNVER1-NEXT: [5,0] . DeE------------------R . . . .. addl $1, %edx -# ZNVER1-NEXT: [5,1] . D===============eeeeeeeeER . . .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [5,2] . DeE----------------------R . . .. addq $32, %r8 -# ZNVER1-NEXT: [5,3] . D=eE---------------------R . . .. cmpl %edi, %edx -# ZNVER1-NEXT: [6,0] . .DeE---------------------R . . .. addl $1, %edx -# ZNVER1-NEXT: [6,1] . .D==================eeeeeeeeER. . .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [6,2] . .DeE-------------------------R. . .. addq $32, %r8 -# ZNVER1-NEXT: [6,3] . .D=eE------------------------R. . .. cmpl %edi, %edx -# ZNVER1-NEXT: [7,0] . . DeE------------------------R. . .. addl $1, %edx -# ZNVER1-NEXT: [7,1] . . D=====================eeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [7,2] . . DeE----------------------------R . .. addq $32, %r8 -# ZNVER1-NEXT: [7,3] . . D=eE---------------------------R . .. cmpl %edi, %edx -# ZNVER1-NEXT: [8,0] . . DeE---------------------------R . .. addl $1, %edx -# ZNVER1-NEXT: [8,1] . . D========================eeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [8,2] . . DeE-------------------------------R .. addq $32, %r8 -# ZNVER1-NEXT: [8,3] . . D=eE------------------------------R .. cmpl %edi, %edx -# ZNVER1-NEXT: [9,0] . . DeE------------------------------R .. addl $1, %edx -# ZNVER1-NEXT: [9,1] . . D===========================eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: [9,2] . . DeE----------------------------------R addq $32, %r8 -# ZNVER1-NEXT: [9,3] . . 
D=eE---------------------------------R cmpl %edi, %edx - -# HASWELL: [0,0] DeER . . . . . . . . addl $1, %edx -# HASWELL-NEXT: [0,1] DeeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [0,2] DeE-------R . . . . . . addq $32, %r8 -# HASWELL-NEXT: [0,3] .DeE------R . . . . . . cmpl %edi, %edx -# HASWELL-NEXT: [1,0] .DeE------R . . . . . . addl $1, %edx -# HASWELL-NEXT: [1,1] .D==eeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [1,2] . DeE--------R . . . . . . addq $32, %r8 -# HASWELL-NEXT: [1,3] . DeE--------R . . . . . . cmpl %edi, %edx -# HASWELL-NEXT: [2,0] . DeE--------R . . . . . . addl $1, %edx -# HASWELL-NEXT: [2,1] . D===eeeeeeeeER . . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [2,2] . DeE----------R . . . . . addq $32, %r8 -# HASWELL-NEXT: [2,3] . DeE----------R . . . . . cmpl %edi, %edx -# HASWELL-NEXT: [3,0] . DeE---------R . . . . . addl $1, %edx -# HASWELL-NEXT: [3,1] . D=====eeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [3,2] . DeE------------R. . . . . addq $32, %r8 -# HASWELL-NEXT: [3,3] . DeE-----------R. . . . . cmpl %edi, %edx -# HASWELL-NEXT: [4,0] . DeE-----------R. . . . . addl $1, %edx -# HASWELL-NEXT: [4,1] . D=======eeeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [4,2] . .DeE-------------R . . . . addq $32, %r8 -# HASWELL-NEXT: [4,3] . .DeE-------------R . . . . cmpl %edi, %edx -# HASWELL-NEXT: [5,0] . .DeE-------------R . . . . addl $1, %edx -# HASWELL-NEXT: [5,1] . . D========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [5,2] . . DeE---------------R . . . addq $32, %r8 -# HASWELL-NEXT: [5,3] . . DeE---------------R . . . cmpl %edi, %edx -# HASWELL-NEXT: [6,0] . . DeE--------------R . . . addl $1, %edx -# HASWELL-NEXT: [6,1] . . D==========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [6,2] . . DeE-----------------R . . . addq $32, %r8 -# HASWELL-NEXT: [6,3] . . DeE----------------R . . . cmpl %edi, %edx -# HASWELL-NEXT: [7,0] . . 
DeE----------------R . . . addl $1, %edx -# HASWELL-NEXT: [7,1] . . D============eeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [7,2] . . DeE------------------R . . addq $32, %r8 -# HASWELL-NEXT: [7,3] . . DeE------------------R . . cmpl %edi, %edx -# HASWELL-NEXT: [8,0] . . DeE------------------R . . addl $1, %edx -# HASWELL-NEXT: [8,1] . . .D=============eeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [8,2] . . .DeE--------------------R. . addq $32, %r8 -# HASWELL-NEXT: [8,3] . . .DeE--------------------R. . cmpl %edi, %edx -# HASWELL-NEXT: [9,0] . . . DeE-------------------R. . addl $1, %edx -# HASWELL-NEXT: [9,1] . . . D===============eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [9,2] . . . DeE----------------------R addq $32, %r8 -# HASWELL-NEXT: [9,3] . . . DeE---------------------R cmpl %edi, %edx - -# BDWELL: [0,0] DeER . . . . . . addl $1, %edx -# BDWELL-NEXT: [0,1] DeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [0,2] DeE------R. . . . . addq $32, %r8 -# BDWELL-NEXT: [0,3] .DeE-----R. . . . . cmpl %edi, %edx -# BDWELL-NEXT: [1,0] .DeE-----R. . . . . addl $1, %edx -# BDWELL-NEXT: [1,1] .D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [1,2] . DeE------R . . . . addq $32, %r8 -# BDWELL-NEXT: [1,3] . DeE------R . . . . cmpl %edi, %edx -# BDWELL-NEXT: [2,0] . DeE------R . . . . addl $1, %edx -# BDWELL-NEXT: [2,1] . D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [2,2] . DeE-------R . . . . addq $32, %r8 -# BDWELL-NEXT: [2,3] . DeE-------R . . . . cmpl %edi, %edx -# BDWELL-NEXT: [3,0] . DeE------R . . . . addl $1, %edx -# BDWELL-NEXT: [3,1] . D==eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [3,2] . DeE--------R . . . addq $32, %r8 -# BDWELL-NEXT: [3,3] . DeE-------R . . . cmpl %edi, %edx -# BDWELL-NEXT: [4,0] . DeE-------R . . . addl $1, %edx -# BDWELL-NEXT: [4,1] . D===eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [4,2] . .DeE--------R . . . 
addq $32, %r8 -# BDWELL-NEXT: [4,3] . .DeE--------R . . . cmpl %edi, %edx -# BDWELL-NEXT: [5,0] . .DeE--------R . . . addl $1, %edx -# BDWELL-NEXT: [5,1] . . D===eeeeeeeER. . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [5,2] . . DeE---------R. . . addq $32, %r8 -# BDWELL-NEXT: [5,3] . . DeE---------R. . . cmpl %edi, %edx -# BDWELL-NEXT: [6,0] . . DeE--------R. . . addl $1, %edx -# BDWELL-NEXT: [6,1] . . D====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [6,2] . . DeE----------R . . addq $32, %r8 -# BDWELL-NEXT: [6,3] . . DeE---------R . . cmpl %edi, %edx -# BDWELL-NEXT: [7,0] . . DeE---------R . . addl $1, %edx -# BDWELL-NEXT: [7,1] . . D=====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [7,2] . . DeE----------R . . addq $32, %r8 -# BDWELL-NEXT: [7,3] . . DeE----------R . . cmpl %edi, %edx -# BDWELL-NEXT: [8,0] . . DeE----------R . . addl $1, %edx -# BDWELL-NEXT: [8,1] . . .D=====eeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [8,2] . . .DeE-----------R . addq $32, %r8 -# BDWELL-NEXT: [8,3] . . .DeE-----------R . cmpl %edi, %edx -# BDWELL-NEXT: [9,0] . . . DeE----------R . addl $1, %edx -# BDWELL-NEXT: [9,1] . . . D======eeeeeeeER vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [9,2] . . . DeE------------R addq $32, %r8 -# BDWELL-NEXT: [9,3] . . . DeE-----------R cmpl %edi, %edx +# ZNVER1: [0,0] DeER . . . . addl $1, %edx +# ZNVER1-NEXT: [0,1] DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [0,2] DeE-------R . . addq $32, %r8 +# ZNVER1-NEXT: [0,3] D=eE------R . . cmpl %edi, %edx +# ZNVER1-NEXT: [1,0] .DeE------R . . addl $1, %edx +# ZNVER1-NEXT: [1,1] .DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [1,2] .DeE-------R . . addq $32, %r8 +# ZNVER1-NEXT: [1,3] .D=eE------R . . cmpl %edi, %edx +# ZNVER1-NEXT: [2,0] . DeE------R . . addl $1, %edx +# ZNVER1-NEXT: [2,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [2,2] . DeE-------R . . addq $32, %r8 +# ZNVER1-NEXT: [2,3] . D=eE------R . . 
cmpl %edi, %edx +# ZNVER1-NEXT: [3,0] . DeE------R . . addl $1, %edx +# ZNVER1-NEXT: [3,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [3,2] . DeE-------R . . addq $32, %r8 +# ZNVER1-NEXT: [3,3] . D=eE------R . . cmpl %edi, %edx +# ZNVER1-NEXT: [4,0] . DeE------R . . addl $1, %edx +# ZNVER1-NEXT: [4,1] . DeeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [4,2] . DeE-------R. . addq $32, %r8 +# ZNVER1-NEXT: [4,3] . D=eE------R. . cmpl %edi, %edx +# ZNVER1-NEXT: [5,0] . DeE------R. . addl $1, %edx +# ZNVER1-NEXT: [5,1] . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [5,2] . DeE-------R . addq $32, %r8 +# ZNVER1-NEXT: [5,3] . D=eE------R . cmpl %edi, %edx +# ZNVER1-NEXT: [6,0] . .DeE------R . addl $1, %edx +# ZNVER1-NEXT: [6,1] . .DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [6,2] . .DeE-------R . addq $32, %r8 +# ZNVER1-NEXT: [6,3] . .D=eE------R . cmpl %edi, %edx +# ZNVER1-NEXT: [7,0] . . DeE------R . addl $1, %edx +# ZNVER1-NEXT: [7,1] . . DeeeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [7,2] . . DeE-------R . addq $32, %r8 +# ZNVER1-NEXT: [7,3] . . D=eE------R . cmpl %edi, %edx +# ZNVER1-NEXT: [8,0] . . DeE------R . addl $1, %edx +# ZNVER1-NEXT: [8,1] . . DeeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [8,2] . . DeE-------R. addq $32, %r8 +# ZNVER1-NEXT: [8,3] . . D=eE------R. cmpl %edi, %edx +# ZNVER1-NEXT: [9,0] . . DeE------R. addl $1, %edx +# ZNVER1-NEXT: [9,1] . . DeeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: [9,2] . . DeE-------R addq $32, %r8 +# ZNVER1-NEXT: [9,3] . . D=eE------R cmpl %edi, %edx + +# HASWELL: [0,0] DeER . . . . . addl $1, %edx +# HASWELL-NEXT: [0,1] DeeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [0,2] DeE-------R . . . addq $32, %r8 +# HASWELL-NEXT: [0,3] .DeE------R . . . cmpl %edi, %edx +# HASWELL-NEXT: [1,0] .DeE------R . . . addl $1, %edx +# HASWELL-NEXT: [1,1] .DeeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [1,2] . DeE------R . . . 
addq $32, %r8 +# HASWELL-NEXT: [1,3] . DeE------R . . . cmpl %edi, %edx +# HASWELL-NEXT: [2,0] . DeE------R . . . addl $1, %edx +# HASWELL-NEXT: [2,1] . DeeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [2,2] . DeE-------R . . . addq $32, %r8 +# HASWELL-NEXT: [2,3] . DeE-------R . . . cmpl %edi, %edx +# HASWELL-NEXT: [3,0] . DeE------R . . . addl $1, %edx +# HASWELL-NEXT: [3,1] . DeeeeeeeeER. . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [3,2] . DeE-------R. . . addq $32, %r8 +# HASWELL-NEXT: [3,3] . DeE------R. . . cmpl %edi, %edx +# HASWELL-NEXT: [4,0] . DeE------R. . . addl $1, %edx +# HASWELL-NEXT: [4,1] . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [4,2] . .DeE------R . . addq $32, %r8 +# HASWELL-NEXT: [4,3] . .DeE------R . . cmpl %edi, %edx +# HASWELL-NEXT: [5,0] . .DeE------R . . addl $1, %edx +# HASWELL-NEXT: [5,1] . . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [5,2] . . DeE-------R . . addq $32, %r8 +# HASWELL-NEXT: [5,3] . . DeE-------R . . cmpl %edi, %edx +# HASWELL-NEXT: [6,0] . . DeE------R . . addl $1, %edx +# HASWELL-NEXT: [6,1] . . DeeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [6,2] . . DeE-------R . . addq $32, %r8 +# HASWELL-NEXT: [6,3] . . DeE------R . . cmpl %edi, %edx +# HASWELL-NEXT: [7,0] . . DeE------R . . addl $1, %edx +# HASWELL-NEXT: [7,1] . . DeeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [7,2] . . DeE------R. . addq $32, %r8 +# HASWELL-NEXT: [7,3] . . DeE------R. . cmpl %edi, %edx +# HASWELL-NEXT: [8,0] . . DeE------R. . addl $1, %edx +# HASWELL-NEXT: [8,1] . . .DeeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [8,2] . . .DeE-------R. addq $32, %r8 +# HASWELL-NEXT: [8,3] . . .DeE-------R. cmpl %edi, %edx +# HASWELL-NEXT: [9,0] . . . DeE------R. addl $1, %edx +# HASWELL-NEXT: [9,1] . . . DeeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [9,2] . . . DeE-------R addq $32, %r8 +# HASWELL-NEXT: [9,3] . . . 
DeE------R cmpl %edi, %edx + +# BDWELL: [0,0] DeER . . . .. addl $1, %edx +# BDWELL-NEXT: [0,1] DeeeeeeeER. . .. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [0,2] DeE------R. . .. addq $32, %r8 +# BDWELL-NEXT: [0,3] .DeE-----R. . .. cmpl %edi, %edx +# BDWELL-NEXT: [1,0] .DeE-----R. . .. addl $1, %edx +# BDWELL-NEXT: [1,1] .DeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [1,2] . DeE-----R . .. addq $32, %r8 +# BDWELL-NEXT: [1,3] . DeE-----R . .. cmpl %edi, %edx +# BDWELL-NEXT: [2,0] . DeE-----R . .. addl $1, %edx +# BDWELL-NEXT: [2,1] . DeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [2,2] . DeE------R . .. addq $32, %r8 +# BDWELL-NEXT: [2,3] . DeE------R . .. cmpl %edi, %edx +# BDWELL-NEXT: [3,0] . DeE-----R . .. addl $1, %edx +# BDWELL-NEXT: [3,1] . DeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [3,2] . DeE------R . .. addq $32, %r8 +# BDWELL-NEXT: [3,3] . DeE-----R . .. cmpl %edi, %edx +# BDWELL-NEXT: [4,0] . DeE-----R . .. addl $1, %edx +# BDWELL-NEXT: [4,1] . DeeeeeeeER. .. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [4,2] . .DeE-----R. .. addq $32, %r8 +# BDWELL-NEXT: [4,3] . .DeE-----R. .. cmpl %edi, %edx +# BDWELL-NEXT: [5,0] . .DeE-----R. .. addl $1, %edx +# BDWELL-NEXT: [5,1] . . DeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [5,2] . . DeE------R .. addq $32, %r8 +# BDWELL-NEXT: [5,3] . . DeE------R .. cmpl %edi, %edx +# BDWELL-NEXT: [6,0] . . DeE-----R .. addl $1, %edx +# BDWELL-NEXT: [6,1] . . DeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [6,2] . . DeE------R .. addq $32, %r8 +# BDWELL-NEXT: [6,3] . . DeE-----R .. cmpl %edi, %edx +# BDWELL-NEXT: [7,0] . . DeE-----R .. addl $1, %edx +# BDWELL-NEXT: [7,1] . . DeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [7,2] . . DeE-----R .. addq $32, %r8 +# BDWELL-NEXT: [7,3] . . DeE-----R .. cmpl %edi, %edx +# BDWELL-NEXT: [8,0] . . DeE-----R .. addl $1, %edx +# BDWELL-NEXT: [8,1] . . .DeeeeeeeER. vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [8,2] . . 
.DeE------R. addq $32, %r8 +# BDWELL-NEXT: [8,3] . . .DeE------R. cmpl %edi, %edx +# BDWELL-NEXT: [9,0] . . . DeE-----R. addl $1, %edx +# BDWELL-NEXT: [9,1] . . . DeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [9,2] . . . DeE------R addq $32, %r8 +# BDWELL-NEXT: [9,3] . . . DeE-----R cmpl %edi, %edx # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions @@ -232,22 +232,22 @@ # ALL: [0] [1] [2] [3] -# BDWELL-NEXT: 0. 10 1.0 0.4 6.9 addl $1, %edx -# BDWELL-NEXT: 1. 10 4.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: 2. 10 1.0 0.4 8.7 addq $32, %r8 -# BDWELL-NEXT: 3. 10 1.0 0.0 8.3 cmpl %edi, %edx - -# HASWELL-NEXT: 0. 10 1.0 0.4 11.4 addl $1, %edx -# HASWELL-NEXT: 1. 10 8.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: 2. 10 1.0 0.4 14.2 addq $32, %r8 -# HASWELL-NEXT: 3. 10 1.0 0.0 13.8 cmpl %edi, %edx +# BDWELL-NEXT: 0. 10 1.0 0.4 4.5 addl $1, %edx +# BDWELL-NEXT: 1. 10 1.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: 2. 10 1.0 0.4 5.7 addq $32, %r8 +# BDWELL-NEXT: 3. 10 1.0 0.0 5.3 cmpl %edi, %edx + +# HASWELL-NEXT: 0. 10 1.0 0.4 5.4 addl $1, %edx +# HASWELL-NEXT: 1. 10 1.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: 2. 10 1.0 0.4 6.7 addq $32, %r8 +# HASWELL-NEXT: 3. 10 1.0 0.0 6.3 cmpl %edi, %edx # SKYLAKE-NEXT: 0. 10 1.9 0.1 30.6 addl $1, %edx # SKYLAKE-NEXT: 1. 10 32.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0 # SKYLAKE-NEXT: 2. 10 1.0 0.1 38.5 addq $32, %r8 # SKYLAKE-NEXT: 3. 10 2.0 0.0 37.5 cmpl %edi, %edx -# ZNVER1-NEXT: 0. 10 1.0 0.1 16.2 addl $1, %edx -# ZNVER1-NEXT: 1. 10 14.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0 -# ZNVER1-NEXT: 2. 10 1.0 0.1 20.5 addq $32, %r8 -# ZNVER1-NEXT: 3. 10 2.0 0.0 19.5 cmpl %edi, %edx +# ZNVER1-NEXT: 0. 10 1.0 0.1 5.4 addl $1, %edx +# ZNVER1-NEXT: 1. 10 1.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0 +# ZNVER1-NEXT: 2. 10 1.0 0.1 7.0 addq $32, %r8 +# ZNVER1-NEXT: 3. 10 2.0 0.0 6.0 cmpl %edi, %edx