Index: lib/Target/X86/X86InstrShiftRotate.td =================================================================== --- lib/Target/X86/X86InstrShiftRotate.td +++ lib/Target/X86/X86InstrShiftRotate.td @@ -650,9 +650,9 @@ // Double shift instructions (generalizations of rotate) //===----------------------------------------------------------------------===// -let Constraints = "$src1 = $dst", SchedRW = [WriteShiftDouble] in { +let Constraints = "$src1 = $dst" in { -let Uses = [CL] in { +let Uses = [CL], SchedRW = [WriteSHDrrc] in { def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}", @@ -685,7 +685,7 @@ TB; } -let isCommutable = 1 in { // These instructions commute to each other. +let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other. def SHLD16rri8 : Ii8<0xA4, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, u8imm:$src3), @@ -731,8 +731,8 @@ } } // Constraints = "$src = $dst", SchedRW -let SchedRW = [WriteShiftDoubleLd, WriteRMW] in { -let Uses = [CL] in { +//let Uses = [CL], SchedRW = [WriteSHDmrc, WriteRMW] in { +let Uses = [CL], SchedRW = [WriteSHDmrc] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL), @@ -761,6 +761,8 @@ addr:$dst)]>, TB; } +//let SchedRW = [WriteSHDmri, WriteRMW] in { +let SchedRW = [WriteSHDmri] in { def SHLD16mri8 : Ii8<0xA4, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -105,6 +105,28 @@ // 2/3/7 cycle to recompute the address. def : WriteRes; +// SHLD/SHRD +def : WriteRes { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def : WriteRes { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def : WriteRes { + let Latency = 11; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,1,2]; +} + // Arithmetic. defm : BWWriteResPair; // Simple integer ALU op. defm : BWWriteResPair; // Integer ALU + flags op. @@ -758,8 +780,6 @@ def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr", "PDEP(32|64)rr", "PEXT(32|64)rr", - "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8", "(V?)CVTDQ2PS(Y?)rr")>; def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> { @@ -1067,14 +1087,6 @@ def: InstRW<[BWWriteResGroup66], (instrs POP16r, POP32r, POP64r)>; def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>; -def BWWriteResGroup67 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[BWWriteResGroup67], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def BWWriteResGroup68 : SchedWriteRes<[BWPort1,BWPort6,BWPort06,BWPort0156]> { let Latency = 6; let NumMicroOps = 4; @@ -1319,13 +1331,13 @@ def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTB(Y?)rm", "VPBROADCASTW(Y?)rm")>; -def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[BWWriteResGroup111], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; +//def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> { +// let Latency = 9; +// let NumMicroOps = 4; +// let ResourceCycles = [1,1,1,1]; +//} +//def: InstRW<[BWWriteResGroup111], (instregex "SHLD(16|32|64)mri8", +// "SHRD(16|32|64)mri8")>; def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { let Latency = 9; @@ -1392,13 +1404,13 @@ } def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>; -def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> { - let Latency = 11; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,1,2]; -} -def: InstRW<[BWWriteResGroup130], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; +//def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> { +// let Latency = 11; +// let NumMicroOps = 6; +// let ResourceCycles = [1,1,1,1,2]; +//} +//def: InstRW<[BWWriteResGroup130], (instregex "SHLD(16|32|64)mrCL", +// "SHRD(16|32|64)mrCL")>; def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { let Latency = 11; Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -118,6 +118,28 @@ defm : X86WriteRes; def : WriteRes; +// SHLD/SHRD +def : WriteRes { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def : WriteRes { + let Latency = 10; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def : WriteRes { + let Latency = 12; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,1,2]; +} + defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -1250,8 +1272,6 @@ def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirr", "PDEP(32|64)rr", "PEXT(32|64)rr", - "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8", "(V?)CVTDQ2PS(Y?)rr")>; def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> { @@ -1523,13 +1543,13 @@ } def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>; -def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> { - let Latency = 10; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[HWWriteResGroup86], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; +//def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> { +// let Latency = 10; +// let NumMicroOps = 4; +// let ResourceCycles = [1,1,1,1]; +//} +//def: InstRW<[HWWriteResGroup86], (instregex "SHLD(16|32|64)mri8", +// "SHRD(16|32|64)mri8")>; def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> { let Latency = 9; @@ -1648,14 +1668,6 @@ } def: InstRW<[HWWriteResGroup104], (instregex "VCVTDQ2PDYrm")>; -def HWWriteResGroup105 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[HWWriteResGroup105], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def HWWriteResGroup107 : SchedWriteRes<[HWPort1,HWPort6,HWPort06,HWPort0156]> { let Latency = 6; let NumMicroOps = 4; @@ -1670,13 +1682,13 @@ } def: InstRW<[HWWriteResGroup108], (instrs STD)>; -def HWWriteResGroup109 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort0156]> { - let Latency = 12; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,1,2]; -} -def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; +//def HWWriteResGroup109 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort06,HWPort0156]> { +// let Latency = 12; +// let NumMicroOps = 6; +// let ResourceCycles = [1,1,1,1,2]; +//} +//def: InstRW<[HWWriteResGroup109], (instregex "SHLD(16|32|64)mrCL", +// "SHRD(16|32|64)mrCL")>; def HWWriteResGroup114 : SchedWriteRes<[HWPort6,HWPort06,HWPort15,HWPort0156]> { let Latency = 7; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -106,6 +106,28 @@ def : WriteRes; def : WriteRes; +// SHLD/SHRD +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def : WriteRes { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; +} +def : WriteRes { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def : WriteRes { + let Latency = 10; + let NumMicroOps = 7; + let ResourceCycles = [1,2,3,1]; +} + defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -641,14 +663,6 @@ def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ)>; def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>; -def SBWriteResGroup19 : SchedWriteRes<[SBPort05,SBPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup19], (instregex "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8")>; - def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> { let Latency = 3; let NumMicroOps = 1; @@ -739,14 +753,6 @@ } def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>; -def SBWriteResGroup29_3 : SchedWriteRes<[SBPort05,SBPort015]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [3,1]; -} -def: InstRW<[SBWriteResGroup29_3], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> { let Latency = 5; let NumMicroOps = 1; @@ -1038,13 +1044,13 @@ } def: InstRW<[SBWriteResGroup87], (instrs FARCALL64)>; -def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; +//def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { +// let Latency = 8; +// let NumMicroOps = 5; +// let ResourceCycles = [1,2,1,1]; +//} +//def: InstRW<[SBWriteResGroup88], (instregex "SHLD(16|32|64)mri8", +// "SHRD(16|32|64)mri8")>; def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 9; @@ -1141,13 +1147,13 @@ def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "ILD_F(16|32|64)m")>; -def SBWriteResGroup103_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { - let Latency = 10; - let NumMicroOps = 7; - let ResourceCycles = [1,2,3,1]; -} -def: InstRW<[SBWriteResGroup103_2], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; +//def SBWriteResGroup103_2 : SchedWriteRes<[SBPort4,SBPort23,SBPort05,SBPort015]> { +// let Latency = 10; +// let NumMicroOps = 7; +// let ResourceCycles = [1,2,3,1]; +//} +//def: InstRW<[SBWriteResGroup103_2], (instregex "SHLD(16|32|64)mrCL", +// "SHRD(16|32|64)mrCL")>; def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> { let Latency = 11; Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -104,6 +104,28 @@ // 2/3/7 cycle to recompute the address. def : WriteRes; +// SHLD/SHRD +def : WriteRes { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def : WriteRes { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def : WriteRes { + let Latency = 11; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,2,1]; +} + // Arithmetic. defm : SKLWriteResPair; // Simple integer ALU op. defm : SKLWriteResPair; // Integer ALU + flags op. @@ -754,9 +776,7 @@ let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr", - "PEXT(32|64)rr", - "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8")>; + "PEXT(32|64)rr")>; def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> { let Latency = 4; @@ -1107,14 +1127,6 @@ } def: InstRW<[SKLWriteResGroup78], (instregex "(V?)CVTSI642SSrr")>; -def SKLWriteResGroup79 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SKLWriteResGroup79], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 6; let NumMicroOps = 4; @@ -1403,13 +1415,13 @@ def: InstRW<[SKLWriteResGroup128], (instregex "(V?)PHADDSWrm", "(V?)PHSUBSWrm")>; -def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup130], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; +//def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> { +// let Latency = 9; +// let NumMicroOps = 4; +// let ResourceCycles = [1,1,1,1]; +//} +//def: InstRW<[SKLWriteResGroup130], (instregex "SHLD(16|32|64)mri8", +// "SHRD(16|32|64)mri8")>; def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { let Latency = 9; @@ -1530,13 +1542,13 @@ "CVT(T?)PD2DQrm", "MMX_CVT(T?)PD2PIirm")>; -def SKLWriteResGroup153 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 11; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,2,1]; -} -def: InstRW<[SKLWriteResGroup153], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; +//def SKLWriteResGroup153 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { +// let Latency = 11; +// let NumMicroOps = 6; +// let ResourceCycles = [1,1,1,2,1]; +//} +//def: InstRW<[SKLWriteResGroup153], (instregex "SHLD(16|32|64)mrCL", +// "SHRD(16|32|64)mrCL")>; def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 11; Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -104,6 +104,28 @@ // 2/3/7 cycle to recompute the address. def : WriteRes; +// SHLD/SHRD +def : WriteRes { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def : WriteRes { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def : WriteRes { + let Latency = 11; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,2,1]; +} + // Arithmetic. defm : SKXWriteResPair; // Simple integer ALU op. defm : SKXWriteResPair; // Integer ALU + flags op. @@ -794,9 +816,7 @@ let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr", - "PEXT(32|64)rr", - "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8")>; + "PEXT(32|64)rr")>; def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> { let Latency = 4; @@ -1281,14 +1301,6 @@ "VCVTSI642SSZrr", "VCVTUSI642SSZrr")>; -def SKXWriteResGroup83 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SKXWriteResGroup83], (instregex "SHLD(16|32|64)rrCL", - "SHRD(16|32|64)rrCL")>; - def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> { let Latency = 6; let NumMicroOps = 4; @@ -1841,13 +1853,13 @@ def: InstRW<[SKXWriteResGroup143], (instregex "(V?)PHADDSWrm", "(V?)PHSUBSWrm")>; -def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKXWriteResGroup145], (instregex "SHLD(16|32|64)mri8", - "SHRD(16|32|64)mri8")>; +//def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> { +// let Latency = 9; +// let NumMicroOps = 4; +// let ResourceCycles = [1,1,1,1]; +//} +//def: InstRW<[SKXWriteResGroup145], (instregex "SHLD(16|32|64)mri8", +// "SHRD(16|32|64)mri8")>; def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> { let Latency = 9; @@ -2044,13 +2056,13 @@ } def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>; -def SKXWriteResGroup168 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { - let Latency = 11; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,2,1]; -} -def: InstRW<[SKXWriteResGroup168], (instregex "SHLD(16|32|64)mrCL", - "SHRD(16|32|64)mrCL")>; +//def SKXWriteResGroup168 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { +// let Latency = 11; +// let NumMicroOps = 6; +// let ResourceCycles = [1,1,1,2,1]; +//} +//def: InstRW<[SKXWriteResGroup168], (instregex "SHLD(16|32|64)mrCL", +// "SHRD(16|32|64)mrCL")>; def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { let Latency = 11; Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -118,6 +118,11 @@ def WriteIMulH : SchedWrite; // Integer multiplication, high part. def WriteLEA : SchedWrite; // LEA instructions can't fold loads. +def WriteSHDrri : SchedWrite; +def WriteSHDrrc : SchedWrite; +def WriteSHDmri : SchedWrite; +def WriteSHDmrc : SchedWrite; + // Integer division. defm WriteDiv8 : X86SchedWritePair; defm WriteDiv16 : X86SchedWritePair; Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -72,6 +72,24 @@ // A folded store needs a cycle on Port0 for the store data. def : WriteRes; +// SHLD/SHRD (32-bit versions) +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 4; + let ResourceCycles = [4]; +} +def : WriteRes { + let Latency = 4; + let ResourceCycles = [4]; +} + //////////////////////////////////////////////////////////////////////////////// // Arithmetic. //////////////////////////////////////////////////////////////////////////////// @@ -560,9 +578,7 @@ def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r, PUSH16rmm, PUSH32rmm, PUSH64rmm, LODSB, LODSL, LODSQ, LODSW, - SCASB, SCASL, SCASQ, SCASW, - SHLD32rrCL, SHRD32rrCL, - SHLD32rri8, SHRD32rri8)>; + SCASB, SCASL, SCASQ, SCASW)>; def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8", "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)", "XADD(8|16|32|64)rr", @@ -596,8 +612,8 @@ } def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO, JCXZ, JECXZ, JRCXZ, - SHLD32mrCL, SHRD32mrCL, - SHLD32mri8, SHRD32mri8, +// SHLD32mrCL, SHRD32mrCL, +// SHLD32mri8, SHRD32mri8, LD_F80m)>; def : InstRW<[AtomWrite01_4], (instregex "PH(ADD|SUB)Drm", "(MMX_)?PEXTRWrr(_REV)?")>; Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -208,31 +208,27 @@ defm : JWriteResIntPair; -def JWriteSHLDrri : SchedWriteRes<[JALU01]> { +// SHLD/SHRD +def : WriteRes { let Latency = 3; let ResourceCycles = [6]; let NumMicroOps = 6; } -def: InstRW<[JWriteSHLDrri], (instrs SHLD16rri8, SHLD32rri8, SHLD64rri8, - SHRD16rri8, SHRD32rri8, SHRD64rri8)>; - -def JWriteSHLDrrCL : SchedWriteRes<[JALU01]> { +def : WriteRes { let Latency = 4; let ResourceCycles = [8]; let NumMicroOps = 7; } -def: InstRW<[JWriteSHLDrrCL], (instrs SHLD16rrCL, SHLD32rrCL, SHLD64rrCL, - SHRD16rrCL, SHRD32rrCL, SHRD64rrCL)>; - -def JWriteSHLDm : SchedWriteRes<[JLAGU, JALU01]> { +def : WriteRes { + let Latency = 9; + let ResourceCycles = [1, 22]; + let NumMicroOps = 8; +} +def : WriteRes { let Latency = 9; let ResourceCycles = [1, 22]; let NumMicroOps = 8; } -def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8, - SHLD16mrCL, SHLD32mrCL, SHLD64mrCL, - SHRD16mri8, SHRD32mri8, SHRD64mri8, - SHRD16mrCL, SHRD32mrCL, SHRD64mrCL)>; //////////////////////////////////////////////////////////////////////////////// // Loads, stores, and moves, not folded with other operations. Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -93,6 +93,19 @@ // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; +def : WriteRes; +def : WriteRes; +def : WriteRes { + let Latency = 4; + let ResourceCycles = [1, 2]; + let NumMicroOps = 2; +} +def : WriteRes { + let Latency = 4; + let ResourceCycles = [1, 2]; + let NumMicroOps = 2; +} + defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -173,6 +173,11 @@ def : WriteRes; def : WriteRes { let Latency = 8; } +def : WriteRes; // not used +def : WriteRes; // not used +def : WriteRes; // not used +def : WriteRes; // not used + def : WriteRes; def : WriteRes; defm : ZnWriteResPair; Index: test/tools/llvm-mca/X86/SLM/resources-x86_64.s =================================================================== --- test/tools/llvm-mca/X86/SLM/resources-x86_64.s +++ test/tools/llvm-mca/X86/SLM/resources-x86_64.s @@ -1281,7 +1281,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: 400.00 - - - - 533.00 375.00 491.00 +# CHECK-NEXT: 400.00 - - - - 545.00 375.00 479.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -1797,28 +1797,28 @@ # CHECK-NEXT: - - - - - 0.50 0.50 1.00 sbbq (%rax), %rdi # CHECK-NEXT: - - - - - 1.00 - - shldw %cl, %si, %di # CHECK-NEXT: - - - - - 1.00 - - shrdw %cl, %si, %di -# CHECK-NEXT: - - - - - 1.00 - 2.00 shldw %cl, %si, (%rax) -# CHECK-NEXT: - - - - - 1.00 - 2.00 shrdw %cl, %si, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shldw %cl, %si, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shrdw %cl, %si, (%rax) # CHECK-NEXT: - - - - - 1.00 - - shldw $7, %si, %di # CHECK-NEXT: - - - - - 1.00 - - shrdw $7, %si, %di -# CHECK-NEXT: - - - - - 1.00 - 2.00 shldw $7, %si, (%rax) -# CHECK-NEXT: - - - - - 1.00 - 2.00 shrdw $7, %si, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shldw $7, %si, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shrdw $7, %si, (%rax) # CHECK-NEXT: - - - - - 1.00 - - shldl %cl, %esi, %edi # CHECK-NEXT: - - - - - 1.00 - - shrdl %cl, %esi, %edi -# CHECK-NEXT: - - - - - 1.00 - 2.00 shldl %cl, %esi, (%rax) -# CHECK-NEXT: - - - - - 1.00 - 2.00 shrdl %cl, %esi, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shldl %cl, %esi, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shrdl %cl, %esi, (%rax) # CHECK-NEXT: - - - - - 1.00 - - shldl $7, %esi, %edi # CHECK-NEXT: - - - - - 1.00 - - shrdl $7, %esi, %edi -# CHECK-NEXT: - - - - - 1.00 - 2.00 shldl $7, %esi, (%rax) -# CHECK-NEXT: - - - - - 1.00 - 2.00 shrdl $7, %esi, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shldl $7, %esi, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shrdl $7, %esi, (%rax) # CHECK-NEXT: - - - - - 1.00 - - shldq %cl, %rsi, %rdi # CHECK-NEXT: - - - - - 1.00 - - shrdq %cl, %rsi, %rdi -# CHECK-NEXT: - - - - - 1.00 - 2.00 shldq %cl, %rsi, (%rax) -# CHECK-NEXT: - - - - - 1.00 - 2.00 shrdq %cl, %rsi, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shldq %cl, %rsi, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shrdq %cl, %rsi, (%rax) # CHECK-NEXT: - - - - - 1.00 - - shldq $7, %rsi, %rdi # CHECK-NEXT: - - - - - 1.00 - - shrdq $7, %rsi, %rdi -# CHECK-NEXT: - - - - - 1.00 - 2.00 shldq $7, %rsi, (%rax) -# CHECK-NEXT: - - - - - 1.00 - 2.00 shrdq $7, %rsi, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shldq $7, %rsi, (%rax) +# CHECK-NEXT: - - - - - 2.00 - 1.00 shrdq $7, %rsi, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 - subb $7, %al # CHECK-NEXT: - - - - - 0.50 0.50 - subb $7, %dil # CHECK-NEXT: - - - - - 1.00 1.00 2.00 subb $7, (%rax)