Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -1341,52 +1341,52 @@ def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))], - IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteBitScan]>; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))], - IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteBitScanLd]>; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], - IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteBitScan]>; def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))], - IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteBitScanLd]>; def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))], - IIC_BIT_SCAN_REG>, PS, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, PS, Sched<[WriteBitScan]>; def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))], - IIC_BIT_SCAN_MEM>, PS, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, PS, Sched<[WriteBitScanLd]>; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], - IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, PS, OpSize16, Sched<[WriteBitScan]>; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))], - IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, PS, OpSize16, Sched<[WriteBitScanLd]>; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], - IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, PS, OpSize32, Sched<[WriteBitScan]>; def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))], - IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, PS, OpSize32, Sched<[WriteBitScanLd]>; def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], - IIC_BIT_SCAN_REG>, PS, Sched<[WriteShift]>; + IIC_BIT_SCAN_REG>, PS, Sched<[WriteBitScan]>; def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))], - IIC_BIT_SCAN_MEM>, PS, Sched<[WriteShiftLd]>; + IIC_BIT_SCAN_MEM>, PS, Sched<[WriteBitScanLd]>; } // Defs = [EFLAGS] let SchedRW = [WriteMicrocoded] in { @@ -2269,32 +2269,32 @@ def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "lzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)], - IIC_LZCNT_RR>, XS, OpSize16, Sched<[WriteIMul]>; + IIC_LZCNT_RR>, XS, OpSize16, Sched<[WriteLZCNT]>; def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "lzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctlz (loadi16 addr:$src))), (implicit EFLAGS)], IIC_LZCNT_RM>, XS, OpSize16, - Sched<[WriteIMulLd]>; + Sched<[WriteLZCNTLd]>; def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)], - IIC_LZCNT_RR>, XS, OpSize32, Sched<[WriteIMul]>; + IIC_LZCNT_RR>, XS, OpSize32, Sched<[WriteLZCNT]>; def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "lzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctlz (loadi32 addr:$src))), (implicit EFLAGS)], IIC_LZCNT_RM>, XS, OpSize32, - Sched<[WriteIMulLd]>; + Sched<[WriteLZCNTLd]>; def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)], - IIC_LZCNT_RR>, XS, Sched<[WriteIMul]>; + IIC_LZCNT_RR>, XS, Sched<[WriteLZCNT]>; def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctlz (loadi64 addr:$src))), (implicit EFLAGS)], IIC_LZCNT_RM>, XS, - Sched<[WriteIMulLd]>; + Sched<[WriteLZCNTLd]>; } //===----------------------------------------------------------------------===// @@ -2304,32 +2304,32 @@ def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "tzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)], - IIC_TZCNT_RR>, XS, OpSize16, Sched<[WriteIMul]>; + IIC_TZCNT_RR>, XS, OpSize16, Sched<[WriteTZCNT]>; def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "tzcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (cttz (loadi16 addr:$src))), (implicit EFLAGS)], IIC_TZCNT_RM>, XS, OpSize16, - Sched<[WriteIMulLd]>; + Sched<[WriteTZCNTLd]>; def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "tzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)], - IIC_TZCNT_RR>, XS, OpSize32, Sched<[WriteIMul]>; + IIC_TZCNT_RR>, XS, OpSize32, Sched<[WriteTZCNT]>; def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "tzcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (cttz (loadi32 addr:$src))), (implicit EFLAGS)], IIC_TZCNT_RM>, XS, OpSize32, - Sched<[WriteIMulLd]>; + Sched<[WriteTZCNTLd]>; def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "tzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)], - IIC_TZCNT_RR>, XS, Sched<[WriteIMul]>; + IIC_TZCNT_RR>, XS, Sched<[WriteTZCNT]>; def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "tzcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (cttz (loadi64 addr:$src))), (implicit EFLAGS)], IIC_TZCNT_RM>, XS, - Sched<[WriteIMulLd]>; + Sched<[WriteTZCNTLd]>; } multiclass bmi_bls, Sched<[WriteFAdd]>, + IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>, OpSize16, XS; def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "popcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctpop (loadi16 addr:$src))), (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, - Sched<[WriteFAddLd]>, OpSize16, XS; + Sched<[WritePOPCNTLd]>, OpSize16, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)], - IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>, + IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>, OpSize32, XS; def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctpop (loadi32 addr:$src))), (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, - Sched<[WriteFAddLd]>, OpSize32, XS; + Sched<[WritePOPCNTLd]>, OpSize32, XS; def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)], - IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>, XS; + IIC_SSE_POPCNT_RR>, Sched<[WritePOPCNT]>, XS; def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctpop (loadi64 addr:$src))), (implicit EFLAGS)], IIC_SSE_POPCNT_RM>, - Sched<[WriteFAddLd]>, XS; + Sched<[WritePOPCNTLd]>, XS; } // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -110,6 +110,12 @@ def : WriteRes; // LEA instructions can't fold loads. +// Bit counts. +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; +defm : BWWriteResPair; + // Integer shifts and rotates. defm : BWWriteResPair; @@ -851,13 +857,9 @@ def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0", "ADD_FST0r", "ADD_FrST0", - "BSF(16|32|64)rr", - "BSR(16|32|64)rr", - "LZCNT(16|32|64)rr", "MMX_CVTPI2PSirr", "PDEP(32|64)rr", "PEXT(32|64)rr", - "POPCNT(16|32|64)rr", "SHLD(16|32|64)rri8", "SHRD(16|32|64)rri8", "SUBR_FPrST0", @@ -866,7 +868,6 @@ "SUB_FPrST0", "SUB_FST0r", "SUB_FrST0", - "TZCNT(16|32|64)rr", "(V?)ADDPD(Y?)rr", "(V?)ADDPS(Y?)rr", "(V?)ADDSDrr", @@ -1889,16 +1890,11 @@ } def: InstRW<[BWWriteResGroup91], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi8, IMUL64rmi32)>; def: InstRW<[BWWriteResGroup91], (instrs IMUL8m, MUL8m)>; -def: InstRW<[BWWriteResGroup91], (instregex "BSF(16|32|64)rm", - "BSR(16|32|64)rm", - "LZCNT(16|32|64)rm", - "MMX_CVTPI2PSirm", +def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm", "MMX_CVTPS2PIirm", "MMX_CVTTPS2PIirm", "PDEP(32|64)rm", "PEXT(32|64)rm", - "POPCNT(16|32|64)rm", - "TZCNT(16|32|64)rm", "(V?)ADDPDrm", "(V?)ADDPSrm", "(V?)ADDSDrm", Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -121,6 +121,12 @@ // the port to read all inputs. We don't model that. def : WriteRes; +// Bit counts. +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; + // This is quite rough, latency depends on the dividend. def : WriteRes { let Latency = 25; @@ -1050,20 +1056,15 @@ def: InstRW<[HWWriteResGroup12], (instrs MUL8m, MUL16m, IMUL8m, IMUL16m, IMUL16rm, IMUL16rmi, IMUL16rmi8, IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; -def: InstRW<[HWWriteResGroup12], (instregex "BSF(16|32|64)rm", - "BSR(16|32|64)rm", - "FCOM32m", +def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m", "FCOM64m", "FCOMP32m", "FCOMP64m", - "LZCNT(16|32|64)rm", "MMX_CVTPI2PSirm", "MMX_CVTPS2PIirm", "MMX_CVTTPS2PIirm", "PDEP(32|64)rm", "PEXT(32|64)rm", - "POPCNT(16|32|64)rm", - "TZCNT(16|32|64)rm", "(V?)ADDSDrm", "(V?)ADDSSrm", "(V?)CMPSDrm", @@ -1787,13 +1788,9 @@ def: InstRW<[HWWriteResGroup50], (instregex "ADD_FPrST0", "ADD_FST0r", "ADD_FrST0", - "BSF(16|32|64)rr", - "BSR(16|32|64)rr", - "LZCNT(16|32|64)rr", "MMX_CVTPI2PSirr", "PDEP(32|64)rr", "PEXT(32|64)rr", - "POPCNT(16|32|64)rr", "SHLD(16|32|64)rri8", "SHRD(16|32|64)rri8", "SUBR_FPrST0", @@ -1802,7 +1799,6 @@ "SUB_FPrST0", "SUB_FST0r", "SUB_FrST0", - "TZCNT(16|32|64)rr", "(V?)ADDPD(Y?)rr", "(V?)ADDPS(Y?)rr", "(V?)ADDSDrr", Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -112,6 +112,12 @@ // the port to read all inputs. We don't model that. def : WriteRes; +// Bit counts. +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; +defm : SBWriteResPair; + // Scalar and vector floating point. def : WriteRes; def : WriteRes { let Latency = 6; } @@ -672,8 +678,6 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0", "ADD_FST0r", "ADD_FrST0", - "BSF(16|32|64)rr", - "BSR(16|32|64)rr", "CRC32r(16|32|64)r8", "CRC32r(16|32|64)r64", "MMX_CVTPI2PSirr", @@ -1412,9 +1416,7 @@ let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup72], (instregex "BSF(16|32|64)rm", - "BSR(16|32|64)rm", - "CRC32r(16|32|64)m64", +def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m64", "CRC32r(16|32|64)m8", "FCOM32m", "FCOM64m", Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -116,6 +116,12 @@ def : WriteRes; // LEA instructions can't fold loads. +// Bit counts. +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; +defm : SKLWriteResPair; + // Integer shifts and rotates. defm : SKLWriteResPair; @@ -868,15 +874,10 @@ } def: InstRW<[SKLWriteResGroup29], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>; def: InstRW<[SKLWriteResGroup29], (instrs IMUL8r, MUL8r)>; -def: InstRW<[SKLWriteResGroup29], (instregex "BSF(16|32|64)rr", - "BSR(16|32|64)rr", - "LZCNT(16|32|64)rr", - "PDEP(32|64)rr", +def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr", - "POPCNT(16|32|64)rr", "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8", - "TZCNT(16|32|64)rr")>; + "SHRD(16|32|64)rri8")>; def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> { let Latency = 3; @@ -1880,13 +1881,8 @@ } def: InstRW<[SKLWriteResGroup107], (instrs IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; def: InstRW<[SKLWriteResGroup107], (instrs IMUL8m, MUL8m)>; -def: InstRW<[SKLWriteResGroup107], (instregex "BSF(16|32|64)rm", - "BSR(16|32|64)rm", - "LZCNT(16|32|64)rm", - "PDEP(32|64)rm", - "PEXT(32|64)rm", - "POPCNT(16|32|64)rm", - "TZCNT(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm", + "PEXT(32|64)rm")>; def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { let Latency = 8; Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -119,6 +119,12 @@ // Integer shifts and rotates. defm : SKXWriteResPair; +// Bit counts. +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; +defm : SKXWriteResPair; + // Loads, stores, and moves, not folded with other operations. def : WriteRes { let Latency = 5; } def : WriteRes; @@ -1709,15 +1715,10 @@ } def: InstRW<[SKXWriteResGroup31], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>; def: InstRW<[SKXWriteResGroup31], (instrs IMUL8r, MUL8r)>; -def: InstRW<[SKXWriteResGroup31], (instregex "BSF(16|32|64)rr", - "BSR(16|32|64)rr", - "LZCNT(16|32|64)rr", - "PDEP(32|64)rr", +def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr", - "POPCNT(16|32|64)rr", "SHLD(16|32|64)rri8", - "SHRD(16|32|64)rri8", - "TZCNT(16|32|64)rr")>; + "SHRD(16|32|64)rri8")>; def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> { let Latency = 3; @@ -3907,13 +3908,8 @@ } def: InstRW<[SKXWriteResGroup118], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; def: InstRW<[SKXWriteResGroup118], (instrs IMUL8m, MUL8m)>; -def: InstRW<[SKXWriteResGroup118], (instregex "BSF(16|32|64)rm", - "BSR(16|32|64)rm", - "LZCNT(16|32|64)rm", - "PDEP(32|64)rm", - "PEXT(32|64)rm", - "POPCNT(16|32|64)rm", - "TZCNT(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm", + "PEXT(32|64)rm")>; def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> { let Latency = 8; Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -46,6 +46,11 @@ defm WriteIDiv : X86SchedWritePair; // Integer division. def WriteLEA : SchedWrite; // LEA instructions can't fold loads. +defm WriteBitScan : X86SchedWritePair; // Bit scan forward/reverse. +defm WritePOPCNT : X86SchedWritePair; // Bit population count. +defm WriteLZCNT : X86SchedWritePair; // Leading zero count. +defm WriteTZCNT : X86SchedWritePair; // Trailing zero count. + // Integer shifts and rotates. defm WriteShift : X86SchedWritePair; Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -134,27 +134,11 @@ // FIXME: SAGU 3-operand LEA def : WriteRes; -// FIXME: Why do bitcounts use WriteIMul? -def JWriteLZCNT : SchedWriteRes<[JALU01]> { -} -def JWriteLZCNTLd : SchedWriteRes<[JLAGU, JALU01]> { - let Latency = 4; -} -def : InstRW<[JWriteLZCNT], (instrs LZCNT16rr, LZCNT32rr, LZCNT64rr, - POPCNT16rr, POPCNT32rr, POPCNT64rr)>; -def : InstRW<[JWriteLZCNTLd], (instrs LZCNT16rm, LZCNT32rm, LZCNT64rm, - POPCNT16rm, POPCNT32rm, POPCNT64rm)>; - -def JWriteTZCNT : SchedWriteRes<[JALU01]> { - let Latency = 2; - let ResourceCycles = [2]; -} -def JWriteTZCNTLd : SchedWriteRes<[JLAGU, JALU01]> { - let Latency = 5; - let ResourceCycles = [1, 2]; -} -def : InstRW<[JWriteTZCNT], (instrs TZCNT16rr, TZCNT32rr, TZCNT64rr)>; -def : InstRW<[JWriteTZCNTLd], (instrs TZCNT16rm, TZCNT32rm, TZCNT64rm)>; +// Bit counts. +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; +defm : JWriteResIntPair; def JWriteIMul64 : SchedWriteRes<[JALU1, JMul]> { let Latency = 6; Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -97,6 +97,12 @@ // the port to read all inputs. We don't model that. def : WriteRes; +// Bit counts. +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; +defm : SLMWriteResPair; + // This is quite rough, latency depends on the dividend. def : WriteRes { let Latency = 25; Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -152,6 +152,12 @@ defm : ZnWriteResPair; defm : ZnWriteResPair; +// Bit counts. +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; +defm : ZnWriteResPair; + // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; @@ -522,19 +528,6 @@ let Latency = 6; } -def ZnWriteALULat3 : SchedWriteRes<[ZnALU]> { - let Latency = 3; -} -def ZnWriteALULat3Ld : SchedWriteRes<[ZnAGU, ZnALU]> { - let Latency = 7; -} - -// BSF BSR. -// r,r. -def : InstRW<[ZnWriteALULat3], (instregex "BS(R|F)(16|32|64)rr")>; -// r,m. -def : InstRW<[ZnWriteALULat3Ld, ReadAfterLd], (instregex "BS(R|F)(16|32|64)rm")>; - // BT. // r,r/i. def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>; @@ -630,12 +623,6 @@ def : InstRW<[WriteShift], (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>; -// LZCNT TZCNT. -// r,r. -def : InstRW<[ZnWriteALULat2], (instregex "(LZCNT|TZCNT)(16|32|64)rr")>; -// r,m. -def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "(LZCNT|TZCNT)(16|32|64)rm")>; - //-- Misc instructions --// // CMPXCHG. def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> { Index: test/CodeGen/X86/popcnt-schedule.ll =================================================================== --- test/CodeGen/X86/popcnt-schedule.ll +++ test/CodeGen/X86/popcnt-schedule.ll @@ -70,8 +70,8 @@ ; ; ZNVER1-LABEL: test_ctpop_i16: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: popcntw (%rsi), %cx # sched: [10:1.00] -; ZNVER1-NEXT: popcntw %di, %ax # sched: [3:1.00] +; ZNVER1-NEXT: popcntw (%rsi), %cx # sched: [5:0.50] +; ZNVER1-NEXT: popcntw %di, %ax # sched: [1:0.25] ; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] ; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax ; ZNVER1-NEXT: retq # sched: [1:0.50] @@ -135,8 +135,8 @@ ; ; ZNVER1-LABEL: test_ctpop_i32: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: popcntl (%rsi), %ecx # sched: [10:1.00] -; ZNVER1-NEXT: popcntl %edi, %eax # sched: [3:1.00] +; ZNVER1-NEXT: popcntl (%rsi), %ecx # sched: [5:0.50] +; ZNVER1-NEXT: popcntl %edi, %eax # sched: [1:0.25] ; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = load i32, i32 *%a1 @@ -199,8 +199,8 @@ ; ; ZNVER1-LABEL: test_ctpop_i64: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: popcntq (%rsi), %rcx # sched: [10:1.00] -; ZNVER1-NEXT: popcntq %rdi, %rax # sched: [3:1.00] +; ZNVER1-NEXT: popcntq (%rsi), %rcx # sched: [5:0.50] +; ZNVER1-NEXT: popcntq %rdi, %rax # sched: [1:0.25] ; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = load i64, i64 *%a1 Index: test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- test/CodeGen/X86/schedule-x86_64.ll +++ test/CodeGen/X86/schedule-x86_64.ll @@ -1964,8 +1964,8 @@ ; SLM-LABEL: test_bsf16: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: bsfw %di, %ax # sched: [1:1.00] -; SLM-NEXT: bsfw (%rsi), %cx # sched: [4:1.00] +; SLM-NEXT: bsfw %di, %ax # sched: [10:10.00] +; SLM-NEXT: bsfw (%rsi), %cx # sched: [13:10.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: # kill: def $ax killed $ax killed $eax @@ -2024,8 +2024,8 @@ ; BTVER2-LABEL: test_bsf16: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsfw %di, %ax # sched: [1:0.50] -; BTVER2-NEXT: bsfw (%rsi), %cx # sched: [4:1.00] +; BTVER2-NEXT: bsfw %di, %ax # sched: [5:2.00] +; BTVER2-NEXT: bsfw (%rsi), %cx # sched: [8:2.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax @@ -2068,8 +2068,8 @@ ; SLM-LABEL: test_bsf32: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: bsfl %edi, %eax # sched: [1:1.00] -; SLM-NEXT: bsfl (%rsi), %ecx # sched: [4:1.00] +; SLM-NEXT: bsfl %edi, %eax # sched: [10:10.00] +; SLM-NEXT: bsfl (%rsi), %ecx # sched: [13:10.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] @@ -2122,8 +2122,8 @@ ; BTVER2-LABEL: test_bsf32: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsfl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: bsfl (%rsi), %ecx # sched: [4:1.00] +; BTVER2-NEXT: bsfl %edi, %eax # sched: [5:2.00] +; BTVER2-NEXT: bsfl (%rsi), %ecx # sched: [8:2.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -2164,8 +2164,8 @@ ; SLM-LABEL: test_bsf64: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: bsfq %rdi, %rax # sched: [1:1.00] -; SLM-NEXT: bsfq (%rsi), %rcx # sched: [4:1.00] +; SLM-NEXT: bsfq %rdi, %rax # sched: [10:10.00] +; SLM-NEXT: bsfq (%rsi), %rcx # sched: [13:10.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] @@ -2218,8 +2218,8 @@ ; BTVER2-LABEL: test_bsf64: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsfq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: bsfq (%rsi), %rcx # sched: [4:1.00] +; BTVER2-NEXT: bsfq %rdi, %rax # sched: [5:2.00] +; BTVER2-NEXT: bsfq (%rsi), %rcx # sched: [8:2.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -2263,8 +2263,8 @@ ; SLM-LABEL: test_bsr16: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: bsrw %di, %ax # sched: [1:1.00] -; SLM-NEXT: bsrw (%rsi), %cx # sched: [4:1.00] +; SLM-NEXT: bsrw %di, %ax # sched: [10:10.00] +; SLM-NEXT: bsrw (%rsi), %cx # sched: [13:10.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: # kill: def $ax killed $ax killed $eax @@ -2323,8 +2323,8 @@ ; BTVER2-LABEL: test_bsr16: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsrw %di, %ax # sched: [1:0.50] -; BTVER2-NEXT: bsrw (%rsi), %cx # sched: [4:1.00] +; BTVER2-NEXT: bsrw %di, %ax # sched: [5:2.00] +; BTVER2-NEXT: bsrw (%rsi), %cx # sched: [8:2.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax @@ -2367,8 +2367,8 @@ ; SLM-LABEL: test_bsr32: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: bsrl %edi, %eax # sched: [1:1.00] -; SLM-NEXT: bsrl (%rsi), %ecx # sched: [4:1.00] +; SLM-NEXT: bsrl %edi, %eax # sched: [10:10.00] +; SLM-NEXT: bsrl (%rsi), %ecx # sched: [13:10.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] @@ -2421,8 +2421,8 @@ ; BTVER2-LABEL: test_bsr32: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsrl %edi, %eax # sched: [1:0.50] -; BTVER2-NEXT: bsrl (%rsi), %ecx # sched: [4:1.00] +; BTVER2-NEXT: bsrl %edi, %eax # sched: [5:2.00] +; BTVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:2.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] @@ -2463,8 +2463,8 @@ ; SLM-LABEL: test_bsr64: ; SLM: # %bb.0: ; SLM-NEXT: #APP -; SLM-NEXT: bsrq %rdi, %rax # sched: [1:1.00] -; SLM-NEXT: bsrq (%rsi), %rcx # sched: [4:1.00] +; SLM-NEXT: bsrq %rdi, %rax # sched: [10:10.00] +; SLM-NEXT: bsrq (%rsi), %rcx # sched: [13:10.00] ; SLM-NEXT: #NO_APP ; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] @@ -2517,8 +2517,8 @@ ; BTVER2-LABEL: test_bsr64: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP -; BTVER2-NEXT: bsrq %rdi, %rax # sched: [1:0.50] -; BTVER2-NEXT: bsrq (%rsi), %rcx # sched: [4:1.00] +; BTVER2-NEXT: bsrq %rdi, %rax # sched: [5:2.00] +; BTVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:2.00] ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00]