Index: llvm/trunk/lib/Target/X86/X86InstrCMovSetCC.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrCMovSetCC.td +++ llvm/trunk/lib/Target/X86/X86InstrCMovSetCC.td @@ -16,7 +16,7 @@ // CMOV instructions. multiclass CMOV opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1, SchedRW = [WriteALU] in { + isCommutable = 1, SchedRW = [WriteCMOV] in { def NAME#16rr : I, TB, Sched<[WriteALU]>; + IIC_SET_R>, TB, Sched<[WriteSETCC]>; def m : I, TB, Sched<[WriteALU, WriteStore]>; + IIC_SET_M>, TB, Sched<[WriteSETCCStore]>; } // Uses = [EFLAGS] } Index: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td +++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td @@ -113,6 +113,13 @@ def : WriteRes; // LEA instructions can't fold loads. +defm : BWWriteResPair; // Conditional move. +def : WriteRes; // Setcc. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // Bit counts. defm : BWWriteResPair; defm : BWWriteResPair; @@ -469,7 +476,6 @@ "BTR(16|32|64)rr", "BTS(16|32|64)ri8", "BTS(16|32|64)rr", - "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr", "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_1", "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_4", "JMP_1", @@ -481,7 +487,6 @@ "SBB(16|32|64)ri", "SBB(16|32|64)i", "SBB(8|16|32|64)rr", - "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r", "SHL(8|16|32|64)r1", "SHL(8|16|32|64)ri", "SHLX(32|64)rr", @@ -791,13 +796,6 @@ } def: InstRW<[BWWriteResGroup22], (instregex "FNSTCW16m")>; -def BWWriteResGroup23 : SchedWriteRes<[BWPort4,BWPort237,BWPort06]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup23], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; - def BWWriteResGroup24 : SchedWriteRes<[BWPort4,BWPort237,BWPort15]> { let Latency = 2; let NumMicroOps = 3; @@ -1398,7 +1396,6 @@ let ResourceCycles = [1,1]; } def: InstRW<[BWWriteResGroup63], (instregex "BT(16|32|64)mi8", - "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm", "RORX(32|64)mi", "SARX(32|64)rm", "SHLX(32|64)rm", Index: llvm/trunk/lib/Target/X86/X86SchedHaswell.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedHaswell.td +++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td @@ -119,6 +119,13 @@ defm : HWWriteResPair; defm : HWWriteResPair; +defm : HWWriteResPair; // Conditional move. +def : WriteRes; // Setcc. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on // the port to read all inputs. We don't model that. @@ -830,7 +837,6 @@ "SAR(8|16|32|64)r1", "SAR(8|16|32|64)ri", "SARX(32|64)rr", - "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r", "SHL(8|16|32|64)r1", "SHL(8|16|32|64)ri", "SHLX(32|64)rr", @@ -1405,13 +1411,6 @@ } def: InstRW<[HWWriteResGroup21], (instregex "FNSTCW16m")>; -def HWWriteResGroup22 : SchedWriteRes<[HWPort4,HWPort237,HWPort06]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup22], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; - def HWWriteResGroup23 : SchedWriteRes<[HWPort4,HWPort237,HWPort15]> { let Latency = 2; let NumMicroOps = 3; @@ -1568,7 +1567,6 @@ def: InstRW<[HWWriteResGroup35], (instregex "ADC(8|16|32|64)ri", "ADC(8|16|32|64)rr", "ADC(8|16|32|64)i", - "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr", "SBB(8|16|32|64)ri", "SBB(8|16|32|64)rr", "SBB(8|16|32|64)i", @@ -1663,7 +1661,6 @@ let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup43], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm")>; def: InstRW<[HWWriteResGroup43, ReadAfterLd], (instrs ADC8rm, ADC16rm, ADC32rm, ADC64rm, SBB8rm, SBB16rm, SBB32rm, SBB64rm)>; Index: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td +++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td @@ -110,6 +110,13 @@ defm : SBWriteResPair; defm : SBWriteResPair; +defm : SBWriteResPair; // Conditional move. +def : WriteRes; // Setcc. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on // the port to read all inputs. We don't model that. @@ -382,7 +389,6 @@ "SAHF", "SAR(8|16|32|64)ri", "SAR(8|16|32|64)r1", - "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r", "SHL(8|16|32|64)ri", "SHL(8|16|32|64)r1", "SHR(8|16|32|64)ri", @@ -624,7 +630,6 @@ def: InstRW<[SBWriteResGroup19], (instregex "ADC(8|16|32|64)ri", "ADC(8|16|32|64)rr", "ADC(8|16|32|64)i", - "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr", "SBB(8|16|32|64)ri", "SBB(8|16|32|64)rr", "SBB(8|16|32|64)i", @@ -949,13 +954,6 @@ def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr", "VMASKMOVPS(Y?)mr")>; -def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup38], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; - def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> { let Latency = 5; let NumMicroOps = 3; @@ -1297,7 +1295,6 @@ let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SBWriteResGroup65], (instregex "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm")>; def: InstRW<[SBWriteResGroup65, ReadAfterLd], (instrs ADC8rm, ADC16rm, ADC32rm, ADC64rm, SBB8rm, SBB16rm, SBB32rm, SBB64rm)>; Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td @@ -113,6 +113,13 @@ def : WriteRes { let Latency = 3; } // Integer multiplication, high part. def : WriteRes; // LEA instructions can't fold loads. +defm : SKLWriteResPair; // Conditional move. +def : WriteRes; // Setcc. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // Bit counts. defm : SKLWriteResPair; defm : SKLWriteResPair; @@ -534,7 +541,6 @@ "BTS(16|32|64)ri8", "BTS(16|32|64)rr", "CLAC", - "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr", "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_1", "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_4", "JMP_1", @@ -546,7 +552,6 @@ "SBB(16|32|64)ri", "SBB(16|32|64)i", "SBB(8|16|32|64)rr", - "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r", "SHL(8|16|32|64)r1", "SHL(8|16|32|64)ri", "SHLX(32|64)rr", @@ -812,13 +817,6 @@ } def: InstRW<[SKLWriteResGroup25], (instregex "FNSTCW16m")>; -def SKLWriteResGroup26 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup26], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; - def SKLWriteResGroup27 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort15]> { let Latency = 2; let NumMicroOps = 3; @@ -1421,7 +1419,6 @@ let ResourceCycles = [1,1]; } def: InstRW<[SKLWriteResGroup74], (instregex "BT(16|32|64)mi8", - "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm", "RORX(32|64)mi", "SARX(32|64)rm", "SHLX(32|64)rm", Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td @@ -113,6 +113,13 @@ def : WriteRes { let Latency = 3; } // Integer multiplication, high part. def : WriteRes; // LEA instructions can't fold loads. +defm : SKXWriteResPair; // Conditional move. +def : WriteRes; // Setcc. +def : WriteRes { + let Latency = 2; + let NumMicroOps = 3; +} + // Integer shifts and rotates. defm : SKXWriteResPair; @@ -1010,7 +1017,6 @@ "BTS(16|32|64)ri8", "BTS(16|32|64)rr", "CLAC", - "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rr", "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_1", "J(A|AE|B|BE|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)_4", "JMP_1", @@ -1022,7 +1028,6 @@ "SBB(16|32|64)ri", "SBB(16|32|64)i", "SBB(8|16|32|64)rr", - "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)r", "SHL(8|16|32|64)r1", "SHL(8|16|32|64)ri", "SHLX(32|64)rr", @@ -1617,13 +1622,6 @@ } def: InstRW<[SKXWriteResGroup25], (instregex "FNSTCW16m")>; -def SKXWriteResGroup26 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup26], (instregex "SET(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)m")>; - def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> { let Latency = 2; let NumMicroOps = 3; @@ -3050,7 +3048,6 @@ let ResourceCycles = [1,1]; } def: InstRW<[SKXWriteResGroup78], (instregex "BT(16|32|64)mi8", - "CMOV(AE|B|E|G|GE|L|LE|NE|NO|NP|NS|O|P|S)(16|32|64)rm", "RORX(32|64)mi", "SARX(32|64)rm", "SHLX(32|64)rm", Index: llvm/trunk/lib/Target/X86/X86Schedule.td =================================================================== --- llvm/trunk/lib/Target/X86/X86Schedule.td +++ llvm/trunk/lib/Target/X86/X86Schedule.td @@ -39,9 +39,14 @@ } } +// Loads, stores, and moves, not folded with other operations. +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteMove : SchedWrite; + // Arithmetic. defm WriteALU : X86SchedWritePair; // Simple integer ALU op. -def WriteALURMW : WriteSequence<[WriteALULd, WriteRMW]>; +def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>; defm WriteIMul : X86SchedWritePair; // Integer multiplication. def WriteIMulH : SchedWrite; // Integer multiplication, high part. defm WriteIDiv : X86SchedWritePair; // Integer division. @@ -51,6 +56,9 @@ defm WritePOPCNT : X86SchedWritePair; // Bit population count. defm WriteLZCNT : X86SchedWritePair; // Leading zero count. defm WriteTZCNT : X86SchedWritePair; // Trailing zero count. +defm WriteCMOV : X86SchedWritePair; // Conditional move. +def WriteSETCC : SchedWrite; // Set register based on condition code. +def WriteSETCCStore : SchedWrite; // Integer shifts and rotates. defm WriteShift : X86SchedWritePair; @@ -59,11 +67,6 @@ defm WriteBEXTR : X86SchedWritePair; defm WriteBZHI : X86SchedWritePair; -// Loads, stores, and moves, not folded with other operations. -def WriteLoad : SchedWrite; -def WriteStore : SchedWrite; -def WriteMove : SchedWrite; - // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. def WriteZero : SchedWrite; Index: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td +++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td @@ -141,6 +141,10 @@ defm : JWriteResIntPair; // Worst case (i64 division) defm : JWriteResIntPair; +defm : JWriteResIntPair; // Conditional move. +def : WriteRes; // Setcc. +def : WriteRes; + def : WriteRes { let Latency = 6; let ResourceCycles = [4]; Index: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td +++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td @@ -93,6 +93,13 @@ defm : SLMWriteResPair; defm : SLMWriteResPair; +defm : SLMWriteResPair; +def : WriteRes; +def : WriteRes { + // FIXME Latency and NumMicrOps? + let ResourceCycles = [2,1]; +} + // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on // the port to read all inputs. We don't model that. Index: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td +++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td @@ -153,6 +153,10 @@ defm : ZnWriteResPair; defm : ZnWriteResFpuPair; +defm : ZnWriteResPair; +def : WriteRes; +def : WriteRes; + // Bit counts. defm : ZnWriteResPair; defm : ZnWriteResPair; @@ -277,14 +281,6 @@ // r,m. def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; -// CMOVcc. -// r,r. -def : InstRW<[WriteALU], - (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>; -// r,m. -def : InstRW<[WriteALULd, ReadAfterLd], - (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>; - // XCHG. // r,r. def ZnWriteXCHG : SchedWriteRes<[ZnALU]> { @@ -614,14 +610,6 @@ // m,r,cl. def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>; -// SETcc. -// r. -def : InstRW<[WriteShift], - (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>; -// m. -def : InstRW<[WriteShift], - (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>; - //-- Misc instructions --// // CMPXCHG. def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> { Index: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll +++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll @@ -13882,22 +13882,22 @@ ; ZNVER1-NEXT: setge %dil # sched: [1:0.25] ; ZNVER1-NEXT: setle %dil # sched: [1:0.25] ; ZNVER1-NEXT: setg %dil # sched: [1:0.25] -; ZNVER1-NEXT: seto (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setno (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setb (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setae (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: sete (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setne (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setbe (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: seta (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: sets (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setns (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setp (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setnp (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setl (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setge (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setle (%rsi) # sched: [1:0.25] -; ZNVER1-NEXT: setg (%rsi) # sched: [1:0.25] +; ZNVER1-NEXT: seto (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setno (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setb (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setae (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: sete (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setne (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setbe (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: seta (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: sets (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setns (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setp (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setnp (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setl (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setge (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setle (%rsi) # sched: [1:0.50] +; ZNVER1-NEXT: setg (%rsi) # sched: [1:0.50] ; ZNVER1-NEXT: #NO_APP ; ZNVER1-NEXT: retq # sched: [1:0.50] call void asm sideeffect "seto $0 \0A\09 setno $0 \0A\09 setb $0 \0A\09 setnb $0 \0A\09 setz $0 \0A\09 setnz $0 \0A\09 setbe $0 \0A\09 setnbe $0 \0A\09 sets $0 \0A\09 setns $0 \0A\09 setp $0 \0A\09 setnp $0 \0A\09 setl $0 \0A\09 setnl $0 \0A\09 setle $0 \0A\09 setnle $0 \0A\09 seto $1 \0A\09 setno $1 \0A\09 setb $1 \0A\09 setnb $1 \0A\09 setz $1 \0A\09 setnz $1 \0A\09 setbe $1 \0A\09 setnbe $1 \0A\09 sets $1 \0A\09 setns $1 \0A\09 setp $1 \0A\09 setnp $1 \0A\09 setl $1 \0A\09 setnl $1 \0A\09 setle $1 \0A\09 setnle $1", "r,*m"(i8 %a0, i8 *%a1)