Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -199,9 +199,9 @@ SDNPVariadic]>; def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; -def X86NoTrackBrind : SDNode<"X86ISD::NT_BRIND", SDT_X86NtBrind, +def X86NoTrackBrind : SDNode<"X86ISD::NT_BRIND", SDT_X86NtBrind, [SDNPHasChain]>; def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, @@ -1189,7 +1189,7 @@ let mayLoad = 1, SchedRW = [WriteLoad] in { def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, OpSize16; -def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>, +def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>, OpSize32, Requires<[Not64BitMode]>; def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>, OpSize16, NotMemoryFoldable; @@ -1694,7 +1694,7 @@ // Condition code ops, incl. set if equal/not equal/... -let SchedRW = [WriteALU] in { +let SchedRW = [WriteLAHFSAHF] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", [(set EFLAGS, (X86sahf AH))]>, Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -134,6 +134,7 @@ let Latency = 2; let NumMicroOps = 3; } +def : WriteRes; // Bit counts. defm : BWWriteResPair; @@ -624,7 +625,6 @@ let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup9], (instrs LAHF, SAHF)>; // TODO: This doesnt match Agner's data def: InstRW<[BWWriteResGroup9], (instregex "NOOP", "SGDT64m", "SIDT64m", Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -135,6 +135,7 @@ let Latency = 2; let NumMicroOps = 3; } +def : WriteRes; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on @@ -912,7 +913,6 @@ } def: InstRW<[HWWriteResGroup10], (instrs CBW, CWDE, CDQE, CMC, STC)>; -def: InstRW<[HWWriteResGroup10], (instrs LAHF, SAHF)>; // TODO: This doesn't match Agner's data def: InstRW<[HWWriteResGroup10], (instregex "NOOP", "SGDT64m", "SIDT64m", Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -134,6 +134,7 @@ let Latency = 2; let NumMicroOps = 3; } +def : WriteRes; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on @@ -558,7 +559,6 @@ let ResourceCycles = [1]; } def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>; -def: InstRW<[SBWriteResGroup4], (instrs LAHF, SAHF)>; def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8", "BT(16|32|64)rr", "BTC(16|32|64)ri8", Index: lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -132,6 +132,7 @@ let Latency = 2; let NumMicroOps = 3; } +def : WriteRes; // Bit counts. defm : SKLWriteResPair; @@ -629,7 +630,6 @@ } def: InstRW<[SKLWriteResGroup10], (instrs CBW, CWDE, CDQE, CMC, STC)>; -def: InstRW<[SKLWriteResGroup10], (instrs LAHF, SAHF)>; // TODO: This doesn't match Agner's data def: InstRW<[SKLWriteResGroup10], (instregex "NOOP", "SGDT64m", "SIDT64m", Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -132,6 +132,7 @@ let Latency = 2; let NumMicroOps = 3; } +def : WriteRes; // Integer shifts and rotates. defm : SKXWriteResPair; @@ -652,7 +653,6 @@ } def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE, CMC, STC)>; -def: InstRW<[SKXWriteResGroup10], (instrs LAHF, SAHF)>; // TODO: This doesn't match Agner's data def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m", "SIDT64m", "SMSW16m", Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -137,6 +137,7 @@ def WriteFCMOV : SchedWrite; // X87 conditional move. def WriteSETCC : SchedWrite; // Set register based on condition code. def WriteSETCCStore : SchedWrite; +def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH. // Integer shifts and rotates. defm WriteShift : X86SchedWritePair; Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -101,6 +101,10 @@ let Latency = 2; let ResourceCycles = [2]; } +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} defm : X86WriteResUnsupported; Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -177,6 +177,7 @@ defm : X86WriteRes; // x87 conditional move. def : WriteRes; // Setcc. def : WriteRes; +def : WriteRes; // This is for simple LEAs with one or two input operands. // FIXME: SAGU 3-operand LEA Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -109,6 +109,7 @@ // FIXME Latency and NumMicrOps? let ResourceCycles = [2,1]; } +def : WriteRes; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -162,6 +162,7 @@ defm : ZnWriteResPair; def : WriteRes; def : WriteRes; +defm : X86WriteRes; // Bit counts. defm : ZnWriteResPair; @@ -509,13 +510,6 @@ //LAHF def : InstRW<[WriteMicrocoded], (instrs LAHF)>; -// SAHF. -def ZnWriteSAHF : SchedWriteRes<[ZnALU]> { - let Latency = 2; - let NumMicroOps = 2; -} -def : InstRW<[ZnWriteSAHF], (instrs SAHF)>; - // BSWAP. def ZnWriteBSwap : SchedWriteRes<[ZnALU]> { let ResourceCycles = [4]; Index: test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- test/CodeGen/X86/schedule-x86_64.ll +++ test/CodeGen/X86/schedule-x86_64.ll @@ -7301,8 +7301,8 @@ ; ATOM-LABEL: test_lahf_sahf: ; ATOM: # %bb.0: ; ATOM-NEXT: #APP -; ATOM-NEXT: lahf # sched: [1:0.50] -; ATOM-NEXT: sahf # sched: [1:0.50] +; ATOM-NEXT: lahf # sched: [2:1.00] +; ATOM-NEXT: sahf # sched: [2:1.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retq # sched: [79:39.50] ; @@ -7325,32 +7325,32 @@ ; HASWELL-LABEL: test_lahf_sahf: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP -; HASWELL-NEXT: lahf # sched: [1:0.25] -; HASWELL-NEXT: sahf # sched: [1:0.25] +; HASWELL-NEXT: lahf # sched: [1:0.50] +; HASWELL-NEXT: sahf # sched: [1:0.50] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_lahf_sahf: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP -; BROADWELL-NEXT: lahf # sched: [1:0.25] -; BROADWELL-NEXT: sahf # sched: [1:0.25] +; BROADWELL-NEXT: lahf # sched: [1:0.50] +; BROADWELL-NEXT: sahf # sched: [1:0.50] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_lahf_sahf: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP -; SKYLAKE-NEXT: lahf # sched: [1:0.25] -; SKYLAKE-NEXT: sahf # sched: [1:0.25] +; SKYLAKE-NEXT: lahf # sched: [1:0.50] +; SKYLAKE-NEXT: sahf # sched: [1:0.50] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_lahf_sahf: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: lahf # sched: [1:0.25] -; SKX-NEXT: sahf # sched: [1:0.25] +; SKX-NEXT: lahf # sched: [1:0.50] +; SKX-NEXT: sahf # sched: [1:0.50] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retq # sched: [7:1.00] ;