Index: lib/Target/X86/X86PadShortFunction.cpp =================================================================== --- lib/Target/X86/X86PadShortFunction.cpp +++ lib/Target/X86/X86PadShortFunction.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -49,7 +49,7 @@ struct PadShortFunc : public MachineFunctionPass { static char ID; PadShortFunc() : MachineFunctionPass(ID) - , Threshold(4), STI(nullptr), TII(nullptr) {} + , Threshold(4) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -82,8 +82,7 @@ // VisitedBBs - Cache of previously visited BBs. DenseMap VisitedBBs; - const X86Subtarget *STI; - const TargetInstrInfo *TII; + TargetSchedModel TSM; }; char PadShortFunc::ID = 0; @@ -99,15 +98,13 @@ if (skipFunction(MF.getFunction())) return false; - if (MF.getFunction().optForSize()) { + if (MF.getFunction().optForSize()) return false; - } - STI = &MF.getSubtarget(); - if (!STI->padShortFunctions()) + if (!MF.getSubtarget().padShortFunctions()) return false; - TII = STI->getInstrInfo(); + TSM.init(&MF.getSubtarget()); // Search through basic blocks and mark the ones that have early returns ReturnBBs.clear(); @@ -195,7 +192,7 @@ return true; } - CyclesToEnd += TII->getInstrLatency(STI->getInstrItineraryData(), MI); + CyclesToEnd += TSM.computeInstrLatency(&MI); } VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd); @@ -209,9 +206,8 @@ MachineBasicBlock::iterator &MBBI, unsigned int NOOPsToAdd) { DebugLoc DL = MBBI->getDebugLoc(); + unsigned IssueWidth = TSM.getIssueWidth(); - while (NOOPsToAdd-- > 0) { - BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP)); - BuildMI(*MBB, MBBI, DL, TII->get(X86::NOOP)); - } + for (unsigned i = 0, e = IssueWidth * NOOPsToAdd; i != e; ++i) + BuildMI(*MBB, 
MBBI, DL, TSM.getInstrInfo()->get(X86::NOOP)); } Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -15,549 +15,801 @@ // // Scheduling information derived from the "Intel 64 and IA32 Architectures // Optimization Reference Manual", Chapter 13, Section 4. -// Functional Units -// Port 0 -def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store - // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide -def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA - // SIMD/FP: SIMD ALU, FP Adder - -def AtomItineraries : ProcessorItineraries< - [ Port0, Port1 ], - [], [ - // P0 only - // InstrItinData] >, - // P0 or P1 - // InstrItinData] >, - // P0 and P1 - // InstrItinData, InstrStage] >, - // - // Default is 1 cycle, port0 or port1 - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // mul - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // imul by al, ax, eax, rax - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // imul reg by reg|mem - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // imul reg = reg/mem * imm - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // idiv - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // div - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // neg/not/inc/dec - InstrItinData] 
>, - InstrItinData] >, - // add/sub/and/or/xor/cmp/test - InstrItinData] >, - InstrItinData] >, - // adc/sbc - InstrItinData] >, - InstrItinData] >, - // shift/rotate - InstrItinData] >, - // shift double - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // cmov - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // set - InstrItinData] >, - InstrItinData] >, - // jcc - InstrItinData] >, - // jcxz/jecxz/jrcxz - InstrItinData] >, - // jmp rel - InstrItinData] >, - // jmp indirect - InstrItinData] >, - InstrItinData] >, - // jmp far - InstrItinData] >, - InstrItinData] >, - // loop/loope/loopne - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // call - all but reg/imm - InstrItinData, - InstrStage<1, [Port1]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - //ret - InstrItinData] >, - InstrItinData, InstrStage<1, [Port1]>] >, - //sign extension movs - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - //zero extension movs - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - // SSE binary operations - // arithmetic fp scalar - InstrItinData] >, - InstrItinData, - InstrStage<5, [Port1]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [Port1]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - // arithmetic fp parallel - InstrItinData] >, - InstrItinData, - InstrStage<5, [Port1]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - 
InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - // bitwise parallel - InstrItinData] >, - InstrItinData] >, - - // arithmetic int parallel - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - // multiply int parallel - InstrItinData] >, - InstrItinData] >, - - // shift parallel - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - // conversions - // to/from PD ... 
- InstrItinData] >, - InstrItinData] >, - // to/from PS except to/from PD and PS2PI - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - // MMX MOVs - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // other MMX - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // conversions - // from/to PD - InstrItinData] >, - InstrItinData] >, - // from/to PI - InstrItinData] >, - InstrItinData, - InstrStage<5, [Port1]>]>, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData, InstrStage<5, [Port1]>] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, InstrStage<1, [Port1]>] >, - InstrItinData] >, - 
InstrItinData] >, - - // System instructions - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - // worst case for mov REG_CRx - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // LAR - InstrItinData] >, - InstrItinData] >, - // LSL - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // push control register, segment registers - InstrItinData] >, - InstrItinData] >, - // pop control register, segment registers - InstrItinData] >, - InstrItinData] >, - // VERR, VERW - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // WRMSR, RDMSR - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // SMSW, LMSW - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [Port1]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData, - InstrStage<1, [Port1]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [Port1]>] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - 
InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] > - ]>; // Atom machine model. def AtomModel : SchedMachineModel { let IssueWidth = 2; // Allows 2 instructions per scheduling group. let MicroOpBufferSize = 0; // In-order execution, always hide latency. - let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. - let HighLatency = 30;// Expected, may be overriden by OperandCycles. + let LoadLatency = 3; // Expected cycles, may be overriden. + let HighLatency = 30;// Expected, may be overriden. // On the Atom, the throughput for taken branches is 2 cycles. For small // simple loops, expand by a small factor to hide the backedge cost. let LoopMicroOpBufferSize = 10; let PostRAScheduler = 1; let CompleteModel = 0; +} + +let SchedModel = AtomModel in { + +// Functional Units +def AtomPort0 : ProcResource<1>; // ALU: ALU0, shift/rotate, load/store + // SIMD/FP: SIMD ALU, Shuffle,SIMD/FP multiply, divide +def AtomPort1 : ProcResource<1>; // ALU: ALU1, bit processing, jump, and LEA + // SIMD/FP: SIMD ALU, FP Adder + +def AtomPort01 : ProcResGroup<[AtomPort0, AtomPort1]>; + +// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3 +// cycles after the memory operand. +def : ReadAdvance; + +// Many SchedWrites are defined in pairs with and without a folded load. 
+// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when dispatched by the schedulers. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass AtomWriteResPair RRPorts, + list RMPorts, + int RRLat = 1, int RMLat = 1, + list RRRes = [1], + list RMRes = [1]> { + // Register variant: latency is RRLat and resource cycles are RRRes. + def : WriteRes { + let Latency = RRLat; + let ResourceCycles = RRRes; + } + + // Memory variant: latency is RMLat and resource cycles are RMRes. RMLat is + // used as given; no load latency is added to RRLat here. + def : WriteRes { + let Latency = RMLat; + let ResourceCycles = RMRes; + } +} + +// A folded store needs a cycle on Port0 for the store data. +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Arithmetic. +//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. + +defm : AtomWriteResPair; + +def : WriteRes; +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} + +def : WriteRes; // NOTE: Doesn't exist on Atom. + +// This is for simple LEAs with one or two input operands. 
+def : WriteRes; + +def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> { + let Latency = 8; + let ResourceCycles = [8]; +} +def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>; + +def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> { + let Latency = 6; + let ResourceCycles = [6]; +} +def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>; + +def AtomWriteIMul64 : SchedWriteRes<[AtomPort01]> { + let Latency = 12; + let ResourceCycles = [12]; +} +def : InstRW<[AtomWriteIMul64], (instrs MUL64r, IMUL64r, IMUL64rr, IMUL64rm, + MUL64m, IMUL64m)>; + +def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> { + let Latency = 14; + let ResourceCycles = [14]; +} +def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32, + IMUL64rmi8, IMUL64rmi32)>; + +def AtomWriteDiv : SchedWriteRes<[AtomPort01]> { + let Latency = 50; + let ResourceCycles = [50]; +} +def : InstRW<[AtomWriteDiv], (instrs DIV8r, + DIV16r, DIV16m, + DIV32r, DIV32m)>; + +def AtomWriteDiv8Ld : SchedWriteRes<[AtomPort01]> { + let Latency = 68; + let ResourceCycles = [68]; +} +def : InstRW<[AtomWriteDiv8Ld], (instrs DIV8m)>; + +def AtomWriteIDiv64 : SchedWriteRes<[AtomPort01]> { + let Latency = 130; + let ResourceCycles = [130]; +} +def : InstRW<[AtomWriteIDiv64], (instrs DIV64r, IDIV64r, + DIV64m, IDIV64m)>; + +// Bit counts. +defm : AtomWriteResPair; +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. + +// BMI1 BEXTR, BMI2 BZHI +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. + +//////////////////////////////////////////////////////////////////////////////// +// Integer shifts and rotates. 
+//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; + +//////////////////////////////////////////////////////////////////////////////// +// Loads, stores, and moves, not folded with other operations. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; + +//////////////////////////////////////////////////////////////////////////////// +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; + +//////////////////////////////////////////////////////////////////////////////// +// Special case scheduling classes. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { let Latency = 100; } +def : WriteRes { let Latency = 100; } +def : WriteRes; + +// Nops don't have dependencies, so there's no actual latency, but we set this +// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle. +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Floating point. This covers both scalar and vector operations. 
+//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes; +def : WriteRes; +def : WriteRes; + +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. + +//////////////////////////////////////////////////////////////////////////////// +// Conversions. +//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; // Float -> Integer. +defm : AtomWriteResPair; // Integer -> Float. +defm : AtomWriteResPair; // Float -> Float size conversion. + +//////////////////////////////////////////////////////////////////////////////// +// Vector integer operations. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes; +def : WriteRes; +def : WriteRes; + +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. + +//////////////////////////////////////////////////////////////////////////////// +// SSE42 String instructions. 
+//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. + +//////////////////////////////////////////////////////////////////////////////// +// MOVMSK Instructions. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { let Latency = 3; let ResourceCycles = [3]; } +def : WriteRes { let Latency = 3; let ResourceCycles = [3]; } +def : WriteRes { let Latency = 3; let ResourceCycles = [3]; } + +//////////////////////////////////////////////////////////////////////////////// +// AES Instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. + +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; +defm : AtomWriteResPair; + +//////////////////////////////////////////////////////////////////////////////// +// Carry-less multiplication instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : AtomWriteResPair; // NOTE: Doesn't exist on Atom. + +//////////////////////////////////////////////////////////////////////////////// +// Special Cases. 
+//////////////////////////////////////////////////////////////////////////////// + +// Port0 +def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> { + let Latency = 1; + let ResourceCycles = [1]; +} +def : InstRW<[AtomWrite0_1], (instrs FXAM, + BSWAP32r, BSWAP64r, + DEC8m, DEC16m, DEC32m, DEC64m, + INC8m, INC16m, INC32m, INC64m, + MOVSX64rr32, + MMX_MOVD64rr, MMX_MOVD64mr, + MMX_MOVD64to64rr, MMX_MOVD64to64rm, + MMX_PSHUFBrr, MMX_PSHUFBrm, + MOVDI2PDIrr, MOVDI2PDIrm, + MOV64toPQIrr, MOV64toPQIrm, + MOV64toSDrr, MOV64toSDrm, MOVSDto64mr, + MOVDI2SSrr, MOVDI2SSrm, + MOVPDI2DImr, MOVPQIto64mr, MOVSS2DImr, MOVQI2PQIrm, MOVPQI2QImr)>; +def : InstRW<[AtomWrite0_1], (instregex "(ADC|ADD|AND|NEG|NOT|OR|SBB|SUB|XOR)(8|16|32|64)m", + "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m", + "MOV(S|Z)X(32|64)(rr|rm)(8|8_NOREX|16)", + "LD_F(P)?(16|32|64)?(m|rr)", + "MMX_MASKMOVQ(64)?", + "MMX_PAVG(B|W)irm", + "MMX_P(MAX|MIN)(UB|SW)irm", + "MMX_PSIGN(B|D|W)rm")>; + +def AtomWrite0_3 : SchedWriteRes<[AtomPort0]> { + let Latency = 3; + let ResourceCycles = [3]; +} +def : InstRW<[AtomWrite0_3], (instrs MMX_MOVD64from64rr, MMX_MOVD64grr, + MOVPDI2DIrr, MOVPQIto64rr, + MOVSDto64rr, MOVSS2DIrr)>; + +def AtomWrite0_4 : SchedWriteRes<[AtomPort0]> { + let Latency = 4; + let ResourceCycles = [4]; +} +def : InstRW<[AtomWrite0_4], (instrs MMX_PMADDUBSWrr, MMX_PMADDUBSWrm, + MMX_PMADDWDirr, MMX_PMADDWDirm, + MMX_PMULHRSWrr, MMX_PMULHRSWrm, + MMX_PMULHUWirr, MMX_PMULHUWirm, + MMX_PMULHWirr, MMX_PMULHWirm, + MMX_PMULLWirr, MMX_PMULLWirm, + MMX_PMULUDQirr, MMX_PMULUDQirm)>; + +def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> { + let Latency = 5; + let ResourceCycles = [5]; +} +def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)", + "MUL(PS|SD)(rr|rm)(_Int)?")>; + +// Port1 +def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> { + let Latency = 1; + let ResourceCycles = [1]; +} +def : InstRW<[AtomWrite1_1], (instrs FCOMPP)>; +def : InstRW<[AtomWrite1_1], (instregex "ABS_F", "CHS_F", + "UCOM_F(P|PP)?r", + 
"BT(C|R|S)?(16|32|64)(rr|ri8)")>; + +def AtomWrite1_5 : SchedWriteRes<[AtomPort1]> { + let Latency = 5; + let ResourceCycles = [5]; +} +def : InstRW<[AtomWrite1_5], (instrs MMX_CVTPI2PSirr, MMX_CVTPI2PSirm, + MMX_CVTPS2PIirr, MMX_CVTTPS2PIirr)>; + +// Port0 and Port1 +def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> { + let Latency = 1; + let ResourceCycles = [1, 1]; +} +def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r, + POP16rmr, POP32rmr, POP64rmr, + PUSH16r, PUSH32r, PUSH64r, + PUSHi16, PUSHi32, + PUSH16rmr, PUSH32rmr, PUSH64rmr, + PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32, + XCH_F)>; +def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$", + "IRET(16|32|64)?")>; + +def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> { + let Latency = 5; + let ResourceCycles = [5, 5]; +} +def : InstRW<[AtomWrite0_1_5], (instrs MMX_CVTPS2PIirm, MMX_CVTTPS2PIirm)>; +def : InstRW<[AtomWrite0_1_5], (instregex "ILD_F(16|32|64)")>; + +// Port0 or Port1 +def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> { + let Latency = 1; + let ResourceCycles = [1]; +} +def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, LD_F0, WAIT, + LFENCE, + STOSB, STOSL, STOSQ, STOSW, + MOVSSrr, MOVSSrr_REV, + PSLLDQri, PSRLDQri)>; +def : InstRW<[AtomWrite01_1], (instregex "(MMX_)?PS(LL|RA|RL)(D|Q|W)ri", + "MMX_PAVG(B|W)irr", + "MMX_P(MAX|MIN)(UB|SW)irr", + "MMX_PSIGN(B|D|W)rr", + "MMX_PACK(SSDW|SSWB|USWB)irr", + "MMX_PUNPCKH(BW|DQ|WD)irr")>; + +def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> { + let Latency = 2; + let ResourceCycles = [2]; +} +def : InstRW<[AtomWrite01_2], (instrs LEAVE, LEAVE64, POP16r, + PUSH16rmm, PUSH32rmm, PUSH64rmm, + LODSB, LODSL, LODSQ, LODSW, + SCASB, SCASL, SCASQ, SCASW, + SHLD32rrCL, SHRD32rrCL, + SHLD32rri8, SHRD32rri8)>; +def : InstRW<[AtomWrite01_2], (instregex "BT(C|R|S)(16|32|64)mi8", + "PUSH(CS|DS|ES|FS|GS|SS)(16|32|64)", + "XADD(8|16|32|64)rr", + "XCHG(8|16|32|64)(ar|rr)", + "(ST|ISTT)_F(P)?(16|32|64)?(m|rr)", + "MMX_P(ADD|SUB)Qirr", + 
"MOV(S|Z)X16rr8", + "MOV(UPS|UPD|DQU)mr", + "MASKMOVDQU(64)?", + "P(ADD|SUB)Qrr")>; + +def AtomWrite01_3 : SchedWriteRes<[AtomPort01]> { + let Latency = 3; + let ResourceCycles = [3]; +} +def : InstRW<[AtomWrite01_3], (instrs CLD, LDDQUrm, + CMPSB, CMPSL, CMPSQ, CMPSW, + MOVSB, MOVSL, MOVSQ, MOVSW, + POP16rmm, POP32rmm, POP64rmm)>; +def : InstRW<[AtomWrite01_3], (instregex "XADD(8|16|32|64)rm", + "XCHG(8|16|32|64)rm", + "(MMX_)?PH(ADD|SUB)Drr", + "MOV(S|Z)X16rm8", + "MMX_P(ADD|SUB)Qirm", + "MOV(UPS|UPD|DQU)rm", + "P(ADD|SUB)Qrm")>; + +def AtomWrite01_4 : SchedWriteRes<[AtomPort01]> { + let Latency = 4; + let ResourceCycles = [4]; +} +def : InstRW<[AtomWrite01_4], (instrs CBW, CWD, CWDE, CDQ, CDQE, CQO, + JCXZ, JECXZ, JRCXZ, + SHLD32mrCL, SHRD32mrCL, + SHLD32mri8, SHRD32mri8, + LD_F80m, + MMX_PSADBWirr, MMX_PSADBWirm)>; +def : InstRW<[AtomWrite01_4], (instregex "(MMX_)?PH(ADD|SUB)Drm", + "(MMX_)?PEXTRWrr(_REV)?")>; + +def AtomWrite01_5 : SchedWriteRes<[AtomPort01]> { + let Latency = 5; + let ResourceCycles = [5]; +} +def : InstRW<[AtomWrite01_5], (instrs FLDCW16m, LDMXCSR, + MMX_EMMS)>; +def : InstRW<[AtomWrite01_5], (instregex "ST_FP80m", + "MMX_PH(ADD|SUB)S?Wrr")>; + +def AtomWrite01_6 : SchedWriteRes<[AtomPort01]> { + let Latency = 6; + let ResourceCycles = [6]; +} +def : InstRW<[AtomWrite01_6], (instrs LD_F1, CMPXCHG8rm, INTO, XLAT, + SHLD16rrCL, SHRD16rrCL, + SHLD16rri8, SHRD16rri8, + SHLD16mrCL, SHRD16mrCL, + SHLD16mri8, SHRD16mri8, + ADDSUBPDrr, ADDSUBPDrm, + CVTPS2DQrr, CVTTPS2DQrr)>; +def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr", + "IST_F(P)?(16|32|64)?m", + "MMX_PH(ADD|SUB)S?Wrm", + "(ADD|SUB|MAX|MIN)PDrr", + "CMPPDrri")>; + +def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> { + let Latency = 7; + let ResourceCycles = [7]; +} +def : InstRW<[AtomWrite01_7], (instrs AAD8i8, + CVTDQ2PDrr, + CVTPD2DQrr, + CVTPD2PSrr, + CVTPS2DQrm, + CVTPS2PDrr, + CVTTPD2DQrr, + CVTTPS2DQrm, + MMX_CVTPD2PIirr, + MMX_CVTPI2PDirr, + MMX_CVTTPD2PIirr)>; +def : 
InstRW<[AtomWrite01_7], (instregex "(ADD|SUB|MAX|MIN)PDrm", + "CMPPDrmi")>; + +def AtomWrite01_8 : SchedWriteRes<[AtomPort01]> { + let Latency = 8; + let ResourceCycles = [8]; +} +def : InstRW<[AtomWrite01_8], (instrs LOOPE, + PUSHA16, PUSHA32, + SHLD64rrCL, SHRD64rrCL, + FNSTCW16m, + CVTDQ2PDrm, + CVTPD2DQrm, + CVTPD2PSrm, + CVTPS2PDrm, + CVTTPD2DQrm, + MMX_CVTPD2PIirm, + MMX_CVTPI2PDirm, + MMX_CVTTPD2PIirm)>; + +def AtomWrite01_9 : SchedWriteRes<[AtomPort01]> { + let Latency = 9; + let ResourceCycles = [9]; +} +def : InstRW<[AtomWrite01_9], (instrs BT16mr, BT32mr, BT64mr, + POPA16, POPA32, + PUSHF16, PUSHF32, PUSHF64, + SHLD64mrCL, SHRD64mrCL, + SHLD64mri8, SHRD64mri8, + SHLD64rri8, SHRD64rri8, + CMPXCHG8rr, + MULPDrr, RCPPSr, RSQRTPSr)>; +def : InstRW<[AtomWrite01_9], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F", + "(U)?COM_FI", "TST_F", + "(U)?COMIS(D|S)rr", + "CVT(T)?SS2SI64rr(_Int)?")>; + +def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> { + let Latency = 10; + let ResourceCycles = [10]; +} +def : InstRW<[AtomWrite01_10], (instrs FLDL2E, FLDL2T, FLDLG2, FLDLN2, FLDPI, + MULPDrm, RCPPSm, RSQRTPSm)>; +def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm", + "CVT(T)?SS2SI64rm(_Int)?")>; + +def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> { + let Latency = 11; + let ResourceCycles = [11]; +} +def : InstRW<[AtomWrite01_11], (instrs BOUNDS16rm, BOUNDS32rm)>; +def : InstRW<[AtomWrite01_11], (instregex "BT(C|R|S)(16|32|64)mr")>; + +def AtomWrite01_13 : SchedWriteRes<[AtomPort01]> { + let Latency = 13; + let ResourceCycles = [13]; +} +def : InstRW<[AtomWrite01_13], (instrs AAA, AAS)>; + +def AtomWrite01_14 : SchedWriteRes<[AtomPort01]> { + let Latency = 14; + let ResourceCycles = [14]; +} +def : InstRW<[AtomWrite01_14], (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>; + +def AtomWrite01_15 : SchedWriteRes<[AtomPort01]> { + let Latency = 15; + let ResourceCycles = [15]; +} +def : InstRW<[AtomWrite01_15], (instrs CMPXCHG16rr, CMPXCHG32rr, CMPXCHG64rr, + STMXCSR)>; 
+ +def AtomWrite01_17 : SchedWriteRes<[AtomPort01]> { + let Latency = 17; + let ResourceCycles = [17]; +} +def : InstRW<[AtomWrite01_17], (instrs LOOPNE, PAUSE)>; + +def AtomWrite01_18 : SchedWriteRes<[AtomPort01]> { + let Latency = 18; + let ResourceCycles = [18]; +} +def : InstRW<[AtomWrite01_18], (instrs CMPXCHG8B, DAA, LOOP)>; + +def AtomWrite01_20 : SchedWriteRes<[AtomPort01]> { + let Latency = 20; + let ResourceCycles = [20]; +} +def : InstRW<[AtomWrite01_20], (instrs DAS)>; + +def AtomWrite01_21 : SchedWriteRes<[AtomPort01]> { + let Latency = 21; + let ResourceCycles = [21]; +} +def : InstRW<[AtomWrite01_21], (instrs AAM8i8, STD)>; + +def AtomWrite01_22 : SchedWriteRes<[AtomPort01]> { + let Latency = 22; + let ResourceCycles = [22]; +} +def : InstRW<[AtomWrite01_22], (instrs CMPXCHG16B)>; + +def AtomWrite01_23 : SchedWriteRes<[AtomPort01]> { + let Latency = 23; + let ResourceCycles = [23]; +} +def : InstRW<[AtomWrite01_23], (instrs ARPL16mr, ARPL16rr)>; + +def AtomWrite01_25 : SchedWriteRes<[AtomPort01]> { + let Latency = 25; + let ResourceCycles = [25]; +} +def : InstRW<[AtomWrite01_25], (instrs FNCLEX, FXTRACT)>; + +def AtomWrite01_26 : SchedWriteRes<[AtomPort01]> { + let Latency = 26; + let ResourceCycles = [26]; +} +def : InstRW<[AtomWrite01_26], (instrs POPF32, POPF64)>; + +def AtomWrite01_29 : SchedWriteRes<[AtomPort01]> { + let Latency = 29; + let ResourceCycles = [29]; +} +def : InstRW<[AtomWrite01_29], (instregex "POP(DS|ES|FS|GS)(16|32|64)")>; + +def AtomWrite01_30 : SchedWriteRes<[AtomPort01]> { + let Latency = 30; + let ResourceCycles = [30]; +} +def : InstRW<[AtomWrite01_30], (instrs RDTSC, RDTSCP)>; + +def AtomWrite01_32 : SchedWriteRes<[AtomPort01]> { + let Latency = 32; + let ResourceCycles = [32]; +} +def : InstRW<[AtomWrite01_32], (instrs ENTER, POPF16)>; + +def AtomWrite01_45 : SchedWriteRes<[AtomPort01]> { + let Latency = 45; + let ResourceCycles = [45]; +} +def : InstRW<[AtomWrite01_45], (instrs MONITORrrr)>; + +def AtomWrite01_46 : 
SchedWriteRes<[AtomPort01]> { + let Latency = 46; + let ResourceCycles = [46]; +} +def : InstRW<[AtomWrite01_46], (instrs FRNDINT, MWAITrr, RDPMC)>; + +def AtomWrite01_48 : SchedWriteRes<[AtomPort01]> { + let Latency = 48; + let ResourceCycles = [48]; +} +def : InstRW<[AtomWrite01_48], (instrs POPSS16, POPSS32)>; + +def AtomWrite01_55 : SchedWriteRes<[AtomPort01]> { + let Latency = 55; + let ResourceCycles = [55]; +} +def : InstRW<[AtomWrite01_55], (instrs FPREM)>; + +def AtomWrite01_59 : SchedWriteRes<[AtomPort01]> { + let Latency = 59; + let ResourceCycles = [59]; +} +def : InstRW<[AtomWrite01_59], (instrs INSB, INSL, INSW)>; + +def AtomWrite01_62 : SchedWriteRes<[AtomPort01]> { + let Latency = 62; + let ResourceCycles = [62]; +} +def : InstRW<[AtomWrite01_62], (instregex "DIVSD(r|m)(_Int)?", + "SQRTSD(r|m)(_Int)?")>; + +def AtomWrite01_63 : SchedWriteRes<[AtomPort01]> { + let Latency = 63; + let ResourceCycles = [63]; +} +def : InstRW<[AtomWrite01_63], (instrs FNINIT)>; + +def AtomWrite01_68 : SchedWriteRes<[AtomPort01]> { + let Latency = 68; + let ResourceCycles = [68]; +} +def : InstRW<[AtomWrite01_68], (instrs OUT8rr, OUT16rr, OUT32rr)>; + +def AtomWrite01_70 : SchedWriteRes<[AtomPort01]> { + let Latency = 70; + let ResourceCycles = [70]; +} +def : InstRW<[AtomWrite01_70], (instrs DIVPSrr, DIVPSrm, SQRTPSr, SQRTPSm)>; + +def AtomWrite01_71 : SchedWriteRes<[AtomPort01]> { + let Latency = 71; + let ResourceCycles = [71]; +} +def : InstRW<[AtomWrite01_71], (instrs FPREM1, + INVLPG, INVLPGA32, INVLPGA64)>; +def : InstRW<[AtomWrite01_71], (instregex "SQRT_F")>; + +def AtomWrite01_72 : SchedWriteRes<[AtomPort01]> { + let Latency = 72; + let ResourceCycles = [72]; +} +def : InstRW<[AtomWrite01_72], (instrs OUT8ir, OUT16ir, OUT32ir)>; + +def AtomWrite01_74 : SchedWriteRes<[AtomPort01]> { + let Latency = 74; + let ResourceCycles = [74]; +} +def : InstRW<[AtomWrite01_74], (instrs OUTSB, OUTSL, OUTSW)>; + +def AtomWrite01_77 : SchedWriteRes<[AtomPort01]> { + let Latency 
= 77; + let ResourceCycles = [77]; +} +def : InstRW<[AtomWrite01_77], (instrs FSCALE)>; - let Itineraries = AtomItineraries; +def AtomWrite01_78 : SchedWriteRes<[AtomPort01]> { + let Latency = 78; + let ResourceCycles = [78]; } +def : InstRW<[AtomWrite01_78], (instrs RDMSR)>; + +def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> { + let Latency = 79; + let ResourceCycles = [79]; +} +def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$", + "LRETI?(L|Q|W)")>; + +def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> { + let Latency = 92; + let ResourceCycles = [92]; +} +def : InstRW<[AtomWrite01_92], (instrs IN8ri, IN16ri, IN32ri)>; + +def AtomWrite01_94 : SchedWriteRes<[AtomPort01]> { + let Latency = 94; + let ResourceCycles = [94]; +} +def : InstRW<[AtomWrite01_94], (instrs IN8rr, IN16rr, IN32rr)>; + +def AtomWrite01_99 : SchedWriteRes<[AtomPort01]> { + let Latency = 99; + let ResourceCycles = [99]; +} +def : InstRW<[AtomWrite01_99], (instrs F2XM1)>; + +def AtomWrite01_121 : SchedWriteRes<[AtomPort01]> { + let Latency = 121; + let ResourceCycles = [121]; +} +def : InstRW<[AtomWrite01_121], (instrs CPUID)>; + +def AtomWrite01_125 : SchedWriteRes<[AtomPort01]> { + let Latency = 125; + let ResourceCycles = [125]; +} +def : InstRW<[AtomWrite01_125], (instrs DIVPDrr, DIVPDrm, SQRTPDr, SQRTPDm)>; + +def AtomWrite01_127 : SchedWriteRes<[AtomPort01]> { + let Latency = 127; + let ResourceCycles = [127]; +} +def : InstRW<[AtomWrite01_127], (instrs INT)>; + +def AtomWrite01_130 : SchedWriteRes<[AtomPort01]> { + let Latency = 130; + let ResourceCycles = [130]; +} +def : InstRW<[AtomWrite01_130], (instrs INT3)>; + +def AtomWrite01_140 : SchedWriteRes<[AtomPort01]> { + let Latency = 140; + let ResourceCycles = [140]; +} +def : InstRW<[AtomWrite01_140], (instrs FXSAVE, FXSAVE64)>; + +def AtomWrite01_141 : SchedWriteRes<[AtomPort01]> { + let Latency = 141; + let ResourceCycles = [141]; +} +def : InstRW<[AtomWrite01_141], (instrs FXRSTOR, FXRSTOR64)>; + +def AtomWrite01_146 : 
SchedWriteRes<[AtomPort01]> { + let Latency = 146; + let ResourceCycles = [146]; +} +def : InstRW<[AtomWrite01_146], (instrs FYL2X)>; + +def AtomWrite01_147 : SchedWriteRes<[AtomPort01]> { + let Latency = 147; + let ResourceCycles = [147]; +} +def : InstRW<[AtomWrite01_147], (instrs FYL2XP1)>; + +def AtomWrite01_168 : SchedWriteRes<[AtomPort01]> { + let Latency = 168; + let ResourceCycles = [168]; +} +def : InstRW<[AtomWrite01_168], (instrs FPTAN)>; + +def AtomWrite01_174 : SchedWriteRes<[AtomPort01]> { + let Latency = 174; + let ResourceCycles = [174]; +} +def : InstRW<[AtomWrite01_174], (instrs FSINCOS)>; +def : InstRW<[AtomWrite01_174], (instregex "(COS|SIN)_F")>; + +def AtomWrite01_183 : SchedWriteRes<[AtomPort01]> { + let Latency = 183; + let ResourceCycles = [183]; +} +def : InstRW<[AtomWrite01_183], (instrs FPATAN)>; + +def AtomWrite01_202 : SchedWriteRes<[AtomPort01]> { + let Latency = 202; + let ResourceCycles = [202]; +} +def : InstRW<[AtomWrite01_202], (instrs WRMSR)>; + +} // SchedModel Index: test/CodeGen/X86/lsr-loop-exit-cond.ll =================================================================== --- test/CodeGen/X86/lsr-loop-exit-cond.ll +++ test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -7,8 +7,10 @@ ; CHECK-NEXT: jne ; ATOM-LABEL: t: -; ATOM: movl (%r9,%r{{.+}},4), %e{{..}} +; ATOM: movl (%r9,%r{{.+}},4), %r{{..}} +; ATOM-NEXT: xorl ; ATOM-NEXT: testq +; ATOM-NEXT: movl ; ATOM-NEXT: jne @Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5] Index: test/CodeGen/X86/lsr-static-addr.ll =================================================================== --- test/CodeGen/X86/lsr-static-addr.ll +++ test/CodeGen/X86/lsr-static-addr.ll @@ -1,5 +1,5 @@ ; RUN: llc -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s -; RUN: llc -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck -check-prefix=ATOM %s +; RUN: llc -mcpu=atom 
-mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s ; CHECK: xorl %eax, %eax ; CHECK: movsd .LCPI0_0(%rip), %xmm0 @@ -10,16 +10,6 @@ ; CHECK-NEXT: movsd ; CHECK-NEXT: incq %rax - -; ATOM: movsd .LCPI0_0(%rip), %xmm0 -; ATOM: xorl %eax, %eax -; ATOM: align -; ATOM-NEXT: BB0_2: -; ATOM-NEXT: movsd A(,%rax,8) -; ATOM-NEXT: mulsd -; ATOM-NEXT: movsd -; ATOM-NEXT: incq %rax - @A = external global [0 x double] define void @foo(i64 %n) nounwind { Index: test/CodeGen/X86/mmx-schedule.ll =================================================================== --- test/CodeGen/X86/mmx-schedule.ll +++ test/CodeGen/X86/mmx-schedule.ll @@ -647,11 +647,11 @@ ; ; ATOM-LABEL: test_movd: ; ATOM: # %bb.0: -; ATOM-NEXT: movd (%rsi), %mm1 # sched: [1:1.00] -; ATOM-NEXT: movd %edi, %mm2 # sched: [1:1.00] -; ATOM-NEXT: paddd %mm2, %mm1 # sched: [1:0.50] -; ATOM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] -; ATOM-NEXT: movd %mm1, %ecx # sched: [3:3.00] +; ATOM-NEXT: movd %edi, %mm1 # sched: [1:1.00] +; ATOM-NEXT: movd (%rsi), %mm2 # sched: [1:1.00] +; ATOM-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] +; ATOM-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] +; ATOM-NEXT: movd %mm2, %ecx # sched: [3:3.00] ; ATOM-NEXT: movd %mm0, %eax # sched: [3:3.00] ; ATOM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] @@ -3509,8 +3509,8 @@ ; ; ATOM-LABEL: test_pinsrw: ; ATOM: # %bb.0: -; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00] ; ATOM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] +; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00] ; ATOM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] ; ATOM-NEXT: retq # sched: [79:39.50] Index: test/CodeGen/X86/schedule-x86_32.ll =================================================================== --- test/CodeGen/X86/schedule-x86_32.ll +++ test/CodeGen/X86/schedule-x86_32.ll @@ -1220,7 +1220,7 @@ ; ATOM-LABEL: test_into: ; ATOM: # %bb.0: ; ATOM-NEXT: #APP -; 
ATOM-NEXT: into # sched: [0:?] +; ATOM-NEXT: into # sched: [6:3.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; Index: test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- test/CodeGen/X86/schedule-x86_64.ll +++ test/CodeGen/X86/schedule-x86_64.ll @@ -15737,7 +15737,7 @@ ; ATOM-LABEL: test_ud2: ; ATOM: # %bb.0: ; ATOM-NEXT: #APP -; ATOM-NEXT: ud2 # sched: [0:?] +; ATOM-NEXT: ud2 # sched: [100:0.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retq # sched: [79:39.50] ; Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -104,14 +104,23 @@ ;; Select between two floating point constants. define float @test3(i32 %x) nounwind readnone { -; CHECK-LABEL: test3: -; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: sete %al -; CHECK-NEXT: leaq {{.*}}(%rip), %rcx -; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: retq +; GENERIC-LABEL: test3: +; GENERIC: ## %bb.0: ## %entry +; GENERIC-NEXT: xorl %eax, %eax +; GENERIC-NEXT: testl %edi, %edi +; GENERIC-NEXT: sete %al +; GENERIC-NEXT: leaq {{.*}}(%rip), %rcx +; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; GENERIC-NEXT: retq +; +; ATOM-LABEL: test3: +; ATOM: ## %bb.0: ## %entry +; ATOM-NEXT: xorl %eax, %eax +; ATOM-NEXT: leaq {{.*}}(%rip), %rcx +; ATOM-NEXT: testl %edi, %edi +; ATOM-NEXT: sete %al +; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ATOM-NEXT: retq ; ; MCU-LABEL: test3: ; MCU: # %bb.0: # %entry @@ -266,15 +275,25 @@ ; Select with fp80's define x86_fp80 @test7(i32 %tmp8) nounwind { -; CHECK-LABEL: test7: -; CHECK: ## %bb.0: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: setns %al -; CHECK-NEXT: shlq $4, %rax -; CHECK-NEXT: leaq {{.*}}(%rip), %rcx -; CHECK-NEXT: fldt (%rax,%rcx) -; CHECK-NEXT: retq +; 
GENERIC-LABEL: test7: +; GENERIC: ## %bb.0: +; GENERIC-NEXT: xorl %eax, %eax +; GENERIC-NEXT: testl %edi, %edi +; GENERIC-NEXT: setns %al +; GENERIC-NEXT: shlq $4, %rax +; GENERIC-NEXT: leaq {{.*}}(%rip), %rcx +; GENERIC-NEXT: fldt (%rax,%rcx) +; GENERIC-NEXT: retq +; +; ATOM-LABEL: test7: +; ATOM: ## %bb.0: +; ATOM-NEXT: xorl %eax, %eax +; ATOM-NEXT: leaq {{.*}}(%rip), %rcx +; ATOM-NEXT: testl %edi, %edi +; ATOM-NEXT: setns %al +; ATOM-NEXT: shlq $4, %rax +; ATOM-NEXT: fldt (%rax,%rcx) +; ATOM-NEXT: retq ; ; MCU-LABEL: test7: ; MCU: # %bb.0: @@ -330,31 +349,32 @@ ; ATOM-NEXT: testb $1, %dil ; ATOM-NEXT: jne LBB7_1 ; ATOM-NEXT: ## %bb.2: -; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero -; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; ATOM-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ATOM-NEXT: jmp LBB7_3 ; ATOM-NEXT: LBB7_1: -; ATOM-NEXT: movd %r9d, %xmm0 +; ATOM-NEXT: movd %r9d, %xmm1 ; ATOM-NEXT: movd %r8d, %xmm2 -; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; ATOM-NEXT: movd %ecx, %xmm3 ; ATOM-NEXT: movd %edx, %xmm0 -; ATOM-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero +; ATOM-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; ATOM-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero ; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; ATOM-NEXT: LBB7_3: -; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] ; 
ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; ATOM-NEXT: pcmpeqd %xmm2, %xmm2 -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] +; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; ATOM-NEXT: paddd %xmm2, %xmm0 ; ATOM-NEXT: paddd %xmm2, %xmm1 -; ATOM-NEXT: movdqa %xmm0, (%rsi) ; ATOM-NEXT: movq %xmm1, 16(%rsi) +; ATOM-NEXT: movdqa %xmm0, (%rsi) ; ATOM-NEXT: retq ; ; MCU-LABEL: test8: @@ -634,8 +654,8 @@ ; ATOM: ## %bb.0: ## %entry ; ATOM-NEXT: movq %rdi, %rax ; ATOM-NEXT: movl $4, %ecx -; ATOM-NEXT: mulq %rcx ; ATOM-NEXT: movq $-1, %rdi +; ATOM-NEXT: mulq %rcx ; ATOM-NEXT: cmovnoq %rax, %rdi ; ATOM-NEXT: jmp __Znam ## TAILCALL ; @@ -894,8 +914,8 @@ ; ATOM: ## %bb.0: ; ATOM-NEXT: cmpl $127, %edi ; ATOM-NEXT: movl $127, %eax -; ATOM-NEXT: cmovlel %edi, %eax ; ATOM-NEXT: movb $-128, %cl +; ATOM-NEXT: cmovlel %edi, %eax ; ATOM-NEXT: cmpl $-128, %eax ; ATOM-NEXT: jl LBB22_2 ; ATOM-NEXT: ## %bb.1: @@ -946,8 +966,8 @@ ; ATOM: ## %bb.0: ; ATOM-NEXT: cmpl $32767, %edi ## imm = 0x7FFF ; ATOM-NEXT: movl $32767, %eax ## imm = 0x7FFF -; ATOM-NEXT: cmovlel %edi, %eax ; ATOM-NEXT: movl $32768, %ecx ## imm = 0x8000 +; ATOM-NEXT: cmovlel %edi, %eax ; ATOM-NEXT: cmpl $-32768, %eax ## imm = 0x8000 ; ATOM-NEXT: cmovgel %eax, %ecx ; ATOM-NEXT: movw %cx, (%rsi) Index: test/CodeGen/X86/sse-schedule.ll =================================================================== --- test/CodeGen/X86/sse-schedule.ll +++ test/CodeGen/X86/sse-schedule.ll @@ -6133,8 +6133,6 @@ ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_fnop: Index: test/CodeGen/X86/sse2-schedule.ll =================================================================== --- test/CodeGen/X86/sse2-schedule.ll +++ test/CodeGen/X86/sse2-schedule.ll @@ -4670,10 +4670,10 @@ ; ATOM: # %bb.0: ; ATOM-NEXT: 
movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] ; ATOM-NEXT: movq %rdi, %xmm2 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm2 # sched: [2:1.00] ; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00] -; ATOM-NEXT: movq %xmm2, (%rsi) # sched: [1:1.00] +; ATOM-NEXT: paddq %xmm0, %xmm2 # sched: [2:1.00] ; ATOM-NEXT: movq %xmm1, %rax # sched: [3:3.00] +; ATOM-NEXT: movq %xmm2, (%rsi) # sched: [1:1.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_movd_64: @@ -10447,10 +10447,11 @@ ; ; ATOM-LABEL: test_pshufd: ; ATOM: # %bb.0: -; ATOM-NEXT: pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] -; ATOM-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; ATOM-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00] +; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pshufd: @@ -10575,10 +10576,11 @@ ; ; ATOM-LABEL: test_pshufhw: ; ATOM: # %bb.0: -; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] -; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; ATOM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] +; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pshufhw: @@ -10703,10 +10705,11 @@ ; ; ATOM-LABEL: test_pshuflw: ; ATOM: # %bb.0: -; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] -; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; 
ATOM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] +; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pshuflw: Index: test/CodeGen/X86/sse3-schedule.ll =================================================================== --- test/CodeGen/X86/sse3-schedule.ll +++ test/CodeGen/X86/sse3-schedule.ll @@ -899,10 +899,9 @@ ; ; ATOM-LABEL: test_movddup: ; ATOM: # %bb.0: -; ATOM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [1:1.00] -; ATOM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; ATOM-NEXT: subpd %xmm0, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] +; ATOM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00] +; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_movddup: @@ -1027,10 +1026,9 @@ ; ; ATOM-LABEL: test_movshdup: ; ATOM: # %bb.0: -; ATOM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:1.00] -; ATOM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] +; ATOM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00] +; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_movshdup: @@ -1155,10 +1153,9 @@ ; ; ATOM-LABEL: test_movsldup: ; ATOM: # %bb.0: -; ATOM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:1.00] -; ATOM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] -; 
ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] +; ATOM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00] +; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_movsldup: Index: test/CodeGen/X86/ssse3-schedule.ll =================================================================== --- test/CodeGen/X86/ssse3-schedule.ll +++ test/CodeGen/X86/ssse3-schedule.ll @@ -29,10 +29,11 @@ ; ; ATOM-LABEL: test_pabsb: ; ATOM: # %bb.0: -; ATOM-NEXT: pabsb (%rdi), %xmm1 # sched: [1:1.00] -; ATOM-NEXT: pabsb %xmm0, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: por %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; ATOM-NEXT: pabsb (%rdi), %xmm0 # sched: [1:1.00] +; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pabsb: @@ -157,10 +158,11 @@ ; ; ATOM-LABEL: test_pabsd: ; ATOM: # %bb.0: -; ATOM-NEXT: pabsd (%rdi), %xmm1 # sched: [1:1.00] -; ATOM-NEXT: pabsd %xmm0, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: por %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; ATOM-NEXT: pabsd (%rdi), %xmm0 # sched: [1:1.00] +; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pabsd: @@ -285,10 +287,11 @@ ; ; ATOM-LABEL: test_pabsw: ; ATOM: # %bb.0: -; ATOM-NEXT: pabsw (%rdi), %xmm1 # sched: [1:1.00] -; ATOM-NEXT: pabsw %xmm0, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: por %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] +; ATOM-NEXT: pabsw (%rdi), %xmm0 # sched: [1:1.00] +; ATOM-NEXT: 
por %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] +; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_pabsw: Index: test/CodeGen/X86/x87-schedule.ll =================================================================== --- test/CodeGen/X86/x87-schedule.ll +++ test/CodeGen/X86/x87-schedule.ll @@ -177,10 +177,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fadd %st(0), %st(1) # sched: [0:?] -; ATOM-NEXT: fadd %st(2) # sched: [0:?] -; ATOM-NEXT: fadds (%ecx) # sched: [0:?] -; ATOM-NEXT: faddl (%eax) # sched: [0:?] +; ATOM-NEXT: fadd %st(0), %st(1) # sched: [5:5.00] +; ATOM-NEXT: fadd %st(2) # sched: [5:5.00] +; ATOM-NEXT: fadds (%ecx) # sched: [5:5.00] +; ATOM-NEXT: faddl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -301,10 +301,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: faddp %st(1) # sched: [0:?] -; ATOM-NEXT: faddp %st(2) # sched: [0:?] -; ATOM-NEXT: fiadds (%ecx) # sched: [0:?] -; ATOM-NEXT: fiaddl (%eax) # sched: [0:?] +; ATOM-NEXT: faddp %st(1) # sched: [5:5.00] +; ATOM-NEXT: faddp %st(2) # sched: [5:5.00] +; ATOM-NEXT: fiadds (%ecx) # sched: [5:5.00] +; ATOM-NEXT: fiaddl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -421,8 +421,8 @@ ; ATOM: # %bb.0: ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fbld (%eax) # sched: [0:?] -; ATOM-NEXT: fbstp (%eax) # sched: [0:?] 
+; ATOM-NEXT: fbld (%eax) # sched: [100:0.50] +; ATOM-NEXT: fbstp (%eax) # sched: [100:0.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -895,10 +895,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fcom %st(1) # sched: [0:?] -; ATOM-NEXT: fcom %st(3) # sched: [0:?] -; ATOM-NEXT: fcoms (%ecx) # sched: [0:?] -; ATOM-NEXT: fcoml (%eax) # sched: [0:?] +; ATOM-NEXT: fcom %st(1) # sched: [5:5.00] +; ATOM-NEXT: fcom %st(3) # sched: [5:5.00] +; ATOM-NEXT: fcoms (%ecx) # sched: [5:5.00] +; ATOM-NEXT: fcoml (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -1020,10 +1020,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fcomp %st(1) # sched: [0:?] -; ATOM-NEXT: fcomp %st(3) # sched: [0:?] -; ATOM-NEXT: fcomps (%ecx) # sched: [0:?] -; ATOM-NEXT: fcompl (%eax) # sched: [0:?] +; ATOM-NEXT: fcomp %st(1) # sched: [5:5.00] +; ATOM-NEXT: fcomp %st(3) # sched: [5:5.00] +; ATOM-NEXT: fcomps (%ecx) # sched: [5:5.00] +; ATOM-NEXT: fcompl (%eax) # sched: [5:5.00] ; ATOM-NEXT: fcompp # sched: [1:1.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] @@ -1385,10 +1385,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fdiv %st(0), %st(1) # sched: [0:?] -; ATOM-NEXT: fdiv %st(2) # sched: [0:?] -; ATOM-NEXT: fdivs (%ecx) # sched: [0:?] -; ATOM-NEXT: fdivl (%eax) # sched: [0:?] 
+; ATOM-NEXT: fdiv %st(0), %st(1) # sched: [34:17.00] +; ATOM-NEXT: fdiv %st(2) # sched: [34:17.00] +; ATOM-NEXT: fdivs (%ecx) # sched: [34:17.00] +; ATOM-NEXT: fdivl (%eax) # sched: [34:17.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -1509,10 +1509,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fdivp %st(1) # sched: [0:?] -; ATOM-NEXT: fdivp %st(2) # sched: [0:?] -; ATOM-NEXT: fidivs (%ecx) # sched: [0:?] -; ATOM-NEXT: fidivl (%eax) # sched: [0:?] +; ATOM-NEXT: fdivp %st(1) # sched: [34:17.00] +; ATOM-NEXT: fdivp %st(2) # sched: [34:17.00] +; ATOM-NEXT: fidivs (%ecx) # sched: [34:17.00] +; ATOM-NEXT: fidivl (%eax) # sched: [34:17.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -1633,10 +1633,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fdivr %st(0), %st(1) # sched: [0:?] -; ATOM-NEXT: fdivr %st(2) # sched: [0:?] -; ATOM-NEXT: fdivrs (%ecx) # sched: [0:?] -; ATOM-NEXT: fdivrl (%eax) # sched: [0:?] +; ATOM-NEXT: fdivr %st(0), %st(1) # sched: [34:17.00] +; ATOM-NEXT: fdivr %st(2) # sched: [34:17.00] +; ATOM-NEXT: fdivrs (%ecx) # sched: [34:17.00] +; ATOM-NEXT: fdivrl (%eax) # sched: [34:17.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -1757,10 +1757,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fdivrp %st(1) # sched: [0:?] -; ATOM-NEXT: fdivrp %st(2) # sched: [0:?] -; ATOM-NEXT: fidivrs (%ecx) # sched: [0:?] -; ATOM-NEXT: fidivrl (%eax) # sched: [0:?] 
+; ATOM-NEXT: fdivrp %st(1) # sched: [34:17.00] +; ATOM-NEXT: fdivrp %st(2) # sched: [34:17.00] +; ATOM-NEXT: fidivrs (%ecx) # sched: [34:17.00] +; ATOM-NEXT: fidivrl (%eax) # sched: [34:17.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -1955,10 +1955,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: ficoms (%ecx) # sched: [0:?] -; ATOM-NEXT: ficoml (%eax) # sched: [0:?] -; ATOM-NEXT: ficomps (%ecx) # sched: [0:?] -; ATOM-NEXT: ficompl (%eax) # sched: [0:?] +; ATOM-NEXT: ficoms (%ecx) # sched: [5:5.00] +; ATOM-NEXT: ficoml (%eax) # sched: [5:5.00] +; ATOM-NEXT: ficomps (%ecx) # sched: [5:5.00] +; ATOM-NEXT: ficompl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -2740,7 +2740,7 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: #APP ; ATOM-NEXT: fldcw (%eax) # sched: [5:2.50] -; ATOM-NEXT: fldenv (%eax) # sched: [0:?] +; ATOM-NEXT: fldenv (%eax) # sched: [100:0.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -2961,10 +2961,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fmul %st(0), %st(1) # sched: [0:?] -; ATOM-NEXT: fmul %st(2) # sched: [0:?] -; ATOM-NEXT: fmuls (%ecx) # sched: [0:?] -; ATOM-NEXT: fmull (%eax) # sched: [0:?] +; ATOM-NEXT: fmul %st(0), %st(1) # sched: [4:4.00] +; ATOM-NEXT: fmul %st(2) # sched: [4:4.00] +; ATOM-NEXT: fmuls (%ecx) # sched: [4:4.00] +; ATOM-NEXT: fmull (%eax) # sched: [4:4.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -3085,10 +3085,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fmulp %st(1) # sched: [0:?] -; ATOM-NEXT: fmulp %st(2) # sched: [0:?] -; ATOM-NEXT: fimuls (%ecx) # sched: [0:?] 
-; ATOM-NEXT: fimull (%eax) # sched: [0:?] +; ATOM-NEXT: fmulp %st(1) # sched: [4:4.00] +; ATOM-NEXT: fmulp %st(2) # sched: [4:4.00] +; ATOM-NEXT: fimuls (%ecx) # sched: [4:4.00] +; ATOM-NEXT: fimull (%eax) # sched: [4:4.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -3584,7 +3584,7 @@ ; ATOM: # %bb.0: ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: frstor (%eax) # sched: [0:?] +; ATOM-NEXT: frstor (%eax) # sched: [100:0.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -3670,7 +3670,7 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: #APP ; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fnsave (%eax) # sched: [0:?] +; ATOM-NEXT: fnsave (%eax) # sched: [100:0.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -3762,7 +3762,7 @@ ; ATOM: # %bb.0: ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fnsave (%eax) # sched: [0:?] +; ATOM-NEXT: fnsave (%eax) # sched: [100:0.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -4314,9 +4314,9 @@ ; ATOM-NEXT: wait # sched: [1:0.50] ; ATOM-NEXT: fnstcw (%eax) # sched: [8:4.00] ; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fnstenv (%eax) # sched: [0:?] +; ATOM-NEXT: fnstenv (%eax) # sched: [100:0.50] ; ATOM-NEXT: wait # sched: [1:0.50] -; ATOM-NEXT: fnstsw (%eax) # sched: [0:?] +; ATOM-NEXT: fnstsw (%eax) # sched: [100:0.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -4443,8 +4443,8 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: #APP ; ATOM-NEXT: fnstcw (%eax) # sched: [8:4.00] -; ATOM-NEXT: fnstenv (%eax) # sched: [0:?] -; ATOM-NEXT: fnstsw (%eax) # sched: [0:?] 
+; ATOM-NEXT: fnstenv (%eax) # sched: [100:0.50] +; ATOM-NEXT: fnstsw (%eax) # sched: [100:0.50] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -4549,10 +4549,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fsub %st(0), %st(1) # sched: [0:?] -; ATOM-NEXT: fsub %st(2) # sched: [0:?] -; ATOM-NEXT: fsubs (%ecx) # sched: [0:?] -; ATOM-NEXT: fsubl (%eax) # sched: [0:?] +; ATOM-NEXT: fsub %st(0), %st(1) # sched: [5:5.00] +; ATOM-NEXT: fsub %st(2) # sched: [5:5.00] +; ATOM-NEXT: fsubs (%ecx) # sched: [5:5.00] +; ATOM-NEXT: fsubl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -4673,10 +4673,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fsubp %st(1) # sched: [0:?] -; ATOM-NEXT: fsubp %st(2) # sched: [0:?] -; ATOM-NEXT: fisubs (%ecx) # sched: [0:?] -; ATOM-NEXT: fisubl (%eax) # sched: [0:?] +; ATOM-NEXT: fsubp %st(1) # sched: [5:5.00] +; ATOM-NEXT: fsubp %st(2) # sched: [5:5.00] +; ATOM-NEXT: fisubs (%ecx) # sched: [5:5.00] +; ATOM-NEXT: fisubl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -4797,10 +4797,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fsubr %st(0), %st(1) # sched: [0:?] -; ATOM-NEXT: fsubr %st(2) # sched: [0:?] -; ATOM-NEXT: fsubrs (%ecx) # sched: [0:?] -; ATOM-NEXT: fsubrl (%eax) # sched: [0:?] 
+; ATOM-NEXT: fsubr %st(0), %st(1) # sched: [5:5.00] +; ATOM-NEXT: fsubr %st(2) # sched: [5:5.00] +; ATOM-NEXT: fsubrs (%ecx) # sched: [5:5.00] +; ATOM-NEXT: fsubrl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ; @@ -4921,10 +4921,10 @@ ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00] ; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00] ; ATOM-NEXT: #APP -; ATOM-NEXT: fsubrp %st(1) # sched: [0:?] -; ATOM-NEXT: fsubrp %st(2) # sched: [0:?] -; ATOM-NEXT: fisubrs (%ecx) # sched: [0:?] -; ATOM-NEXT: fisubrl (%eax) # sched: [0:?] +; ATOM-NEXT: fsubrp %st(1) # sched: [5:5.00] +; ATOM-NEXT: fsubrp %st(2) # sched: [5:5.00] +; ATOM-NEXT: fisubrs (%ecx) # sched: [5:5.00] +; ATOM-NEXT: fisubrl (%eax) # sched: [5:5.00] ; ATOM-NEXT: #NO_APP ; ATOM-NEXT: retl # sched: [79:39.50] ;