Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -449,6 +449,7 @@ include "X86SchedBroadwell.td" include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" +include "X86ScheduleBdVer2.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" include "X86SchedSkylakeServer.td" @@ -1010,7 +1011,7 @@ ]>; // Bulldozer -def : Proc<"bdver1", [ +def : ProcessorModel<"bdver1", BdVer2Model, [ FeatureX87, FeatureCMOV, FeatureXOP, @@ -1035,7 +1036,7 @@ FeatureMacroFusion ]>; // Piledriver -def : Proc<"bdver2", [ +def : ProcessorModel<"bdver2", BdVer2Model, [ FeatureX87, FeatureCMOV, FeatureXOP, Index: llvm/trunk/lib/Target/X86/X86PfmCounters.td =================================================================== --- llvm/trunk/lib/Target/X86/X86PfmCounters.td +++ llvm/trunk/lib/Target/X86/X86PfmCounters.td @@ -91,6 +91,18 @@ } def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>; +def BdVer2PfmCounters : ProcPfmCounters { + let CycleCounter = PfmCounter<"cpu_clk_unhalted">; + let UopsCounter = PfmCounter<"retired_uops">; + let IssueCounters = [ + PfmIssueCounter<"PdFPU0", "dispatched_fpu_ops:ops_pipe0 + dispatched_fpu_ops:ops_dual_pipe0">, + PfmIssueCounter<"PdFPU1", "dispatched_fpu_ops:ops_pipe1 + dispatched_fpu_ops:ops_dual_pipe1">, + PfmIssueCounter<"PdFPU2", "dispatched_fpu_ops:ops_pipe2 + dispatched_fpu_ops:ops_dual_pipe2">, + PfmIssueCounter<"PdFPU3", "dispatched_fpu_ops:ops_pipe3 + dispatched_fpu_ops:ops_dual_pipe3"> + ]; +} +def : PfmCountersBinding<"bdver2", BdVer2PfmCounters>; + def BtVer2PfmCounters : ProcPfmCounters { let CycleCounter = PfmCounter<"cpu_clk_unhalted">; let UopsCounter = PfmCounter<"retired_uops">; Index: llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td +++ 
llvm/trunk/lib/Target/X86/X86ScheduleBdVer2.td @@ -0,0 +1,1278 @@ +//=- X86ScheduleBdVer2.td - X86 BdVer2 (Piledriver) Scheduling * tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for AMD bdver2 (Piledriver) to support +// instruction scheduling and other instruction cost heuristics. +// Based on: +// * AMD Software Optimization Guide for AMD Family 15h Processors. +// https://support.amd.com/TechDocs/47414_15h_sw_opt_guide.pdf +// * The microarchitecture of Intel, AMD and VIA CPUs, By Agner Fog +// http://www.agner.org/optimize/microarchitecture.pdf +// * https://www.realworldtech.com/bulldozer/ +// Yes, that is for Bulldozer aka bdver1, not Piledriver aka bdver2. +// +//===----------------------------------------------------------------------===// + +def BdVer2Model : SchedMachineModel { + let IssueWidth = 4; // Up to 4 IPC can be decoded, issued, retired. + let MicroOpBufferSize = 128; // RCU reorder buffer size, which is unconfirmed. + let LoopMicroOpBufferSize = -1; // There does not seem to be a loop buffer. + let LoadLatency = 4; // L1 data cache has a 4-cycle load-to-use latency. + let HighLatency = 25; // FIXME: any better choice? + let MispredictPenalty = 20; // Minimum branch misdirection penalty. + + let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass. + + // FIXME: Incomplete. This flag is set to allow the scheduler to assign + // a default model to unrecognized opcodes. + let CompleteModel = 0; +} // SchedMachineModel + +let SchedModel = BdVer2Model in { + + +//===----------------------------------------------------------------------===// +// Pipes +//===----------------------------------------------------------------------===// + +// There are total of eight pipes. 
+ +//===----------------------------------------------------------------------===// +// Integer execution pipes +// + +// Two EX (ALU) pipes. +def PdEX0 : ProcResource<1>; // ALU, Integer Pipe0 +def PdEX1 : ProcResource<1>; // ALU, Integer Pipe1 +def PdEX01 : ProcResGroup<[PdEX0, PdEX1]>; + +// Two AGLU pipes, identical. +def PdAGLU01 : ProcResource<2>; // AGU, Integer Pipe[23] + +//===----------------------------------------------------------------------===// +// Floating point execution pipes +// + +// Four FPU pipes. + +def PdFPU0 : ProcResource<1>; // Vector/FPU Pipe0 +def PdFPU1 : ProcResource<1>; // Vector/FPU Pipe1 +def PdFPU2 : ProcResource<1>; // Vector/FPU Pipe2 +def PdFPU3 : ProcResource<1>; // Vector/FPU Pipe3 + +// FPU grouping +def PdFPU01 : ProcResGroup<[PdFPU0, PdFPU1]>; +def PdFPU23 : ProcResGroup<[PdFPU2, PdFPU3]>; + + +//===----------------------------------------------------------------------===// +// RCU +//===----------------------------------------------------------------------===// + +// The Retire Control Unit on Piledriver can retire up to 4 macro-ops per cycle. +// On the other hand, the RCU reorder buffer size for Piledriver does not +// seem to be specified in any trustworthy source. +// But as per https://www.realworldtech.com/bulldozer/6/ the Bulldozer had +// an RCU reorder buffer size of 128. So that is a good guess for now. +def PdRCU : RetireControlUnit<128, 4>; + + +//===----------------------------------------------------------------------===// +// Pipelines +//===----------------------------------------------------------------------===// + +// There are a total of two pipelines, each one with its own scheduler. + +//===----------------------------------------------------------------------===// +// Integer Pipeline Scheduling +// + +// There is one Integer Scheduler per core. + +// Integer physical register file has 96 registers of 64-bit. 
+def PdIntegerPRF : RegisterFile<96, [GR64, CCR]>; + +// Unified Integer, Memory Scheduler has 40 entries. +def PdEX : ProcResGroup<[PdEX0, PdEX1, PdAGLU01]> { + // Matches the 40-entry unified integer/memory scheduler noted above. + let BufferSize = 40; +} + + +//===----------------------------------------------------------------------===// +// FPU Pipeline Scheduling +// + +// The FPU is shared between the two cores. + +// FP physical register file has 160 registers of 128-bit. +// Operations on 256-bit data types are cracked into two COPs. +def PdFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>; + +// Unified FP Scheduler has 64 entries. +def PdFPU : ProcResGroup<[PdFPU0, PdFPU1, PdFPU2, PdFPU3]> { + // Matches the 64-entry unified FP scheduler noted above. + let BufferSize = 64; +} + + +//===----------------------------------------------------------------------===// +// Functional units +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Load-Store Units +// + +// FIXME: does this even make sense? + +def PdLoad : ProcResGroup<[PdAGLU01]> { + // For Piledriver, the load queue is 40 entries deep. + let BufferSize = 40; +} + +def PdStore : ProcResGroup<[PdAGLU01]> { + // For Piledriver, the store queue is 24 entries deep. + let BufferSize = 24; +} + +//===----------------------------------------------------------------------===// +// Integer Execution Units +// + +def PdDiv : ProcResource<1>; // PdEX0; unpipelined integer division +def PdCount : ProcResource<1>; // PdEX0; POPCNT, LZCNT + +def PdMul : ProcResource<1>; // PdEX1; integer multiplication +def PdBranch : ProcResource<1>; // PdEX1; JMP, fused branches + +//===----------------------------------------------------------------------===// +// Floating-Point Units +// + +// Two FMAC/FPFMA units. +def PdFPFMA : ProcResource<2>; // PdFPU0, PdFPU1 + +// One 128-bit integer multiply-accumulate unit. 
+def PdFPMMA : ProcResource<1>; // PdFPU0 + +// One fp conversion unit. +def PdFPCVT : ProcResource<1>; // PdFPU0 + +// One unit for shuffles, packs, permutes, shifts. +def PdFPXBR : ProcResource<1>; // PdFPU1 + +// Two 128-bit packed integer units. +def PdFPMAL : ProcResource<2>; // PdFPU2, PdFPU3 + +// One FP store unit. +def PdFPSTO : ProcResource<1>; // PdFPU3 + + +//===----------------------------------------------------------------------===// +// Basic helper classes. +//===----------------------------------------------------------------------===// + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when dispatched by the schedulers. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass PdWriteRes ExePorts, int Lat = 1, + list Res = [], int UOps = 1> { + def : WriteRes { + let Latency = Lat; + let ResourceCycles = Res; + let NumMicroOps = UOps; + } +} + +multiclass __pdWriteResPair ExePorts, int Lat, + list Res, int UOps, + int LoadLat, int LoadRes, int LoadUOps> { + defm : PdWriteRes; + + defm : PdWriteRes; +} + +multiclass PdWriteResExPair ExePorts, int Lat = 1, + list Res = [], int UOps = 1, + int LoadUOps = 0> { + defm : __pdWriteResPair; +} + +multiclass PdWriteResXMMPair ExePorts, int Lat = 1, + list Res = [], int UOps = 1, + int LoadUOps = 0> { + defm : __pdWriteResPair; +} + +multiclass PdWriteResYMMPair ExePorts, int Lat, + list Res, int UOps = 2, + int LoadUOps = 0> { + defm : __pdWriteResPair; +} + +//===----------------------------------------------------------------------===// +// Here be dragons. +//===----------------------------------------------------------------------===// + +// L1 data cache has a 4-cycle load-to-use latency, so ReadAfterLd registers +// needn't be available until 4 cycles after the memory operand. 
+def : ReadAdvance; + +// Vector loads are 5 cycles, so ReadAfterVec*Ld registers needn't be available +// until 5 cycles after the memory operand. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// A folded store needs a cycle on the PdStore for the store data. +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Loads, stores, and moves, not folded with other operations. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { let Latency = 5; } +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Load/store MXCSR. +// FIXME: These are copy and pasted from WriteLoad/Store. +def : WriteRes { let Latency = 5; } +def : WriteRes { let NumMicroOps = 2; } + +// Treat misc copies as a move. +def : InstRW<[WriteMove], (instrs COPY)>; + +//////////////////////////////////////////////////////////////////////////////// +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteResExPair; + +//////////////////////////////////////////////////////////////////////////////// +// Special case scheduling classes. 
+//////////////////////////////////////////////////////////////////////////////// + +def : WriteRes { let Latency = 100; } +def : WriteRes { let Latency = 100; } +def : WriteRes; + +def PdWriteXLAT : SchedWriteRes<[PdEX01]> { + let Latency = 6; +} +def : InstRW<[PdWriteXLAT], (instrs XLAT)>; + +def PdWriteLARrr : SchedWriteRes<[PdEX01]> { + let Latency = 184; + let NumMicroOps = 45; +} +def : InstRW<[PdWriteLARrr], (instregex "LAR(16|32|64)rr", + "LSL(16|32|64)rr")>; + +// Nops don't have dependencies, so there's no actual latency, but we set this +// to '1' to tell the scheduler that the nop uses an ALU slot for a cycle. +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Arithmetic. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteResExPair; + +def PdWriteLXADD : SchedWriteRes<[PdEX01]> { + let Latency = 6; + let NumMicroOps = 4; +} +def : InstRW<[PdWriteLXADD], (instrs LXADD8, LXADD16, LXADD32, LXADD64)>; + +def PdWriteBMI1 : SchedWriteRes<[PdEX01]> { + let Latency = 2; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteBMI1], + (instrs BLCFILL32rr, BLCFILL64rr, BLCI32rr, BLCI64rr, + BLCIC32rr, BLCIC64rr, BLCMSK32rr, BLCMSK64rr, + BLCS32rr, BLCS64rr, BLSFILL32rr, BLSFILL64rr, + BLSIC32rr, BLSIC64rr, T1MSKC32rr, T1MSKC64rr, + TZMSK32rr, TZMSK64rr)>; + +def PdWriteBMI1m : SchedWriteRes<[PdEX01]> { + let Latency = 6; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteBMI1m], + (instrs BLCFILL32rm, BLCFILL64rm, BLCI32rm, BLCI64rm, + BLCIC32rm, BLCIC64rm, BLCMSK32rm, BLCMSK64rm, + BLCS32rm, BLCS64rm, BLSFILL32rm, BLSFILL64rm, + BLSIC32rm, BLSIC64rm, T1MSKC32rm, T1MSKC64rm, + TZMSK32rm, TZMSK64rm)>; + +defm : PdWriteResExPair; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +def PdWriteCMPXCHG8rr : SchedWriteRes<[PdEX1]> { + let Latency = 3; + let NumMicroOps = 3; +} +def : InstRW<[PdWriteCMPXCHG8rr], (instrs 
CMPXCHG8rr)>; + +def PdWriteCMPXCHG8rm : SchedWriteRes<[PdEX1]> { + let Latency = 3; + let NumMicroOps = 5; +} +def : InstRW<[PdWriteCMPXCHG8rm], (instrs CMPXCHG8rm)>; + +def PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm : SchedWriteRes<[PdEX1]> { + let Latency = 3; + let NumMicroOps = 6; +} +def : InstRW<[PdWriteCMPXCHG16rm_CMPXCHG32rm_CMPXCHG64rm], + (instrs CMPXCHG16rm, CMPXCHG32rm, CMPXCHG64rm)>; + +def PdWriteCMPXCHG8B : SchedWriteRes<[PdEX1]> { + let Latency = 3; + let NumMicroOps = 18; +} +def : InstRW<[PdWriteCMPXCHG8B], (instrs CMPXCHG8B)>; + +def PdWriteCMPXCHG16B : SchedWriteRes<[PdEX1]> { + let Latency = 3; + let NumMicroOps = 22; +} +def : InstRW<[PdWriteCMPXCHG16B], (instrs CMPXCHG16B)>; + +def PdWriteXCHG16rr : SchedWriteRes<[PdEX1]> { + let Latency = 2; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteXCHG16rr], (instrs XCHG16rr)>; + +def PdWriteXADD : SchedWriteRes<[PdEX1]> { + let Latency = 2; + let NumMicroOps = 4; +} +def : InstRW<[PdWriteXADD], (instrs XADD8rr, XADD16rr, XADD32rr, XADD64rr)>; + +def PdWriteXADDm : SchedWriteRes<[PdEX1]> { +let Latency = 6; +let NumMicroOps = 4; +} +def : InstRW<[PdWriteXADDm], (instrs XADD8rm, XADD16rm, XADD32rm, XADD64rm)>; + +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : X86WriteResUnsupported; // BMI2 MULX + +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; + +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; + +defm : PdWriteResExPair; + +def PdWriteCRC32r32r16 : SchedWriteRes<[PdEX01]> { + let Latency = 5; + let ResourceCycles = [4]; + let NumMicroOps = 5; +} +def : InstRW<[PdWriteCRC32r32r16], (instrs CRC32r32r16)>; + +def PdWriteCRC32r32r32 : SchedWriteRes<[PdEX01]> { + let Latency = 6; 
+ let ResourceCycles = [4]; + let NumMicroOps = 7; +} +def : InstRW<[PdWriteCRC32r32r32], (instrs CRC32r32r32)>; + +def PdWriteCRC32r64r64 : SchedWriteRes<[PdEX01]> { + let Latency = 10; + let ResourceCycles = [4]; + let NumMicroOps = 11; +} +def : InstRW<[PdWriteCRC32r64r64], (instrs CRC32r64r64)>; + +defm : PdWriteResExPair; // Conditional move. +defm : PdWriteResExPair; // Conditional (CF + ZF flag) move. + +def : InstRW<[WriteCMOV2.Folded], (instrs CMOVG16rm, CMOVG32rm, CMOVG64rm, + CMOVGE16rm, CMOVGE32rm, CMOVGE64rm, + CMOVL16rm, CMOVL32rm, CMOVL64rm, + CMOVLE16rm, CMOVLE32rm, CMOVLE64rm)>; + +defm : PdWriteRes; // x87 conditional move. + +def : WriteRes; // Setcc. +def : WriteRes; + +def PdWriteSETGEmSETGmSETLEmSETLm : SchedWriteRes<[PdEX01]> { + let ResourceCycles = [2]; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteSETGEmSETGmSETLEmSETLm], (instrs SETGEm, SETGm, + SETLEm, SETLm)>; + +defm : PdWriteRes; + +def WriteLAHF : SchedWriteRes<[PdEX01]> { + let Latency = 2; + let NumMicroOps = 4; +} +def : InstRW<[WriteLAHF], (instrs LAHF)>; + +def WriteSAHF : SchedWriteRes<[PdEX01]> { + let Latency = 2; + let NumMicroOps = 2; +} +def : InstRW<[WriteSAHF], (instrs SAHF)>; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +// This is for simple LEAs with one or two input operands. +// FIXME: SAGU 3-operand LEA +def : WriteRes { let NumMicroOps = 2; } + +// Bit counts. +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; + +// BMI1 BEXTR, BMI2 BZHI +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; + +//////////////////////////////////////////////////////////////////////////////// +// Integer shifts and rotates. 
+//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; +defm : PdWriteResExPair; + +def PdWriteRCL8rCL : SchedWriteRes<[PdEX01]> { + let Latency = 12; + let NumMicroOps = 26; +} +def : InstRW<[PdWriteRCL8rCL], (instrs RCL8rCL)>; + +def PdWriteRCR8ri : SchedWriteRes<[PdEX01]> { + let Latency = 12; + let NumMicroOps = 23; +} +def : InstRW<[PdWriteRCR8ri], (instrs RCR8ri)>; + +def PdWriteRCR8rCL : SchedWriteRes<[PdEX01]> { + let Latency = 11; + let NumMicroOps = 24; +} +def : InstRW<[PdWriteRCR8rCL], (instrs RCR8rCL)>; + +def PdWriteRCL16rCL : SchedWriteRes<[PdEX01]> { + let Latency = 10; + let NumMicroOps = 22; +} +def : InstRW<[PdWriteRCL16rCL], (instrs RCL16rCL)>; + +def PdWriteRCR16ri : SchedWriteRes<[PdEX01]> { + let Latency = 10; + let NumMicroOps = 19; +} +def : InstRW<[PdWriteRCR16ri], (instrs RCR16ri)>; + +def PdWriteRCL32rCLRCL64rCL : SchedWriteRes<[PdEX01]> { + let Latency = 7; + let NumMicroOps = 17; +} +def : InstRW<[PdWriteRCL32rCLRCL64rCL], (instrs RCL32rCL, RCL64rCL)>; + +def PdWriteRCR64rCL : SchedWriteRes<[PdEX01]> { + let Latency = 7; + let NumMicroOps = 16; +} +def : InstRW<[PdWriteRCR64rCL], (instrs RCR64rCL)>; + +def PdWriteRCR32rCL : SchedWriteRes<[PdEX01]> { + let Latency = 7; + let NumMicroOps = 16; +} +def : InstRW<[PdWriteRCR32rCL ], (instrs RCR32rCL)>; + +def PdWriteRCR32riRCR64ri : SchedWriteRes<[PdEX01]> { + let Latency = 7; + let NumMicroOps = 15; +} +def : InstRW<[PdWriteRCR32riRCR64ri], (instrs RCR32ri, RCR64ri)>; + + +def PdWriteRCR16rCL : SchedWriteRes<[PdEX01]> { + let Latency = 9; + let NumMicroOps = 20; +} +def : InstRW<[PdWriteRCR16rCL], (instrs RCR16rCL)>; + +def PdWriteRCL16ri : SchedWriteRes<[PdEX01]> { + let Latency = 11; + let NumMicroOps = 21; +} +def : InstRW<[PdWriteRCL16ri], (instrs RCL16ri)>; + +def PdWriteRCL3264ri : SchedWriteRes<[PdEX01]> { + let Latency = 8; + let NumMicroOps = 16; +} +def : 
InstRW<[PdWriteRCL3264ri], (instrs RCL32ri, RCL64ri)>; + +def PdWriteRCL8ri : SchedWriteRes<[PdEX01]> { + let Latency = 13; + let NumMicroOps = 25; +} +def : InstRW<[PdWriteRCL8ri], (instrs RCL8ri)>; + +// SHLD/SHRD. +defm : PdWriteRes; +defm : PdWriteRes; + +def PdWriteSHLD32rri8SHRD16rri8 : SchedWriteRes<[PdEX01]> { + let Latency = 3; + let ResourceCycles = [6]; + let NumMicroOps = 6; +} +def : InstRW<[PdWriteSHLD32rri8SHRD16rri8 ], (instrs SHLD32rri8, SHRD16rri8)>; + +def PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL : SchedWriteRes<[PdEX01]> { + let Latency = 4; + let ResourceCycles = [8]; + let NumMicroOps = 7; +} +def : InstRW<[PdWriteSHLD16rrCLSHLD32rrCLSHRD32rrCL], (instrs SHLD16rrCL, + SHLD32rrCL, + SHRD32rrCL)>; + +defm : PdWriteRes; +defm : PdWriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Floating point. This covers both scalar and vector operations. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +def PdWriteMOVHPm : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> { + let Latency = 2; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteMOVHPm], (instrs MOVHPDmr, MOVHPSmr, VMOVHPDmr, VMOVHPSmr)>; + +def PdWriteVMOVUPDYmrVMOVUPSYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> { + let NumMicroOps = 8; +} +def : InstRW<[PdWriteVMOVUPDYmrVMOVUPSYmr], (instrs VMOVUPDYmr, VMOVUPSYmr)>; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; 
+defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; + +def PdWriteFCOMPm : SchedWriteRes<[PdFPU1, PdFPFMA]> { + let Latency = 6; +} +def : InstRW<[PdWriteFCOMPm], (instrs FCOM32m, FCOM64m, FCOMP32m, FCOMP64m)>; + +def PdWriteTST_F_UCOM_FPPr : SchedWriteRes<[PdFPU1, PdFPFMA]>; +def : InstRW<[PdWriteTST_F_UCOM_FPPr], (instrs TST_F, UCOM_FPPr)>; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + + +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +def PdWriteVDPPSrri : SchedWriteRes<[PdFPU1, PdFPFMA]> { + let Latency = 25; + let ResourceCycles = [1, 3]; + let NumMicroOps = 17; +} +def : InstRW<[PdWriteVDPPSrri], (instrs VDPPSrri)>; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : 
PdWriteResXMMPair; +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +def PdWriteVFRCZ : SchedWriteRes<[PdFPU1, PdFPSTO]> { + let Latency = 10; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteVFRCZ], (instrs VFRCZPDrr, VFRCZPSrr, + VFRCZSDrr, VFRCZSSrr)>; + +def PdWriteVFRCZm : SchedWriteRes<[PdFPU1, PdFPSTO]> { + let Latency = 15; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteVFRCZm], (instrs VFRCZPDrm, VFRCZPSrm, + VFRCZSDrm, VFRCZSSrm)>; + +def PdWriteVFRCZY : SchedWriteRes<[PdFPU1, PdFPSTO]> { + let Latency = 10; + let ResourceCycles = [2, 1]; + let NumMicroOps = 4; +} +def : InstRW<[PdWriteVFRCZY], (instrs VFRCZPSYrr, VFRCZPDYrr)>; + +def PdWriteVFRCZYm : SchedWriteRes<[PdFPU1, PdFPSTO]> { + let Latency = 15; + let ResourceCycles = [2, 1]; + let NumMicroOps = 8; +} +def : InstRW<[PdWriteVFRCZYm], (instrs VFRCZPSYrm, VFRCZPDYrm)>; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +def PdWriteVBROADCASTF128 : SchedWriteRes<[PdFPU01, PdFPFMA]> { + let Latency = 7; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteVBROADCASTF128], (instrs VBROADCASTF128)>; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; + +def PdWriteVEXTRACTF128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { + let Latency = 2; +} +def : InstRW<[PdWriteVEXTRACTF128rr], (instrs VEXTRACTF128rr)>; + +def PdWriteVEXTRACTF128mr : SchedWriteRes<[PdFPU01, PdFPFMA]> { + let Latency = 7; + let NumMicroOps = 2; +} +def : 
InstRW<[PdWriteVEXTRACTF128mr], (instrs VEXTRACTF128mr)>; + +def PdWriteVPERM2F128rr : SchedWriteRes<[PdFPU01, PdFPFMA]> { + let Latency = 4; + let NumMicroOps = 8; +} +def : InstRW<[PdWriteVPERM2F128rr], (instrs VPERM2F128rr)>; + +def PdWriteVPERM2F128rm : SchedWriteRes<[PdFPU01, PdFPFMA]> { + let Latency = 8; // 4 + 4 + let NumMicroOps = 10; +} +def : InstRW<[PdWriteVPERM2F128rm], (instrs VPERM2F128rm)>; + +//////////////////////////////////////////////////////////////////////////////// +// Conversions. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +def PdWriteMMX_CVTTPD2PIirr : SchedWriteRes<[PdFPU1, PdFPSTO]> { + let Latency = 6; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteMMX_CVTTPD2PIirr], (instrs MMX_CVTTPD2PIirr)>; + +// FIXME: f+3 ST, LD+STC latency +defm : PdWriteResXMMPair; +// FIXME: .Folded version is one NumMicroOp *less*.. + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +// FIXME: .Folded version is one NumMicroOp *less*.. 
+ +def WriteCVTSI642SDrr : SchedWriteRes<[PdFPU1, PdFPSTO]> { + let Latency = 13; + let NumMicroOps = 2; +} +def : InstRW<[WriteCVTSI642SDrr], (instrs CVTSI642SDrr, CVTSI642SSrr)>; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +def WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr : SchedWriteRes<[PdFPU1, PdFPSTO]> { + let Latency = 6; + let NumMicroOps = 2; +} +def : InstRW<[WriteMMX_CVTPD2PIirrMMX_CVTPI2PDirr], (instrs MMX_CVTPD2PIirr, + MMX_CVTPI2PDirr)>; + +def WriteMMX_CVTPI2PSirr : SchedWriteRes<[PdFPU1, PdFPSTO]> { + let Latency = 4; + let NumMicroOps = 2; +} +def : InstRW<[WriteMMX_CVTPI2PSirr], (instrs MMX_CVTPI2PSirr)>; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : X86WriteResUnsupported; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : X86WriteResUnsupported; + +//////////////////////////////////////////////////////////////////////////////// +// Vector integer operations. 
+//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +def PdWriteVMOVDQUYmr : SchedWriteRes<[PdStore, PdFPU1, PdFPSTO]> { + let NumMicroOps = 8; +} +def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>; + +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +def JWriteVPMACS : SchedWriteRes<[PdFPU0, PdFPU01, PdFPMMA, PdFPMAL]> { + let Latency = 4; + let ResourceCycles = [2, 1, 2, 1]; +} +def : InstRW<[JWriteVPMACS], (instrs VPMACSDQHrr, VPMACSDQLrr, VPMACSSDQHrr, + VPMACSSDQLrr)>; + +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : 
PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +//////////////////////////////////////////////////////////////////////////////// +// Vector insert/extract operations. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteRes; +defm : PdWriteRes; + +defm : PdWriteRes; +defm : PdWriteRes; + +def PdWriteEXTRQ : SchedWriteRes<[PdFPU01, PdFPMAL]> { + let Latency = 3; +} +def : InstRW<[PdWriteEXTRQ], (instrs EXTRQ, EXTRQI)>; + +//////////////////////////////////////////////////////////////////////////////// +// SSE42 String instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; + +//////////////////////////////////////////////////////////////////////////////// +// MOVMSK Instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteRes; + +defm : PdWriteRes; +defm : X86WriteResUnsupported; +// defm : X86WriteResUnsupported; + +defm : PdWriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// AES Instructions. 
+//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; + +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteResXMMPair; +defm : PdWriteResYMMPair; +defm : X86WriteResPairUnsupported; + +defm : PdWriteResXMMPair; +defm : PdWriteResXMMPair; +defm : X86WriteResPairUnsupported; +defm : X86WriteResPairUnsupported; + +def : InstRW<[WritePHAdd], (instrs PHADDDrr, PHSUBDrr, + PHADDWrr, PHSUBWrr, + PHADDSWrr, PHSUBSWrr, + VPHADDDrr, VPHSUBDrr, + VPHADDWrr, VPHSUBWrr, + VPHADDSWrr, VPHSUBSWrr)>; + +def : InstRW<[WritePHAdd.Folded], (instrs PHADDDrm, PHSUBDrm, + PHADDWrm, PHSUBWrm, + PHADDSWrm, PHSUBSWrm, + VPHADDDrm, VPHSUBDrm, + VPHADDWrm, VPHSUBWrm, + VPHADDSWrm, VPHSUBSWrm)>; + +//////////////////////////////////////////////////////////////////////////////// +// Carry-less multiplication instructions. +//////////////////////////////////////////////////////////////////////////////// + +defm : PdWriteResXMMPair; + +def PdWriteVPCLMULQDQrr : SchedWriteRes<[PdFPU0, PdFPMMA]> { + let Latency = 13; + let NumMicroOps = 6; +} +def : InstRW<[PdWriteVPCLMULQDQrr], (instrs VPCLMULQDQrr)>; + +//////////////////////////////////////////////////////////////////////////////// +// SSE4A instructions. +//////////////////////////////////////////////////////////////////////////////// + +def PdWriteINSERTQ : SchedWriteRes<[PdFPU01, PdFPMAL]> { + let Latency = 3; + let ResourceCycles = [1, 4]; +} +def : InstRW<[PdWriteINSERTQ], (instrs INSERTQ, INSERTQI)>; + +//////////////////////////////////////////////////////////////////////////////// +// AVX instructions. 
+//////////////////////////////////////////////////////////////////////////////// + +def PdWriteVBROADCASTYLd : SchedWriteRes<[PdLoad, PdFPU01, PdFPFMA]> { + let Latency = 6; + let ResourceCycles = [1, 2, 4]; + let NumMicroOps = 2; +} +def : InstRW<[PdWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, + VBROADCASTSSYrm)>; + +def PdWriteVZEROALL : SchedWriteRes<[]> { + let Latency = 90; + let NumMicroOps = 32; +} +def : InstRW<[PdWriteVZEROALL], (instrs VZEROALL)>; + +def PdWriteVZEROUPPER : SchedWriteRes<[]> { + let Latency = 46; + let NumMicroOps = 16; +} +def : InstRW<[PdWriteVZEROUPPER], (instrs VZEROUPPER)>; + +/////////////////////////////////////////////////////////////////////////////// +// SchedWriteVariant definitions. +/////////////////////////////////////////////////////////////////////////////// + +def PdWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def PdWriteZeroIdiom : SchedWriteVariant<[ + SchedVar, [PdWriteZeroLatency]>, + SchedVar, [WriteALU]> +]>; +def : InstRW<[PdWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def PdWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar, [PdWriteZeroLatency]>, + SchedVar, [WriteFLogic]> +]>; +def : InstRW<[PdWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, + XORPDrr, VXORPDrr, + ANDNPSrr, VANDNPSrr, + ANDNPDrr, VANDNPDrr)>; + +// VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr "zero-idioms" have latency of 1. 
+ +def PdWriteVZeroIdiomLogic : SchedWriteVariant<[ + SchedVar, [PdWriteZeroLatency]>, + SchedVar, [WriteVecLogic]> +]>; +def : InstRW<[PdWriteVZeroIdiomLogic], (instrs MMX_PXORirr, MMX_PANDNirr)>; + +def PdWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar, [PdWriteZeroLatency]>, + SchedVar, [WriteVecLogicX]> +]>; +def : InstRW<[PdWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr, + PANDNrr, VPANDNrr)>; + +def PdWriteVZeroIdiomALU : SchedWriteVariant<[ + SchedVar, [PdWriteZeroLatency]>, + SchedVar, [WriteVecALU]> +]>; +def : InstRW<[PdWriteVZeroIdiomALU], (instrs MMX_PSUBBirr, MMX_PSUBDirr, + MMX_PSUBQirr, MMX_PSUBWirr, + MMX_PCMPGTBirr, + MMX_PCMPGTDirr, + MMX_PCMPGTWirr)>; + +def PdWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar, [PdWriteZeroLatency]>, + SchedVar, [WriteVecALUX]> +]>; +def : InstRW<[PdWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr, + PSUBDrr, VPSUBDrr, + PSUBQrr, VPSUBQrr, + PSUBWrr, VPSUBWrr, + PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +/////////////////////////////////////////////////////////////////////////////// +// Dependency breaking instructions. +/////////////////////////////////////////////////////////////////////////////// + +// VPCMPGTQ, but not PCMPGTQ! + +def : IsZeroIdiomFunction<[ + // GPR Zero-idioms. + DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>, + + // MMX Zero-idioms. + DepBreakingClass<[ + MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr, + MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr, + MMX_PSUBSBirr, MMX_PSUBSWirr, MMX_PSUBUSBirr, MMX_PSUBUSWirr, + MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr + ], ZeroIdiomPredicate>, + + // SSE Zero-idioms. + DepBreakingClass<[ + // fp variants. + XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr, + + // int variants. + PXORrr, PANDNrr, + PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr, + PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr, + PCMPGTBrr, PCMPGTDrr, PCMPGTWrr + ], ZeroIdiomPredicate>, + + // AVX Zero-idioms. 
+ DepBreakingClass<[ + // xmm fp variants. + VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr, + + // xmm int variants. + VPXORrr, VPANDNrr, + VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr, + VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr, + VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr, + + // ymm variants. + VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr + ], ZeroIdiomPredicate> +]>; + +def : IsDepBreakingFunction<[ + // GPR + DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>, + DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >, + + // MMX + DepBreakingClass<[ + MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr + ], ZeroIdiomPredicate>, + + // SSE + DepBreakingClass<[ + PCMPEQBrr, PCMPEQWrr, PCMPEQDrr + // But not PCMPEQQrr. + ], ZeroIdiomPredicate>, + + // AVX + DepBreakingClass<[ + VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr + // But not VPCMPEQQrr. + ], ZeroIdiomPredicate> +]>; + + +} // SchedModel Index: llvm/trunk/test/CodeGen/X86/aes-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/aes-schedule.ll +++ llvm/trunk/test/CodeGen/X86/aes-schedule.ll @@ -14,8 +14,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE 
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE @@ -96,15 +96,15 @@ ; ; BDVER2-SSE-LABEL: test_aesdec: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [14:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_aesdec: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_aesdec: ; BTVER2-SSE: # %bb.0: @@ -211,15 +211,15 @@ ; ; BDVER2-SSE-LABEL: test_aesdeclast: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [14:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_aesdeclast: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # 
sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_aesdeclast: ; BTVER2-SSE: # %bb.0: @@ -326,15 +326,15 @@ ; ; BDVER2-SSE-LABEL: test_aesenc: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [14:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_aesenc: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_aesenc: ; BTVER2-SSE: # %bb.0: @@ -441,15 +441,15 @@ ; ; BDVER2-SSE-LABEL: test_aesenclast: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [14:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_aesenclast: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_aesenclast: ; 
BTVER2-SSE: # %bb.0: @@ -569,17 +569,17 @@ ; ; BDVER2-SSE-LABEL: test_aesimc: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00] -; BDVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [5:1.00] +; BDVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_aesimc: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [12:2.00] -; BDVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [18:2.00] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [10:1.00] +; BDVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_aesimc: ; BTVER2-SSE: # %bb.0: @@ -703,17 +703,17 @@ ; ; BDVER2-SSE-LABEL: test_aeskeygenassist: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67] -; BDVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [5:1.00] +; BDVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_aeskeygenassist: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [8:3.67] -; BDVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [8:3.33] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [10:1.00] 
+; BDVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_aeskeygenassist: ; BTVER2-SSE: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/avx-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll @@ -6,7 +6,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -49,9 +49,9 @@ ; ; BDVER2-LABEL: test_addpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_addpd: ; BTVER2: # %bb.0: @@ -109,9 +109,9 @@ ; ; BDVER2-LABEL: 
test_addps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_addps: ; BTVER2: # %bb.0: @@ -169,9 +169,9 @@ ; ; BDVER2-LABEL: test_addsubpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # %bb.0: @@ -230,9 +230,9 @@ ; ; BDVER2-LABEL: test_addsubps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # %bb.0: @@ -297,10 +297,10 @@ ; ; BDVER2-LABEL: test_andnotpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # %bb.0: @@ -373,10 +373,10 @@ ; ; BDVER2-LABEL: test_andnotps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: 
vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # %bb.0: @@ -449,10 +449,10 @@ ; ; BDVER2-LABEL: test_andpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_andpd: ; BTVER2: # %bb.0: @@ -523,10 +523,10 @@ ; ; BDVER2-LABEL: test_andps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_andps: ; BTVER2: # %bb.0: @@ -597,10 +597,10 @@ ; ; BDVER2-LABEL: test_blendpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] -; BDVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vblendpd {{.*#+}} ymm0 = 
ymm0[0],ymm1[1,2],ymm0[3] sched: [2:1.00] +; BDVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [7:1.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # %bb.0: @@ -667,10 +667,10 @@ ; ; BDVER2-LABEL: test_blendps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] -; BDVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [2:1.00] +; BDVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:1.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blendps: ; BTVER2: # %bb.0: @@ -731,9 +731,9 @@ ; ; BDVER2-LABEL: test_blendvpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:3.00] +; BDVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:3.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # %bb.0: @@ -792,9 +792,9 @@ ; ; BDVER2-LABEL: test_blendvps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BDVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:3.00] +; BDVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:3.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # %bb.0: @@ -847,8 +847,8 @@ ; ; BDVER2-LABEL: 
test_broadcastf128: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_broadcastf128: ; BTVER2: # %bb.0: @@ -897,8 +897,8 @@ ; ; BDVER2-LABEL: test_broadcastsd_ymm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_broadcastsd_ymm: ; BTVER2: # %bb.0: @@ -948,8 +948,8 @@ ; ; BDVER2-LABEL: test_broadcastss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_broadcastss: ; BTVER2: # %bb.0: @@ -999,8 +999,8 @@ ; ; BDVER2-LABEL: test_broadcastss_ymm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_broadcastss_ymm: ; BTVER2: # %bb.0: @@ -1062,10 +1062,10 @@ ; ; BDVER2-LABEL: test_cmppd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BDVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00] +; BDVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BDVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # %bb.0: @@ -1135,10 +1135,10 @@ ; ; BDVER2-LABEL: test_cmpps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: 
vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; BDVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00] +; BDVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BDVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # %bb.0: @@ -1208,10 +1208,10 @@ ; ; BDVER2-LABEL: test_cvtdq2pd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:2.00] +; BDVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [8:2.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # %bb.0: @@ -1280,10 +1280,10 @@ ; ; BDVER2-LABEL: test_cvtdq2ps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:2.00] +; BDVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:2.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # %bb.0: @@ -1350,10 +1350,10 @@ ; ; BDVER2-LABEL: test_cvtpd2dq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00] -; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [13:2.00] +; BDVER2-NEXT: vcvtpd2dq %ymm0, 
%xmm0 # sched: [8:2.00] +; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # %bb.0: @@ -1421,10 +1421,10 @@ ; ; BDVER2-LABEL: test_cvttpd2dq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] -; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [13:2.00] +; BDVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [8:2.00] +; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvttpd2dq: ; BTVER2: # %bb.0: @@ -1491,10 +1491,10 @@ ; ; BDVER2-LABEL: test_cvtpd2ps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] -; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [13:2.00] +; BDVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [8:2.00] +; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # %bb.0: @@ -1561,10 +1561,10 @@ ; ; BDVER2-LABEL: test_cvtps2dq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00] -; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [9:2.00] +; BDVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:2.00] +; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # %bb.0: @@ -1632,10 +1632,10 @@ ; ; BDVER2-LABEL: test_cvttps2dq: ; BDVER2: # %bb.0: -; 
BDVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00] -; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:2.00] +; BDVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:2.00] +; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvttps2dq: ; BTVER2: # %bb.0: @@ -1696,9 +1696,9 @@ ; ; BDVER2-LABEL: test_divpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00] -; BDVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [9:19.00] +; BDVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:19.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_divpd: ; BTVER2: # %bb.0: @@ -1756,9 +1756,9 @@ ; ; BDVER2-LABEL: test_divps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00] -; BDVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [9:19.00] +; BDVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [14:19.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_divps: ; BTVER2: # %bb.0: @@ -1816,9 +1816,9 @@ ; ; BDVER2-LABEL: test_dpps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] -; BDVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [27:3.00] +; BDVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [32:3.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_dpps: ; BTVER2: # %bb.0: @@ -1883,10 +1883,10 @@ ; ; BDVER2-LABEL: test_extractf128: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; 
BDVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [7:0.50] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_extractf128: ; BTVER2: # %bb.0: @@ -1945,9 +1945,9 @@ ; ; BDVER2-LABEL: test_haddpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [11:2.00] +; BDVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [16:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # %bb.0: @@ -2006,9 +2006,9 @@ ; ; BDVER2-LABEL: test_haddps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [11:2.00] +; BDVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [16:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # %bb.0: @@ -2067,9 +2067,9 @@ ; ; BDVER2-LABEL: test_hsubpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [11:2.00] +; BDVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [16:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # %bb.0: @@ -2128,9 +2128,9 @@ ; ; BDVER2-LABEL: test_hsubps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; BDVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; BDVER2-NEXT: retq # 
sched: [1:1.00] +; BDVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [11:2.00] +; BDVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [16:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # %bb.0: @@ -2195,10 +2195,10 @@ ; ; BDVER2-LABEL: test_insertf128: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] +; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.50] ; BDVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_insertf128: ; BTVER2: # %bb.0: @@ -2255,8 +2255,8 @@ ; ; BDVER2-LABEL: test_lddqu: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # %bb.0: @@ -2317,10 +2317,10 @@ ; ; BDVER2-LABEL: test_maskmovpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; BDVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00] +; BDVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00] +; BDVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_maskmovpd: ; BTVER2: # %bb.0: @@ -2387,10 +2387,10 @@ ; ; BDVER2-LABEL: test_maskmovpd_ymm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; BDVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaskmovpd 
(%rdi), %ymm0, %ymm2 # sched: [6:2.00] +; BDVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00] +; BDVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_maskmovpd_ymm: ; BTVER2: # %bb.0: @@ -2457,10 +2457,10 @@ ; ; BDVER2-LABEL: test_maskmovps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00] -; BDVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00] +; BDVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00] +; BDVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_maskmovps: ; BTVER2: # %bb.0: @@ -2527,10 +2527,10 @@ ; ; BDVER2-LABEL: test_maskmovps_ymm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; BDVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00] +; BDVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00] +; BDVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_maskmovps_ymm: ; BTVER2: # %bb.0: @@ -2591,9 +2591,9 @@ ; ; BDVER2-LABEL: test_maxpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BDVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # %bb.0: @@ -2652,9 +2652,9 @@ ; ; BDVER2-LABEL: test_maxps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: 
[3:1.00] -; BDVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BDVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # %bb.0: @@ -2713,9 +2713,9 @@ ; ; BDVER2-LABEL: test_minpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BDVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # %bb.0: @@ -2774,9 +2774,9 @@ ; ; BDVER2-LABEL: test_minps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BDVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_minps: ; BTVER2: # %bb.0: @@ -2841,10 +2841,10 @@ ; ; BDVER2-LABEL: test_movapd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BDVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:0.50] +; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00] ; BDVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movapd: ; BTVER2: # %bb.0: @@ -2910,10 +2910,10 @@ ; ; BDVER2-LABEL: test_movaps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BDVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:0.50] +; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # 
sched: [5:2.00] ; BDVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movaps: ; BTVER2: # %bb.0: @@ -2979,10 +2979,10 @@ ; ; BDVER2-LABEL: test_movddup: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; BDVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:1.00] +; BDVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [2:1.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # %bb.0: @@ -3043,9 +3043,9 @@ ; ; BDVER2-LABEL: test_movmskpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [10:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # %bb.0: @@ -3101,9 +3101,9 @@ ; ; BDVER2-LABEL: test_movmskps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovmskps %ymm0, %eax # sched: [10:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # %bb.0: @@ -3172,10 +3172,10 @@ ; BDVER2-LABEL: test_movntdq: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] +; BDVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [2:2.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; 
BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movntdq: ; BTVER2: # %bb.0: @@ -3234,9 +3234,9 @@ ; ; BDVER2-LABEL: test_movntpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # %bb.0: @@ -3293,9 +3293,9 @@ ; ; BDVER2-LABEL: test_movntps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movntps: ; BTVER2: # %bb.0: @@ -3358,10 +3358,10 @@ ; ; BDVER2-LABEL: test_movshdup: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; BDVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:1.00] +; BDVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [2:1.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # %bb.0: @@ -3428,10 +3428,10 @@ ; ; BDVER2-LABEL: test_movsldup: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; BDVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovsldup {{.*#+}} ymm1 = 
mem[0,0,2,2,4,4,6,6] sched: [7:1.00] +; BDVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [2:1.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # %bb.0: @@ -3500,10 +3500,10 @@ ; ; BDVER2-LABEL: test_movupd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BDVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:0.50] +; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00] ; BDVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movupd: ; BTVER2: # %bb.0: @@ -3571,10 +3571,10 @@ ; ; BDVER2-LABEL: test_movups: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; BDVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:0.50] +; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [5:2.00] ; BDVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movups: ; BTVER2: # %bb.0: @@ -3634,9 +3634,9 @@ ; ; BDVER2-LABEL: test_mulpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # %bb.0: @@ -3694,9 +3694,9 @@ ; ; BDVER2-LABEL: test_mulps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; 
BDVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_mulps: ; BTVER2: # %bb.0: @@ -3760,10 +3760,10 @@ ; ; BDVER2-LABEL: orpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: orpd: ; BTVER2: # %bb.0: @@ -3834,10 +3834,10 @@ ; ; BDVER2-LABEL: test_orps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_orps: ; BTVER2: # %bb.0: @@ -3908,10 +3908,10 @@ ; ; BDVER2-LABEL: test_perm2f128: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] -; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [4:0.50] +; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:0.50] +; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_perm2f128: ; BTVER2: # %bb.0: @@ -3978,10 +3978,10 @@ ; ; BDVER2-LABEL: test_permilpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: 
vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; BDVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:0.50] +; BDVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [2:0.50] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_permilpd: ; BTVER2: # %bb.0: @@ -4048,10 +4048,10 @@ ; ; BDVER2-LABEL: test_permilpd_ymm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; BDVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00] +; BDVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [2:1.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_permilpd_ymm: ; BTVER2: # %bb.0: @@ -4118,10 +4118,10 @@ ; ; BDVER2-LABEL: test_permilps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; BDVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] +; BDVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [2:0.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_permilps: ; BTVER2: # %bb.0: @@ -4188,10 +4188,10 @@ ; ; BDVER2-LABEL: test_permilps_ymm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; BDVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] -; BDVER2-NEXT: 
vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00] +; BDVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [2:1.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_permilps_ymm: ; BTVER2: # %bb.0: @@ -4252,9 +4252,9 @@ ; ; BDVER2-LABEL: test_permilvarpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BDVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_permilvarpd: ; BTVER2: # %bb.0: @@ -4313,9 +4313,9 @@ ; ; BDVER2-LABEL: test_permilvarpd_ymm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00] +; BDVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:3.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_permilvarpd_ymm: ; BTVER2: # %bb.0: @@ -4374,9 +4374,9 @@ ; ; BDVER2-LABEL: test_permilvarps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BDVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_permilvarps: ; BTVER2: # %bb.0: @@ -4435,9 +4435,9 @@ ; ; BDVER2-LABEL: test_permilvarps_ymm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vpermilps (%rdi), %ymm0, 
%ymm0 # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00] +; BDVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:3.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_permilvarps_ymm: ; BTVER2: # %bb.0: @@ -4502,10 +4502,10 @@ ; ; BDVER2-LABEL: test_rcpps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00] -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [10:2.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # %bb.0: @@ -4573,10 +4573,10 @@ ; ; BDVER2-LABEL: test_roundpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [9:2.00] +; BDVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:2.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # %bb.0: @@ -4644,10 +4644,10 @@ ; ; BDVER2-LABEL: test_roundps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [9:2.00] +; BDVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:2.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # %bb.0: @@ -4715,10 +4715,10 @@ ; ; 
BDVER2-LABEL: test_rsqrtps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00] -; BDVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [10:2.00] +; BDVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # %bb.0: @@ -4786,10 +4786,10 @@ ; ; BDVER2-LABEL: test_shufpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; BDVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [2:1.00] +; BDVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # %bb.0: @@ -4856,10 +4856,10 @@ ; ; BDVER2-LABEL: test_shufps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; BDVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [2:1.00] +; BDVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [7:1.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_shufps: ; BTVER2: # %bb.0: @@ -4926,10 +4926,10 @@ ; ; BDVER2-LABEL: test_sqrtpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtpd 
(%rdi), %ymm1 # sched: [52:44.00] -; BDVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [14:27.00] +; BDVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [9:27.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # %bb.0: @@ -4997,10 +4997,10 @@ ; ; BDVER2-LABEL: test_sqrtps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00] -; BDVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00] -; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [14:21.00] +; BDVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [9:21.00] +; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # %bb.0: @@ -5062,9 +5062,9 @@ ; ; BDVER2-LABEL: test_subpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_subpd: ; BTVER2: # %bb.0: @@ -5122,9 +5122,9 @@ ; ; BDVER2-LABEL: test_subps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_subps: ; BTVER2: # %bb.0: @@ -5203,9 +5203,9 @@ ; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25] ; BDVER2-NEXT: vtestpd 
%xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00] +; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_testpd: ; BTVER2: # %bb.0: @@ -5298,10 +5298,10 @@ ; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25] ; BDVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] ; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67] -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [6:1.00] +; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_testpd_ymm: ; BTVER2: # %bb.0: @@ -5389,9 +5389,9 @@ ; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25] ; BDVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00] +; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_testps: ; BTVER2: # %bb.0: @@ -5484,10 +5484,10 @@ ; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25] ; BDVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] ; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: adcl $0, %eax # sched: [2:0.67] -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [6:1.00] +; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00] +; BDVER2-NEXT: 
vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_testps_ymm: ; BTVER2: # %bb.0: @@ -5560,10 +5560,10 @@ ; ; BDVER2-LABEL: test_unpckhpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [2:1.00] +; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # %bb.0: @@ -5624,9 +5624,9 @@ ; ; BDVER2-LABEL: test_unpckhps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [2:1.00] +; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # %bb.0: @@ -5690,10 +5690,10 @@ ; ; BDVER2-LABEL: test_unpcklpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [2:1.00] +; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = 
ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00] +; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # %bb.0: @@ -5754,9 +5754,9 @@ ; ; BDVER2-LABEL: test_unpcklps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [2:1.00] +; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # %bb.0: @@ -5820,10 +5820,10 @@ ; ; BDVER2-LABEL: test_xorpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # %bb.0: @@ -5894,10 +5894,10 @@ ; ; BDVER2-LABEL: test_xorps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xorps: 
; BTVER2: # %bb.0: @@ -5956,8 +5956,8 @@ ; ; BDVER2-LABEL: test_zeroall: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vzeroall # sched: [9:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroall # sched: [90:8.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_zeroall: ; BTVER2: # %bb.0: @@ -6006,8 +6006,8 @@ ; ; BDVER2-LABEL: test_zeroupper: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_zeroupper: ; BTVER2: # %bb.0: @@ -6086,12 +6086,12 @@ ; BDVER2-LABEL: test_avx256_zero_idioms: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; BDVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00] -; BDVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00] +; BDVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [2:1.00] +; BDVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [2:1.00] +; BDVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [2:1.00] +; BDVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [2:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_avx256_zero_idioms: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll +++ llvm/trunk/test/CodeGen/X86/avx-vzeroupper.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX512 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown 
-mattr=+avx,+fast-partial-ymm-or-zmm-write | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=FAST-ymm-zmm -; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BDVER2 +; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BDVER2 ; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BTVER2 declare i32 @foo() @@ -60,8 +60,8 @@ ; BDVER2-LABEL: test01: ; BDVER2: # %bb.0: ; BDVER2-NEXT: subq $56, %rsp -; BDVER2-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill ; BDVER2-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; BDVER2-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill ; BDVER2-NEXT: vzeroupper ; BDVER2-NEXT: callq do_sse ; BDVER2-NEXT: vmovaps %xmm0, {{.*}}(%rip) @@ -203,8 +203,8 @@ ; BDVER2-NEXT: testl %eax, %eax ; BDVER2-NEXT: jne .LBB3_1 ; BDVER2-NEXT: # %bb.2: # %for.body.preheader -; BDVER2-NEXT: movl $4, %ebx ; BDVER2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; BDVER2-NEXT: movl $4, %ebx ; BDVER2-NEXT: .p2align 4, 0x90 ; BDVER2-NEXT: .LBB3_3: # %for.body ; BDVER2-NEXT: # =>This Inner Loop Header: Depth=1 @@ -214,7 +214,7 @@ ; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ; BDVER2-NEXT: vzeroupper ; BDVER2-NEXT: callq do_sse -; BDVER2-NEXT: addl $-1, %ebx +; BDVER2-NEXT: decl %ebx ; BDVER2-NEXT: jne .LBB3_3 ; BDVER2-NEXT: # %bb.4: # %for.end ; BDVER2-NEXT: addq $16, %rsp Index: llvm/trunk/test/CodeGen/X86/bmi-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bmi-schedule.ll +++ llvm/trunk/test/CodeGen/X86/bmi-schedule.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -39,10 +39,10 @@ ; ; BDVER2-LABEL: test_andn_i32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.33] -; BDVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50] +; BDVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_andn_i32: ; BTVER2: # %bb.0: @@ -96,10 +96,10 @@ ; ; BDVER2-LABEL: test_andn_i64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.33] -; BDVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50] +; BDVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_andn_i64: ; BTVER2: # %bb.0: @@ -153,10 +153,10 @@ ; ; BDVER2-LABEL: test_bextr_i32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: bextrl %edi, (%rdx), %ecx # 
sched: [7:1.00] -; BDVER2-NEXT: bextrl %edi, %esi, %eax # sched: [2:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [6:0.50] +; BDVER2-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bextr_i32: ; BTVER2: # %bb.0: @@ -210,10 +210,10 @@ ; ; BDVER2-LABEL: test_bextr_i64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:1.00] -; BDVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [6:0.50] +; BDVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bextr_i64: ; BTVER2: # %bb.0: @@ -268,9 +268,9 @@ ; BDVER2-LABEL: test_blsi_i32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: blsil (%rsi), %ecx # sched: [6:0.50] -; BDVER2-NEXT: blsil %edi, %eax # sched: [1:0.33] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsil %edi, %eax # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blsi_i32: ; BTVER2: # %bb.0: @@ -326,9 +326,9 @@ ; BDVER2-LABEL: test_blsi_i64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50] -; BDVER2-NEXT: blsiq %rdi, %rax # sched: [1:0.33] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsiq %rdi, %rax # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blsi_i64: ; BTVER2: # %bb.0: @@ -384,9 +384,9 @@ ; BDVER2-LABEL: test_blsmsk_i32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50] 
-; BDVER2-NEXT: blsmskl %edi, %eax # sched: [1:0.33] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsmskl %edi, %eax # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blsmsk_i32: ; BTVER2: # %bb.0: @@ -442,9 +442,9 @@ ; BDVER2-LABEL: test_blsmsk_i64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50] -; BDVER2-NEXT: blsmskq %rdi, %rax # sched: [1:0.33] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsmskq %rdi, %rax # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blsmsk_i64: ; BTVER2: # %bb.0: @@ -500,9 +500,9 @@ ; BDVER2-LABEL: test_blsr_i32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50] -; BDVER2-NEXT: blsrl %edi, %eax # sched: [1:0.33] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsrl %edi, %eax # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blsr_i32: ; BTVER2: # %bb.0: @@ -558,9 +558,9 @@ ; BDVER2-LABEL: test_blsr_i64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50] -; BDVER2-NEXT: blsrq %rdi, %rax # sched: [1:0.33] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsrq %rdi, %rax # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_blsr_i64: ; BTVER2: # %bb.0: @@ -619,11 +619,11 @@ ; ; BDVER2-LABEL: test_cttz_i16: ; BDVER2: # %bb.0: -; BDVER2-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00] -; BDVER2-NEXT: tzcntw %di, %ax # sched: [3:1.00] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] +; BDVER2-NEXT: tzcntw (%rsi), %cx # sched: [6:1.00] +; 
BDVER2-NEXT: tzcntw %di, %ax # sched: [2:1.00] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cttz_i16: ; BTVER2: # %bb.0: @@ -679,10 +679,10 @@ ; ; BDVER2-LABEL: test_cttz_i32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00] -; BDVER2-NEXT: tzcntl %edi, %eax # sched: [3:1.00] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: tzcntl (%rsi), %ecx # sched: [6:1.00] +; BDVER2-NEXT: tzcntl %edi, %eax # sched: [2:1.00] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cttz_i32: ; BTVER2: # %bb.0: @@ -736,10 +736,10 @@ ; ; BDVER2-LABEL: test_cttz_i64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00] -; BDVER2-NEXT: tzcntq %rdi, %rax # sched: [3:1.00] -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: tzcntq (%rsi), %rcx # sched: [6:1.00] +; BDVER2-NEXT: tzcntq %rdi, %rax # sched: [2:1.00] +; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cttz_i64: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/cmov-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/cmov-schedule.ll +++ llvm/trunk/test/CodeGen/X86/cmov-schedule.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -544,68 +544,68 @@ ; BDVER2-LABEL: test_cmov_16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmovow %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovnow %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovnew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovnew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovbew %si, %di # sched: [3:1.00] -; BDVER2-NEXT: cmovbew %si, %di # sched: [3:1.00] -; BDVER2-NEXT: cmovaw %si, %di # sched: [3:1.00] -; BDVER2-NEXT: cmovaw %si, %di # sched: [3:1.00] -; BDVER2-NEXT: cmovsw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovnsw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovpw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovpw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovnpw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovnpw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovlw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovlw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovgew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovgew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovlew %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovlew %si, %di # sched: [2:0.67] 
-; BDVER2-NEXT: cmovgw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovgw %si, %di # sched: [2:0.67] -; BDVER2-NEXT: cmovow (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovnow (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [8:1.00] -; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [8:1.00] -; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [8:1.00] -; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [8:1.00] -; BDVER2-NEXT: cmovsw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovnsw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [7:0.67] -; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [7:0.67] +; BDVER2-NEXT: cmovow %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovnow %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovaew %si, %di # sched: 
[1:0.50] +; BDVER2-NEXT: cmovaew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovaew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovnew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovnew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovbew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovbew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovaw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovaw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovsw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovnsw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovpw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovpw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovlw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovlw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovgew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovgew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovlew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovlew %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovgw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovgw %si, %di # sched: [1:0.50] +; BDVER2-NEXT: cmovow (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovnow (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovaw 
(%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovsw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovnsw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [5:0.50] +; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmov_16: ; BTVER2: # %bb.0: @@ -1274,68 +1274,68 @@ ; BDVER2-LABEL: test_cmov_32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmovol %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovnol %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovel %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovel %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovnel %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovnel %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovbel %esi, %edi # sched: [3:1.00] -; BDVER2-NEXT: cmovbel %esi, %edi # sched: [3:1.00] -; BDVER2-NEXT: cmoval %esi, %edi # sched: [3:1.00] -; BDVER2-NEXT: cmoval %esi, %edi # sched: [3:1.00] -; BDVER2-NEXT: cmovsl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovnsl %esi, 
%edi # sched: [2:0.67] -; BDVER2-NEXT: cmovpl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovpl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovll %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovll %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovgel %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovgel %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovlel %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovlel %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovgl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovgl %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: cmovol (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovnol (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00] -; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00] -; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [8:1.00] -; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [8:1.00] -; BDVER2-NEXT: cmovsl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovgel (%rdx), %edi # 
sched: [7:0.67] -; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67] -; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67] +; BDVER2-NEXT: cmovol %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovnol %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovsl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovnsl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmovol (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovnol (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovbl 
(%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovsl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovnsl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50] +; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmov_32: ; BTVER2: # %bb.0: @@ -2004,68 +2004,68 @@ ; BDVER2-LABEL: test_cmov_64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmovoq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: 
[2:0.67] -; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00] -; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00] -; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00] -; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00] -; BDVER2-NEXT: cmovsq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: 
cmovbeq (%rdx), %rdi # sched: [8:1.00] -; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00] -; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00] -; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00] -; BDVER2-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67] -; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67] +; BDVER2-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovpq %rsi, %rdi # 
sched: [1:0.50] +; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmovoq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovsq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovleq (%rdx), %rdi # 
sched: [5:0.50] +; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50] +; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmov_64: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/f16c-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/f16c-schedule.ll +++ llvm/trunk/test/CodeGen/X86/f16c-schedule.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -46,10 +46,10 @@ ; ; BDVER2-LABEL: test_vcvtph2ps_128: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00] -; BDVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [13:1.00] +; BDVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: 
[5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_vcvtph2ps_128: ; BTVER2: # %bb.0: @@ -110,10 +110,10 @@ ; ; BDVER2-LABEL: test_vcvtph2ps_256: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00] -; BDVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [13:2.00] +; BDVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [8:2.00] +; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_vcvtph2ps_256: ; BTVER2: # %bb.0: @@ -169,9 +169,9 @@ ; ; BDVER2-LABEL: test_vcvtps2ph_128: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [8:1.00] ; BDVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_vcvtps2ph_128: ; BTVER2: # %bb.0: @@ -230,10 +230,10 @@ ; ; BDVER2-LABEL: test_vcvtps2ph_256: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00] -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [8:2.00] +; BDVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:2.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_vcvtps2ph_256: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/fma-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fma-schedule.ll +++ llvm/trunk/test/CodeGen/X86/fma-schedule.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule 
-mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE @@ -35,7 +35,7 @@ ; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] ; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddpd_128: ; HASWELL: # %bb.0: @@ -132,12 +132,12 @@ ; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:1.00] +; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: 
vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddpd_256: ; HASWELL: # %bb.0: @@ -242,7 +242,7 @@ ; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] ; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddps_128: ; HASWELL: # %bb.0: @@ -339,12 +339,12 @@ ; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:1.00] +; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddps_256: ; HASWELL: # %bb.0: @@ -449,7 +449,7 @@ ; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] ; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddsd_128: ; HASWELL: # %bb.0: @@ -549,7 +549,7 @@ ; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] ; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] ; BDVER2-NEXT: 
#NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddss_128: ; HASWELL: # %bb.0: @@ -653,7 +653,7 @@ ; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] ; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddsubpd_128: ; HASWELL: # %bb.0: @@ -750,12 +750,12 @@ ; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] -; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:1.00] +; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddsubpd_256: ; HASWELL: # %bb.0: @@ -860,7 +860,7 @@ ; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] ; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddsubps_128: ; HASWELL: # %bb.0: @@ -957,12 +957,12 @@ ; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: 
[5:0.50] ; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50] -; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:1.00] +; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmaddsubps_256: ; HASWELL: # %bb.0: @@ -1071,7 +1071,7 @@ ; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] ; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubaddpd_128: ; HASWELL: # %bb.0: @@ -1168,12 +1168,12 @@ ; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] -; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem 
sched: [10:1.00] +; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubaddpd_256: ; HASWELL: # %bb.0: @@ -1278,7 +1278,7 @@ ; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] ; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubaddps_128: ; HASWELL: # %bb.0: @@ -1375,12 +1375,12 @@ ; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50] -; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:1.00] +; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubaddps_256: ; HASWELL: # %bb.0: @@ -1489,7 +1489,7 @@ ; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] ; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: 
[1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubpd_128: ; HASWELL: # %bb.0: @@ -1586,12 +1586,12 @@ ; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:1.00] +; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubpd_256: ; HASWELL: # %bb.0: @@ -1696,7 +1696,7 @@ ; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] ; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubps_128: ; HASWELL: # %bb.0: @@ -1793,12 +1793,12 @@ ; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = 
(ymm0 * mem) - ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:1.00] +; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubps_256: ; HASWELL: # %bb.0: @@ -1903,7 +1903,7 @@ ; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] ; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubsd_128: ; HASWELL: # %bb.0: @@ -2003,7 +2003,7 @@ ; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] ; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfmsubss_128: ; HASWELL: # %bb.0: @@ -2107,7 +2107,7 @@ ; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] ; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmaddpd_128: ; HASWELL: # %bb.0: @@ -2204,12 +2204,12 @@ ; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] 
+; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:1.00] +; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmaddpd_256: ; HASWELL: # %bb.0: @@ -2314,7 +2314,7 @@ ; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] ; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmaddps_128: ; HASWELL: # %bb.0: @@ -2411,12 +2411,12 @@ ; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50] -; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:1.00] +; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmaddps_256: ; HASWELL: # %bb.0: @@ -2521,7 +2521,7 @@ ; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] ; BDVER2-NEXT: 
vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmaddsd_128: ; HASWELL: # %bb.0: @@ -2621,7 +2621,7 @@ ; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] ; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmaddss_128: ; HASWELL: # %bb.0: @@ -2725,7 +2725,7 @@ ; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] ; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmsubpd_128: ; HASWELL: # %bb.0: @@ -2822,12 +2822,12 @@ ; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:1.00] +; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmsubpd_256: ; HASWELL: # %bb.0: @@ -2932,7 +2932,7 @@ ; BDVER2-NEXT: vfnmsub213ps {{.*#+}} 
xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] ; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmsubps_128: ; HASWELL: # %bb.0: @@ -3029,12 +3029,12 @@ ; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50] ; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50] ; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50] -; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50] -; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50] -; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50] +; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:1.00] +; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:1.00] +; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vzeroupper # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vzeroupper # sched: [46:4.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmsubps_256: ; HASWELL: # %bb.0: @@ -3139,7 +3139,7 @@ ; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] ; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmsubsd_128: ; HASWELL: # %bb.0: @@ -3239,7 +3239,7 @@ ; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] ; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_vfnmsubss_128: ; HASWELL: # %bb.0: 
Index: llvm/trunk/test/CodeGen/X86/fma.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fma.ll +++ llvm/trunk/test/CodeGen/X86/fma.ll @@ -247,76 +247,6 @@ ; FMA32-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; FMA32-NEXT: retl ## encoding: [0xc3] ; -; FMACALL32-LABEL: test_v4f32: -; FMACALL32: ## %bb.0: ## %entry -; FMACALL32-NEXT: subl $108, %esp ## encoding: [0x83,0xec,0x6c] -; FMACALL32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x40] -; FMACALL32-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x30] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x20] -; FMACALL32-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x08,0x02] -; FMACALL32-NEXT: vextractps $2, %xmm1, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x4c,0x24,0x04,0x02] -; FMACALL32-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x60] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 
## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20] -; FMACALL32-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x54] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20] -; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20] -; FMACALL32-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: 
[0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x54] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x60] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10] -; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x1c] -; FMACALL32-NEXT: ## xmm0 = mem[0],zero,zero,zero -; FMACALL32-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x18,0x10] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] -; FMACALL32-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x14,0x20] -; FMACALL32-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] -; FMACALL32-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x10,0x30] -; FMACALL32-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] -; FMACALL32-NEXT: addl $108, %esp ## encoding: [0x83,0xc4,0x6c] -; FMACALL32-NEXT: retl ## encoding: [0xc3] -; ; FMA64-LABEL: test_v4f32: ; FMA64: ## %bb.0: ## %entry ; FMA64-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0xc2] @@ -407,6 +337,76 @@ ; AVX512VL-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2] ; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] +; +; FMACALL32_BDVER2-LABEL: test_v4f32: +; FMACALL32_BDVER2: ## %bb.0: ## %entry +; FMACALL32_BDVER2-NEXT: subl $108, 
%esp ## encoding: [0x83,0xec,0x6c] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x08,0x02] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm1, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x4c,0x24,0x04,0x02] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: 
[0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x54] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03] +; FMACALL32_BDVER2-NEXT: fstps 
{{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x54] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x60] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x1c] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0],zero,zero,zero +; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x18,0x10] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] +; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x14,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] +; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x10,0x30] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] +; FMACALL32_BDVER2-NEXT: addl $108, %esp ## encoding: [0x83,0xc4,0x6c] +; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] entry: %call = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %call @@ -419,165 +419,6 @@ ; FMA32-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 ; FMA32-NEXT: retl ## encoding: [0xc3] ; -; FMACALL32-LABEL: test_v8f32: -; FMACALL32: ## %bb.0: ## %entry -; FMACALL32-NEXT: subl $316, %esp ## encoding: [0x81,0xec,0x3c,0x01,0x00,0x00] -; FMACALL32-NEXT: ## imm = 0x13C -; 
FMACALL32-NEXT: vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01] -; FMACALL32-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x60] -; FMACALL32-NEXT: vextractps $2, %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x5c,0x24,0x08,0x02] -; FMACALL32-NEXT: vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01] -; FMACALL32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x50] -; FMACALL32-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] -; FMACALL32-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x40] -; FMACALL32-NEXT: vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0xb4,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vextractps 
$1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] -; FMACALL32-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0xa8,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] -; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x9c,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: 
vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x90,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 
10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x84,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x78] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: 
[0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] -; FMACALL32-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x3c] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x78] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x38] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x84,0x00,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x34] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x90,0x00,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x30] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x9c,0x00,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0xa8,0x00,0x00,0x00] -; 
FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0xb4,0x00,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20] -; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x3c] -; FMACALL32-NEXT: ## xmm0 = mem[0],zero,zero,zero -; FMACALL32-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x38,0x10] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] -; FMACALL32-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x34,0x20] -; FMACALL32-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] -; FMACALL32-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x30,0x30] -; FMACALL32-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] -; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x2c] -; FMACALL32-NEXT: ## xmm1 = mem[0],zero,zero,zero -; FMACALL32-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x28,0x10] -; FMACALL32-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] -; FMACALL32-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x24,0x20] -; FMACALL32-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] -; FMACALL32-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x20,0x30] -; FMACALL32-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] -; FMACALL32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] -; FMACALL32-NEXT: addl $316, %esp ## encoding: 
[0x81,0xc4,0x3c,0x01,0x00,0x00] -; FMACALL32-NEXT: ## imm = 0x13C -; FMACALL32-NEXT: retl ## encoding: [0xc3] -; ; FMA64-LABEL: test_v8f32: ; FMA64: ## %bb.0: ## %entry ; FMA64-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0xa8,0xc2] @@ -745,6 +586,165 @@ ; AVX512VL-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2] ; AVX512VL-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] +; +; FMACALL32_BDVER2-LABEL: test_v8f32: +; FMACALL32_BDVER2: ## %bb.0: ## %entry +; FMACALL32_BDVER2-NEXT: subl $316, %esp ## encoding: [0x81,0xec,0x3c,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## imm = 0x13C +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01] +; FMACALL32_BDVER2-NEXT: vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01] +; FMACALL32_BDVER2-NEXT: vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] +; FMACALL32_BDVER2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x5c,0x24,0x08,0x02] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## 
encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xb4,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xa8,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, 
{{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x9c,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 
10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x90,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x84,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; 
FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x78] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: 
[0xc5,0xf8,0x28,0x4c,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x3c] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x78] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x38] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x84,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x34] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x90,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x30] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x9c,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xa8,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xb4,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - 
offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x3c] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0],zero,zero,zero +; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x38,0x10] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] +; FMACALL32_BDVER2-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x2c] +; FMACALL32_BDVER2-NEXT: ## xmm1 = mem[0],zero,zero,zero +; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x28,0x10] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] +; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x34,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] +; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x24,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] +; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x30,0x30] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x20,0x30] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] +; FMACALL32_BDVER2-NEXT: addl $316, %esp ## encoding: [0x81,0xc4,0x3c,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## imm = 0x13C +; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] entry: %call = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) ret <8 x float> %call @@ -765,321 +765,6 @@ ; 
FMA32-NEXT: popl %ebp ## encoding: [0x5d] ; FMA32-NEXT: retl ## encoding: [0xc3] ; -; FMACALL32-LABEL: test_v16f32: -; FMACALL32: ## %bb.0: ## %entry -; FMACALL32-NEXT: pushl %ebp ## encoding: [0x55] -; FMACALL32-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] -; FMACALL32-NEXT: andl $-32, %esp ## encoding: [0x83,0xe4,0xe0] -; FMACALL32-NEXT: subl $448, %esp ## encoding: [0x81,0xec,0xc0,0x01,0x00,0x00] -; FMACALL32-NEXT: ## imm = 0x1C0 -; FMACALL32-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x29,0x5c,0x24,0x60] -; FMACALL32-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x60,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x29,0x8c,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0x80,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] -; FMACALL32-NEXT: vextractf128 $1, %ymm3, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd8,0x01] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0xb0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] -; FMACALL32-NEXT: vextractf128 $1, %ymm1, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc8,0x01] -; FMACALL32-NEXT: vmovaps %xmm0, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x54,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xb0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x48,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: 
[0xc5,0xf8,0x28,0x84,0x24,0xb0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x3c,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x30,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## 
encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x24,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x18,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] -; FMACALL32-NEXT: vmovaps 
{{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x0c,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] -; FMACALL32-NEXT: 
vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50] -; FMACALL32-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] -; FMACALL32-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0xf4,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: 
[0xc5,0xf8,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] -; FMACALL32-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0xe8,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] -; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] 
-; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x60] -; FMACALL32-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x50] -; FMACALL32-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte 
Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0xdc,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; 
FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xb0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0xdc,0x00,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x50] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x60] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0xe8,0x00,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0xf4,0x00,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## 
encoding: [0xd9,0x5c,0x24,0x10] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x0c,0x01,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x4c] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x18,0x01,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x48] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x24,0x01,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x44] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x30,0x01,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x40] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x3c,0x01,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x3c] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x48,0x01,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x38] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x54,0x01,0x00,0x00] -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x34] -; FMACALL32-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x30] -; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x2c] -; FMACALL32-NEXT: ## xmm0 = mem[0],zero,zero,zero -; FMACALL32-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## 
encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x28,0x10] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] -; FMACALL32-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x24,0x20] -; FMACALL32-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] -; FMACALL32-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x20,0x30] -; FMACALL32-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] -; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x1c] -; FMACALL32-NEXT: ## xmm1 = mem[0],zero,zero,zero -; FMACALL32-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x18,0x10] -; FMACALL32-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] -; FMACALL32-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x14,0x20] -; FMACALL32-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] -; FMACALL32-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x10,0x30] -; FMACALL32-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] -; FMACALL32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] -; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x4c] -; FMACALL32-NEXT: ## xmm1 = mem[0],zero,zero,zero -; FMACALL32-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x48,0x10] -; FMACALL32-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] -; FMACALL32-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x44,0x20] -; FMACALL32-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] -; FMACALL32-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x40,0x30] -; FMACALL32-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] -; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 ## encoding: [0xc5,0xfa,0x10,0x54,0x24,0x3c] -; FMACALL32-NEXT: ## xmm2 = 
mem[0],zero,zero,zero -; FMACALL32-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x38,0x10] -; FMACALL32-NEXT: ## xmm2 = xmm2[0],mem[0],xmm2[2,3] -; FMACALL32-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x34,0x20] -; FMACALL32-NEXT: ## xmm2 = xmm2[0,1],mem[0],xmm2[3] -; FMACALL32-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x30,0x30] -; FMACALL32-NEXT: ## xmm2 = xmm2[0,1,2],mem[0] -; FMACALL32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01] -; FMACALL32-NEXT: movl %ebp, %esp ## encoding: [0x89,0xec] -; FMACALL32-NEXT: popl %ebp ## encoding: [0x5d] -; FMACALL32-NEXT: retl ## encoding: [0xc3] -; ; FMA64-LABEL: test_v16f32: ; FMA64: ## %bb.0: ## %entry ; FMA64-NEXT: vfmadd213ps %ymm4, %ymm2, %ymm0 ## encoding: [0xc4,0xe2,0x6d,0xa8,0xc4] @@ -1378,6 +1063,321 @@ ; AVX512VL-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2] ; AVX512VL-NEXT: ## zmm0 = (zmm1 * zmm0) + zmm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] +; +; FMACALL32_BDVER2-LABEL: test_v16f32: +; FMACALL32_BDVER2: ## %bb.0: ## %entry +; FMACALL32_BDVER2-NEXT: pushl %ebp ## encoding: [0x55] +; FMACALL32_BDVER2-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] +; FMACALL32_BDVER2-NEXT: andl $-32, %esp ## encoding: [0x83,0xe4,0xe0] +; FMACALL32_BDVER2-NEXT: subl $448, %esp ## encoding: [0x81,0xec,0xc0,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## imm = 0x1C0 +; FMACALL32_BDVER2-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0x80,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] +; FMACALL32_BDVER2-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x60,0x01,0x00,0x00] +; 
FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x8c,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm1, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc9,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x5c,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0xb0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x8c,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x54,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: 
[0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0xb0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x8c,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x48,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xb0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; 
FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x3c,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x30,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, (%esp) ## 
encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x24,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x18,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: 
[0xc5,0xfa,0x11,0x44,0x24,0x04] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x0c,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x02] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x02] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xf4,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload +; 
FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x54,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x4c,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x01] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x01] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xe8,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## 
encoding: [0xdb,0xbc,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x03] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x03] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x02] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x02] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $2, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x02] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; 
FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x04,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $1, %xmm0, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x04,0x24,0x01] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xdc,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x08] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x60,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xfa,0x11,0x44,0x24,0x04] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: 
[0xc5,0xfc,0x28,0x84,0x24,0x80,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x94,0x24,0xb0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x8c,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x44,0x24,0x08,0x03] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x54,0x24,0x04,0x03] +; FMACALL32_BDVER2-NEXT: vextractps $3, %xmm1, (%esp) ## encoding: [0xc4,0xe3,0x79,0x17,0x0c,0x24,0x03] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x2c] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xdc,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x28] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x50] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x24] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x60] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x20] +; 
FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x1c] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xe8,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x18] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xf4,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x14] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x10] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x0c,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x4c] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x18,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x48] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x24,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x44] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x30,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x40] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded 
Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x3c,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x3c] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x48,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x38] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x54,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x34] +; FMACALL32_BDVER2-NEXT: calll _fmaf ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fmaf-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstps {{[0-9]+}}(%esp) ## encoding: [0xd9,0x5c,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x2c] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0],zero,zero,zero +; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x28,0x10] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0],xmm0[2,3] +; FMACALL32_BDVER2-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x1c] +; FMACALL32_BDVER2-NEXT: ## xmm1 = mem[0],zero,zero,zero +; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x18,0x10] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] +; FMACALL32_BDVER2-NEXT: vmovss {{[0-9]+}}(%esp), %xmm2 ## encoding: [0xc5,0xfa,0x10,0x54,0x24,0x3c] +; FMACALL32_BDVER2-NEXT: ## xmm2 = mem[0],zero,zero,zero +; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x38,0x10] +; FMACALL32_BDVER2-NEXT: ## xmm2 = xmm2[0],mem[0],xmm2[2,3] +; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm0, %xmm0 
## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x24,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0],xmm0[3] +; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x14,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] +; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x34,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm2 = xmm2[0,1],mem[0],xmm2[3] +; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0x44,0x24,0x20,0x30] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1,2],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x10,0x30] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x21,0x54,0x24,0x30,0x30] +; FMACALL32_BDVER2-NEXT: ## xmm2 = xmm2[0,1,2],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] +; FMACALL32_BDVER2-NEXT: vmovss {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x4c,0x24,0x4c] +; FMACALL32_BDVER2-NEXT: ## xmm1 = mem[0],zero,zero,zero +; FMACALL32_BDVER2-NEXT: vinsertps $16, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x48,0x10] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0],xmm1[2,3] +; FMACALL32_BDVER2-NEXT: vinsertps $32, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x44,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1],mem[0],xmm1[3] +; FMACALL32_BDVER2-NEXT: vinsertps $48, {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x21,0x4c,0x24,0x40,0x30] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0,1,2],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01] +; FMACALL32_BDVER2-NEXT: 
movl %ebp, %esp ## encoding: [0x89,0xec] +; FMACALL32_BDVER2-NEXT: popl %ebp ## encoding: [0x5d] +; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] entry: %call = call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c) ret <16 x float> %call @@ -1390,41 +1390,6 @@ ; FMA32-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; FMA32-NEXT: retl ## encoding: [0xc3] ; -; FMACALL32-LABEL: test_v2f64: -; FMACALL32: ## %bb.0: ## %entry -; FMACALL32-NEXT: subl $108, %esp ## encoding: [0x83,0xec,0x6c] -; FMACALL32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x30] -; FMACALL32-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x40] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50] -; FMACALL32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x54,0x24,0x10] -; FMACALL32-NEXT: vmovlhps %xmm1, %xmm0, %xmm2 ## encoding: [0xc5,0xf8,0x16,0xd1] -; FMACALL32-NEXT: ## xmm2 = xmm0[0],xmm1[0] -; FMACALL32-NEXT: vmovups %xmm2, (%esp) ## encoding: [0xc5,0xf8,0x11,0x14,0x24] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: vmovapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x28,0x44,0x24,0x30] -; FMACALL32-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] -; FMACALL32-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x58] -; FMACALL32-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## 
encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20] -; FMACALL32-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x28] -; FMACALL32-NEXT: ## xmm0 = mem[0],zero -; FMACALL32-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x16,0x44,0x24,0x20] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0] -; FMACALL32-NEXT: addl $108, %esp ## encoding: [0x83,0xc4,0x6c] -; FMACALL32-NEXT: retl ## encoding: [0xc3] -; ; FMA64-LABEL: test_v2f64: ; FMA64: ## %bb.0: ## %entry ; FMA64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] @@ -1477,6 +1442,41 @@ ; AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] ; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] +; +; FMACALL32_BDVER2-LABEL: test_v2f64: +; FMACALL32_BDVER2: ## %bb.0: ## %entry +; FMACALL32_BDVER2-NEXT: subl $108, %esp ## encoding: [0x83,0xec,0x6c] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc1] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],xmm1[0] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x54,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## 
encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x28,0x44,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] +; FMACALL32_BDVER2-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x58] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x28] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0],zero +; FMACALL32_BDVER2-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x16,0x44,0x24,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] +; FMACALL32_BDVER2-NEXT: addl $108, %esp ## encoding: [0x83,0xc4,0x6c] +; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] entry: %call = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) ret <2 x double> %call @@ -1489,90 +1489,6 @@ ; FMA32-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 ; FMA32-NEXT: retl ## encoding: [0xc3] ; -; FMACALL32-LABEL: test_v4f64: -; FMACALL32: ## %bb.0: ## %entry -; FMACALL32-NEXT: subl $252, %esp ## 
encoding: [0x81,0xec,0xfc,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01] -; FMACALL32-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x70] -; FMACALL32-NEXT: vmovlps %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x5c,0x24,0x10] -; FMACALL32-NEXT: vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01] -; FMACALL32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x50] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] -; FMACALL32-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x60] -; FMACALL32-NEXT: vmovlhps %xmm2, %xmm1, %xmm0 ## encoding: [0xc5,0xf0,0x16,0xc2] -; FMACALL32-NEXT: ## xmm0 = xmm1[0],xmm2[0] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x44] -; FMACALL32-NEXT: vmovupd {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfd,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] -; 
FMACALL32-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: ## xmm0 = xmm0[1],mem[1] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x38] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: vmovapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x28,0x44,0x24,0x70] -; FMACALL32-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: 
[0xc5,0xf9,0x17,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] -; FMACALL32-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x68] -; FMACALL32-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x30] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x38] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x44] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x18] -; FMACALL32-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x30] -; FMACALL32-NEXT: ## xmm0 = mem[0],zero -; FMACALL32-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x16,0x44,0x24,0x28] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0] -; FMACALL32-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x20] -; FMACALL32-NEXT: ## xmm1 = mem[0],zero -; FMACALL32-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x4c,0x24,0x18] -; FMACALL32-NEXT: ## xmm1 = xmm1[0],mem[0] -; FMACALL32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] -; FMACALL32-NEXT: addl $252, %esp ## encoding: [0x81,0xc4,0xfc,0x00,0x00,0x00] -; FMACALL32-NEXT: retl ## encoding: [0xc3] -; ; FMA64-LABEL: test_v4f64: ; FMA64: ## %bb.0: ## 
%entry ; FMA64-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0xc2] @@ -1664,6 +1580,90 @@ ; AVX512VL-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2] ; AVX512VL-NEXT: ## ymm0 = (ymm1 * ymm0) + ymm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] +; +; FMACALL32_BDVER2-LABEL: test_v4f64: +; FMACALL32_BDVER2: ## %bb.0: ## %entry +; FMACALL32_BDVER2-NEXT: subl $252, %esp ## encoding: [0x81,0xec,0xfc,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm2, %xmm3 ## encoding: [0xc4,0xe3,0x7d,0x19,0xd3,0x01] +; FMACALL32_BDVER2-NEXT: vmovups %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x94,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm1, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xca,0x01] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] +; FMACALL32_BDVER2-NEXT: vmovups %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x11,0x8c,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],xmm2[0] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x5c,0x24,0x70] +; FMACALL32_BDVER2-NEXT: vmovlps %xmm3, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x5c,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: 
[0xc5,0xf8,0x29,0x54,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x44] +; FMACALL32_BDVER2-NEXT: vmovupd {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfd,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[1],mem[1] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x38] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x10,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: 
[0xc5,0xfc,0x10,0x84,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x28,0x44,0x24,0x70] +; FMACALL32_BDVER2-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x50] +; FMACALL32_BDVER2-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x68] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x30] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x38] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x44] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; 
FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x18] +; FMACALL32_BDVER2-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x30] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0],zero +; FMACALL32_BDVER2-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x20] +; FMACALL32_BDVER2-NEXT: ## xmm1 = mem[0],zero +; FMACALL32_BDVER2-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x16,0x44,0x24,0x28] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] +; FMACALL32_BDVER2-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x4c,0x24,0x18] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] +; FMACALL32_BDVER2-NEXT: addl $252, %esp ## encoding: [0x81,0xc4,0xfc,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] entry: %call = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) ret <4 x double> %call @@ -1684,179 +1684,6 @@ ; FMA32-NEXT: popl %ebp ## encoding: [0x5d] ; FMA32-NEXT: retl ## encoding: [0xc3] ; -; FMACALL32-LABEL: test_v8f64: -; FMACALL32: ## %bb.0: ## %entry -; FMACALL32-NEXT: pushl %ebp ## encoding: [0x55] -; FMACALL32-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] -; FMACALL32-NEXT: andl $-32, %esp ## encoding: [0x83,0xe4,0xe0] -; FMACALL32-NEXT: subl $384, %esp ## encoding: [0x81,0xec,0x80,0x01,0x00,0x00] -; FMACALL32-NEXT: ## imm = 0x180 -; FMACALL32-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x29,0x9c,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x29,0x8c,0x24,0xa0,0x00,0x00,0x00] 
-; FMACALL32-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x60,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] -; FMACALL32-NEXT: vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01] -; FMACALL32-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0x40,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractf128 $1, %ymm1, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc8,0x01] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x50,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],xmm2[0] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x94,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovapd 40(%ebp), %ymm0 ## encoding: [0xc5,0xfd,0x28,0x45,0x28] -; FMACALL32-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: vunpckhpd 
{{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: ## xmm0 = xmm0[1],mem[1] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0x88,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] -; FMACALL32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] -; FMACALL32-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x29,0x44,0x24,0x30] -; FMACALL32-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] -; 
FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] -; FMACALL32-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x8c,0x24,0x30,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] -; FMACALL32-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x20] -; FMACALL32-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x15,0xc1] -; FMACALL32-NEXT: ## xmm0 = xmm0[1],xmm1[1] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0xbc,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] -; FMACALL32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20] -; FMACALL32-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0x30,0x01,0x00,0x00] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: 
[0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x30] -; FMACALL32-NEXT: vmovapd 8(%ebp), %ymm0 ## encoding: [0xc5,0xfd,0x28,0x45,0x08] -; FMACALL32-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: ## xmm0 = xmm0[1],mem[1] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill -; FMACALL32-NEXT: ## encoding: [0xdb,0x7c,0x24,0x20] -; FMACALL32-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] -; FMACALL32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] -; FMACALL32-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0x00,0x01,0x00,0x00] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; 
FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: vmovapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf9,0x28,0x84,0x24,0x60,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] -; FMACALL32-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x40,0x01,0x00,0x00] -; FMACALL32-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xc5,0xf8,0x12,0x84,0x24,0x58,0x01,0x00,0x00] -; FMACALL32-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] -; FMACALL32-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x60] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x20] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x58] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0x6c,0x24,0x30] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x50] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0xa0,0x00,0x00,0x00] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x48] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0xc0,0x00,0x00,0x00] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x9c,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x88,0x00,0x00,0x00] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x78] -; FMACALL32-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 
10-byte Folded Reload -; FMACALL32-NEXT: ## encoding: [0xdb,0xac,0x24,0x94,0x00,0x00,0x00] -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x70] -; FMACALL32-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x68] -; FMACALL32-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x60] -; FMACALL32-NEXT: ## xmm0 = mem[0],zero -; FMACALL32-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x16,0x44,0x24,0x58] -; FMACALL32-NEXT: ## xmm0 = xmm0[0],mem[0] -; FMACALL32-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x50] -; FMACALL32-NEXT: ## xmm1 = mem[0],zero -; FMACALL32-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x4c,0x24,0x48] -; FMACALL32-NEXT: ## xmm1 = xmm1[0],mem[0] -; FMACALL32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] -; FMACALL32-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x8c,0x24,0x80,0x00,0x00,0x00] -; FMACALL32-NEXT: ## xmm1 = mem[0],zero -; FMACALL32-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x4c,0x24,0x78] -; FMACALL32-NEXT: ## xmm1 = xmm1[0],mem[0] -; FMACALL32-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm2 ## encoding: [0xc5,0xfb,0x10,0x54,0x24,0x70] -; FMACALL32-NEXT: ## xmm2 = mem[0],zero -; FMACALL32-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x16,0x54,0x24,0x68] -; FMACALL32-NEXT: ## xmm2 = xmm2[0],mem[0] -; FMACALL32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01] -; FMACALL32-NEXT: movl %ebp, %esp ## encoding: [0x89,0xec] -; FMACALL32-NEXT: popl %ebp ## encoding: [0x5d] -; FMACALL32-NEXT: retl ## encoding: [0xc3] -; ; FMA64-LABEL: test_v8f64: ; FMA64: ## %bb.0: ## %entry ; FMA64-NEXT: vfmadd213pd %ymm4, %ymm2, %ymm0 ## encoding: 
[0xc4,0xe2,0xed,0xa8,0xc4] @@ -2011,6 +1838,179 @@ ; AVX512VL-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2] ; AVX512VL-NEXT: ## zmm0 = (zmm1 * zmm0) + zmm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] +; +; FMACALL32_BDVER2-LABEL: test_v8f64: +; FMACALL32_BDVER2: ## %bb.0: ## %entry +; FMACALL32_BDVER2-NEXT: pushl %ebp ## encoding: [0x55] +; FMACALL32_BDVER2-NEXT: movl %esp, %ebp ## encoding: [0x89,0xe5] +; FMACALL32_BDVER2-NEXT: andl $-32, %esp ## encoding: [0x83,0xe4,0xe0] +; FMACALL32_BDVER2-NEXT: subl $384, %esp ## encoding: [0x81,0xec,0x80,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## imm = 0x180 +; FMACALL32_BDVER2-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x84,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] +; FMACALL32_BDVER2-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x94,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm3, %xmm2 ## encoding: [0xc4,0xe3,0x7d,0x19,0xda,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x9c,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 32-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x29,0x8c,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x94,0x24,0x40,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x60,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovlps %xmm0, 
{{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm1, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc8,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x84,0x24,0x50,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovlhps %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc2] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],xmm2[0] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovapd 40(%ebp), %ymm0 ## encoding: [0xc5,0xfd,0x28,0x45,0x28] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x94,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[1],mem[1] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 40(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x28] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded 
Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0x88,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] +; FMACALL32_BDVER2-NEXT: vmovapd %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x29,0x44,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm1 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x8c,0x24,0x30,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 ## encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01] +; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x15,0xc1] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[1],xmm1[1] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xbc,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0x30,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovapd 8(%ebp), %ymm0 ## encoding: [0xc5,0xfd,0x28,0x45,0x08] +; FMACALL32_BDVER2-NEXT: fstpt 
{{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x30] +; FMACALL32_BDVER2-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vunpckhpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x15,0x84,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[1],mem[1] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovaps 8(%ebp), %ymm0 ## encoding: [0xc5,0xfc,0x28,0x45,0x08] +; FMACALL32_BDVER2-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Spill +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x7c,0x24,0x20] +; FMACALL32_BDVER2-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 ## 32-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xfc,0x28,0x84,0x24,0xe0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vunpcklpd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x14,0x84,0x24,0x00,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: vmovapd 
{{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf9,0x28,0x84,0x24,0x60,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovhpd %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf9,0x17,0x44,0x24,0x10] +; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x84,0x24,0x40,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x12,0x84,0x24,0x58,0x01,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] +; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x60] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x20] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x58] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0x6c,0x24,0x30] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x50] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xa0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x48] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0xc0,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x9c,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x88,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: 
[0xdd,0x5c,0x24,0x78] +; FMACALL32_BDVER2-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) ## 10-byte Folded Reload +; FMACALL32_BDVER2-NEXT: ## encoding: [0xdb,0xac,0x24,0x94,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x70] +; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] +; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 +; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x68] +; FMACALL32_BDVER2-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x60] +; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0],zero +; FMACALL32_BDVER2-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x50] +; FMACALL32_BDVER2-NEXT: ## xmm1 = mem[0],zero +; FMACALL32_BDVER2-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x16,0x44,0x24,0x58] +; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],mem[0] +; FMACALL32_BDVER2-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x4c,0x24,0x48] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0] +; FMACALL32_BDVER2-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm2 ## encoding: [0xc5,0xfb,0x10,0x54,0x24,0x70] +; FMACALL32_BDVER2-NEXT: ## xmm2 = mem[0],zero +; FMACALL32_BDVER2-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm2, %xmm2 ## encoding: [0xc5,0xe9,0x16,0x54,0x24,0x68] +; FMACALL32_BDVER2-NEXT: ## xmm2 = xmm2[0],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01] +; FMACALL32_BDVER2-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 ## encoding: [0xc5,0xfb,0x10,0x8c,0x24,0x80,0x00,0x00,0x00] +; FMACALL32_BDVER2-NEXT: ## xmm1 = mem[0],zero +; FMACALL32_BDVER2-NEXT: vmovhpd {{[0-9]+}}(%esp), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x4c,0x24,0x78] +; FMACALL32_BDVER2-NEXT: ## xmm1 = xmm1[0],mem[0] +; FMACALL32_BDVER2-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ## encoding: [0xc4,0xe3,0x75,0x18,0xca,0x01] +; FMACALL32_BDVER2-NEXT: movl 
%ebp, %esp ## encoding: [0x89,0xec] +; FMACALL32_BDVER2-NEXT: popl %ebp ## encoding: [0x5d] +; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] entry: %call = call <8 x double> @llvm.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c) ret <8 x double> %call Index: llvm/trunk/test/CodeGen/X86/fma4-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fma4-schedule.ll +++ llvm/trunk/test/CodeGen/X86/fma4-schedule.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER4 @@ -26,7 +26,7 @@ ; BDVER12-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddpd_128: ; BDVER34: # %bb.0: @@ -55,11 +55,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; 
BDVER12-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddpd_256: ; BDVER34: # %bb.0: @@ -91,7 +91,7 @@ ; BDVER12-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddps_128: ; BDVER34: # %bb.0: @@ -120,11 +120,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddps_256: ; BDVER34: # %bb.0: @@ -156,7 +156,7 @@ ; BDVER12-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddsd_128: ; BDVER34: # %bb.0: @@ -187,7 +187,7 @@ ; BDVER12-NEXT: vfmaddss 
(%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddss_128: ; BDVER34: # %bb.0: @@ -222,7 +222,7 @@ ; BDVER12-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddsubpd_128: ; BDVER34: # %bb.0: @@ -251,11 +251,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddsubpd_256: ; BDVER34: # %bb.0: @@ -287,7 +287,7 @@ ; BDVER12-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddsubps_128: ; BDVER34: # %bb.0: @@ -316,11 +316,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # 
sched: [10:1.00] +; BDVER12-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmaddsubps_256: ; BDVER34: # %bb.0: @@ -356,7 +356,7 @@ ; BDVER12-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubaddpd_128: ; BDVER34: # %bb.0: @@ -385,11 +385,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubaddpd_256: ; BDVER34: # %bb.0: @@ -421,7 +421,7 @@ ; BDVER12-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubaddps_128: ; BDVER34: # %bb.0: @@ -450,11 +450,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # 
sched: [10:0.50] +; BDVER12-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubaddps_256: ; BDVER34: # %bb.0: @@ -490,7 +490,7 @@ ; BDVER12-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubpd_128: ; BDVER34: # %bb.0: @@ -519,11 +519,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubpd_256: ; BDVER34: # %bb.0: @@ -555,7 +555,7 @@ ; BDVER12-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubps_128: ; BDVER34: # %bb.0: @@ -584,11 +584,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfmsubps 
%ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubps_256: ; BDVER34: # %bb.0: @@ -620,7 +620,7 @@ ; BDVER12-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubsd_128: ; BDVER34: # %bb.0: @@ -651,7 +651,7 @@ ; BDVER12-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfmsubss_128: ; BDVER34: # %bb.0: @@ -686,7 +686,7 @@ ; BDVER12-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmaddpd_128: ; BDVER34: # %bb.0: @@ -715,11 +715,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: 
vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmaddpd_256: ; BDVER34: # %bb.0: @@ -751,7 +751,7 @@ ; BDVER12-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmaddps_128: ; BDVER34: # %bb.0: @@ -780,11 +780,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmaddps_256: ; BDVER34: # %bb.0: @@ -816,7 +816,7 @@ ; BDVER12-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmaddsd_128: ; BDVER34: # %bb.0: @@ -847,7 +847,7 @@ ; BDVER12-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmaddss_128: ; BDVER34: # %bb.0: @@ -882,7 +882,7 @@ ; BDVER12-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; 
BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmsubpd_128: ; BDVER34: # %bb.0: @@ -911,11 +911,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmsubpd_256: ; BDVER34: # %bb.0: @@ -947,7 +947,7 @@ ; BDVER12-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmsubps_128: ; BDVER34: # %bb.0: @@ -976,11 +976,11 @@ ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP ; BDVER12-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; BDVER12-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] -; BDVER12-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50] +; BDVER12-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; BDVER12-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmsubps_256: ; BDVER34: # %bb.0: @@ -1012,7 +1012,7 @@ ; BDVER12-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: 
[10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmsubsd_128: ; BDVER34: # %bb.0: @@ -1043,7 +1043,7 @@ ; BDVER12-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER34-LABEL: test_vfnmsubss_128: ; BDVER34: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/lea32-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lea32-schedule.ll +++ llvm/trunk/test/CodeGen/X86/lea32-schedule.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-slow-3ops-lea | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -65,7 +65,7 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_offset: ; BTVER2: # %bb.0: @@ 
-135,7 +135,7 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_offset_big: ; BTVER2: # %bb.0: @@ -214,7 +214,7 @@ ; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add: ; BTVER2: # %bb.0: @@ -299,7 +299,7 @@ ; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_offset: ; BTVER2: # %bb.0: @@ -390,7 +390,7 @@ ; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_offset_big: ; BTVER2: # %bb.0: @@ -463,7 +463,7 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_mul: ; BTVER2: # %bb.0: @@ -538,7 +538,7 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_mul_offset: ; BTVER2: # %bb.0: @@ -619,7 +619,7 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] 
+; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_mul_offset_big: ; BTVER2: # %bb.0: @@ -698,7 +698,7 @@ ; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_scale: ; BTVER2: # %bb.0: @@ -784,7 +784,7 @@ ; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_scale_offset: ; BTVER2: # %bb.0: @@ -876,7 +876,7 @@ ; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi ; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi ; BDVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_scale_offset_big: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/lea64-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lea64-schedule.ll +++ llvm/trunk/test/CodeGen/X86/lea64-schedule.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-slow-3ops-lea | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s 
--check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -57,7 +57,7 @@ ; BDVER2-LABEL: test_lea_offset: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_offset: ; BTVER2: # %bb.0: @@ -117,7 +117,7 @@ ; BDVER2-LABEL: test_lea_offset_big: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_offset_big: ; BTVER2: # %bb.0: @@ -178,7 +178,7 @@ ; BDVER2-LABEL: test_lea_add: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add: ; BTVER2: # %bb.0: @@ -243,7 +243,7 @@ ; BDVER2-LABEL: test_lea_add_offset: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_offset: ; BTVER2: # %bb.0: @@ -314,7 +314,7 @@ ; BDVER2-LABEL: test_lea_add_offset_big: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_offset_big: ; BTVER2: # %bb.0: @@ -375,7 +375,7 @@ ; BDVER2-LABEL: test_lea_mul: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_mul: ; BTVER2: # %bb.0: @@ -440,7 +440,7 @@ ; BDVER2-LABEL: test_lea_mul_offset: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq 
-32(%rdi,%rdi,2), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_mul_offset: ; BTVER2: # %bb.0: @@ -511,7 +511,7 @@ ; BDVER2-LABEL: test_lea_mul_offset_big: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_mul_offset_big: ; BTVER2: # %bb.0: @@ -572,7 +572,7 @@ ; BDVER2-LABEL: test_lea_add_scale: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_scale: ; BTVER2: # %bb.0: @@ -638,7 +638,7 @@ ; BDVER2-LABEL: test_lea_add_scale_offset: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_scale_offset: ; BTVER2: # %bb.0: @@ -710,7 +710,7 @@ ; BDVER2-LABEL: test_lea_add_scale_offset_big: ; BDVER2: # %bb.0: ; BDVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lea_add_scale_offset_big: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/lwp-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lwp-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/lwp-intrinsics.ll @@ -40,14 +40,41 @@ } define i8 @test_lwpins32_rri(i32 %a0, i32 %a1) nounwind { -; X86-LABEL: test_lwpins32_rri: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF -; X86-NEXT: setb %al -; X86-NEXT: retl +; X86_BDVER1-LABEL: test_lwpins32_rri: +; X86_BDVER1: # %bb.0: +; X86_BDVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 
X86_BDVER1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86_BDVER1-NEXT: addl %ecx, %ecx +; X86_BDVER1-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF +; X86_BDVER1-NEXT: setb %al +; X86_BDVER1-NEXT: retl +; +; X86_BDVER2-LABEL: test_lwpins32_rri: +; X86_BDVER2: # %bb.0: +; X86_BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86_BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86_BDVER2-NEXT: addl %ecx, %ecx +; X86_BDVER2-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF +; X86_BDVER2-NEXT: setb %al +; X86_BDVER2-NEXT: retl +; +; X86_BDVER3-LABEL: test_lwpins32_rri: +; X86_BDVER3: # %bb.0: +; X86_BDVER3-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86_BDVER3-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86_BDVER3-NEXT: addl %ecx, %ecx +; X86_BDVER3-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF +; X86_BDVER3-NEXT: setb %al +; X86_BDVER3-NEXT: retl +; +; X86_BDVER4-LABEL: test_lwpins32_rri: +; X86_BDVER4: # %bb.0: +; X86_BDVER4-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86_BDVER4-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86_BDVER4-NEXT: addl %ecx, %ecx +; X86_BDVER4-NEXT: lwpins $-1985229329, %ecx, %eax # imm = 0x89ABCDEF +; X86_BDVER4-NEXT: setb %al +; X86_BDVER4-NEXT: retl ; ; X64-LABEL: test_lwpins32_rri: ; X64: # %bb.0: @@ -80,13 +107,37 @@ } define void @test_lwpval32_rri(i32 %a0, i32 %a1) nounwind { -; X86-LABEL: test_lwpval32_rri: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98 -; X86-NEXT: retl +; X86_BDVER1-LABEL: test_lwpval32_rri: +; X86_BDVER1: # %bb.0: +; X86_BDVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86_BDVER1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86_BDVER1-NEXT: addl %ecx, %ecx +; X86_BDVER1-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98 +; X86_BDVER1-NEXT: retl +; +; X86_BDVER2-LABEL: test_lwpval32_rri: +; X86_BDVER2: # %bb.0: +; X86_BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86_BDVER2-NEXT: movl {{[0-9]+}}(%esp), 
%eax +; X86_BDVER2-NEXT: addl %ecx, %ecx +; X86_BDVER2-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98 +; X86_BDVER2-NEXT: retl +; +; X86_BDVER3-LABEL: test_lwpval32_rri: +; X86_BDVER3: # %bb.0: +; X86_BDVER3-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86_BDVER3-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86_BDVER3-NEXT: addl %ecx, %ecx +; X86_BDVER3-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98 +; X86_BDVER3-NEXT: retl +; +; X86_BDVER4-LABEL: test_lwpval32_rri: +; X86_BDVER4: # %bb.0: +; X86_BDVER4-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86_BDVER4-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86_BDVER4-NEXT: addl %ecx, %ecx +; X86_BDVER4-NEXT: lwpval $-19088744, %ecx, %eax # imm = 0xFEDCBA98 +; X86_BDVER4-NEXT: retl ; ; X64-LABEL: test_lwpval32_rri: ; X64: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/lwp-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/lwp-schedule.ll +++ llvm/trunk/test/CodeGen/X86/lwp-schedule.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1 -; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1 +; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3 ; RUN: llc < %s -mtriple=x86_64-unknown 
-print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4 @@ -13,8 +13,8 @@ ; ; BDVER12-LABEL: test_llwpcb: ; BDVER12: # %bb.0: -; BDVER12-NEXT: llwpcb %rdi # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: llwpcb %rdi # sched: [100:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_llwpcb: ; BDVER3: # %bb.0: @@ -37,8 +37,8 @@ ; ; BDVER12-LABEL: test_slwpcb: ; BDVER12: # %bb.0: -; BDVER12-NEXT: slwpcb %rax # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: slwpcb %rax # sched: [100:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_slwpcb: ; BDVER3: # %bb.0: @@ -64,11 +64,11 @@ ; ; BDVER12-LABEL: test_lwpins32_rri: ; BDVER12: # %bb.0: -; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.33] +; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.50] ; BDVER12-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF -; BDVER12-NEXT: # sched: [100:0.33] +; BDVER12-NEXT: # sched: [100:0.50] ; BDVER12-NEXT: setb %al # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_lwpins32_rri: ; BDVER3: # %bb.0: @@ -99,9 +99,9 @@ ; BDVER12-LABEL: test_lwpins32_rmi: ; BDVER12: # %bb.0: ; BDVER12-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210 -; BDVER12-NEXT: # sched: [100:0.33] +; BDVER12-NEXT: # sched: [100:0.50] ; BDVER12-NEXT: setb %al # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_lwpins32_rmi: ; BDVER3: # %bb.0: @@ -130,9 +130,9 @@ ; BDVER12-LABEL: test_lwpins64_rri: ; BDVER12: # %bb.0: ; BDVER12-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF -; BDVER12-NEXT: # sched: [100:0.33] +; BDVER12-NEXT: # sched: [100:0.50] ; BDVER12-NEXT: setb %al # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_lwpins64_rri: ; BDVER3: # %bb.0: @@ -160,9 +160,9 @@ ; 
BDVER12-LABEL: test_lwpins64_rmi: ; BDVER12: # %bb.0: ; BDVER12-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210 -; BDVER12-NEXT: # sched: [100:0.33] +; BDVER12-NEXT: # sched: [100:0.50] ; BDVER12-NEXT: setb %al # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_lwpins64_rmi: ; BDVER3: # %bb.0: @@ -190,10 +190,10 @@ ; ; BDVER12-LABEL: test_lwpval32_rri: ; BDVER12: # %bb.0: -; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.33] +; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.50] ; BDVER12-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98 -; BDVER12-NEXT: # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: # sched: [100:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_lwpval32_rri: ; BDVER3: # %bb.0: @@ -221,8 +221,8 @@ ; BDVER12-LABEL: test_lwpval32_rmi: ; BDVER12: # %bb.0: ; BDVER12-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678 -; BDVER12-NEXT: # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: # sched: [100:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_lwpval32_rmi: ; BDVER3: # %bb.0: @@ -248,8 +248,8 @@ ; BDVER12-LABEL: test_lwpval64_rri: ; BDVER12: # %bb.0: ; BDVER12-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98 -; BDVER12-NEXT: # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: # sched: [100:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_lwpval64_rri: ; BDVER3: # %bb.0: @@ -274,8 +274,8 @@ ; BDVER12-LABEL: test_lwpval64_rmi: ; BDVER12: # %bb.0: ; BDVER12-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678 -; BDVER12-NEXT: # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: # sched: [100:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_lwpval64_rmi: ; BDVER3: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll 
=================================================================== --- llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll +++ llvm/trunk/test/CodeGen/X86/lzcnt-schedule.ll @@ -4,7 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -43,11 +43,11 @@ ; ; BDVER2-LABEL: test_ctlz_i16: ; BDVER2: # %bb.0: -; BDVER2-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00] -; BDVER2-NEXT: lzcntw %di, %ax # sched: [3:1.00] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] +; BDVER2-NEXT: lzcntw (%rsi), %cx # sched: [6:0.50] +; BDVER2-NEXT: lzcntw %di, %ax # sched: [2:0.50] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ctlz_i16: ; BTVER2: # %bb.0: @@ -103,10 +103,10 @@ ; ; BDVER2-LABEL: test_ctlz_i32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00] -; BDVER2-NEXT: lzcntl %edi, %eax # sched: [3:1.00] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: 
lzcntl (%rsi), %ecx # sched: [6:0.50] +; BDVER2-NEXT: lzcntl %edi, %eax # sched: [2:0.50] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ctlz_i32: ; BTVER2: # %bb.0: @@ -160,10 +160,10 @@ ; ; BDVER2-LABEL: test_ctlz_i64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00] -; BDVER2-NEXT: lzcntq %rdi, %rax # sched: [3:1.00] -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: lzcntq (%rsi), %rcx # sched: [6:0.50] +; BDVER2-NEXT: lzcntq %rdi, %rax # sched: [2:0.50] +; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ctlz_i64: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/memset.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/memset.ll +++ llvm/trunk/test/CodeGen/X86/memset.ll @@ -22,7 +22,6 @@ ; X86-NEXT: calll _foo ; X86-NEXT: addl $44, %esp ; X86-NEXT: retl -; X86-NEXT: ## -- End function ; ; XMM-LABEL: t: ; XMM: ## %bb.0: ## %entry @@ -35,7 +34,6 @@ ; XMM-NEXT: calll _foo ; XMM-NEXT: addl $60, %esp ; XMM-NEXT: retl -; XMM-NEXT: ## -- End function ; ; YMM-LABEL: t: ; YMM: ## %bb.0: ## %entry @@ -44,15 +42,14 @@ ; YMM-NEXT: andl $-32, %esp ; YMM-NEXT: subl $96, %esp ; YMM-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; YMM-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) ; YMM-NEXT: leal {{[0-9]+}}(%esp), %eax +; YMM-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) ; YMM-NEXT: movl %eax, (%esp) ; YMM-NEXT: vzeroupper ; YMM-NEXT: calll _foo ; YMM-NEXT: movl %ebp, %esp ; YMM-NEXT: popl %ebp ; YMM-NEXT: retl -; YMM-NEXT: ## -- End function entry: %up_mvd = alloca [8 x %struct.x] ; <[8 x %struct.x]*> [#uses=2] %up_mvd116 = getelementptr [8 x %struct.x], [8 x %struct.x]* %up_mvd, i32 0, i32 0 ; <%struct.x*> [#uses=1] Index: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll =================================================================== --- 
llvm/trunk/test/CodeGen/X86/mmx-schedule.ll +++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -79,11 +79,11 @@ ; ; BDVER2-LABEL: test_cvtpd2pi: ; BDVER2: # %bb.0: -; BDVER2-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] -; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [13:1.00] +; BDVER2-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [6:1.00] +; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtpd2pi: ; BTVER2: # %bb.0: @@ -168,10 +168,10 @@ ; ; BDVER2-LABEL: test_cvtpi2pd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; 
BDVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [13:1.00] +; BDVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [6:1.00] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtpi2pd: ; BTVER2: # %bb.0: @@ -253,10 +253,10 @@ ; ; BDVER2-LABEL: test_cvtpi2ps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] ; BDVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtpi2ps: ; BTVER2: # %bb.0: @@ -346,11 +346,11 @@ ; ; BDVER2-LABEL: test_cvtps2pi: ; BDVER2: # %bb.0: -; BDVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] ; BDVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] -; BDVER2-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; BDVER2-NEXT: movq %mm1, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvtps2pi: ; BTVER2: # %bb.0: @@ -443,11 +443,11 @@ ; ; BDVER2-LABEL: test_cvttpd2pi: ; BDVER2: # %bb.0: -; BDVER2-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] -; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [13:1.00] +; BDVER2-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [6:1.00] +; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvttpd2pi: ; BTVER2: # %bb.0: @@ -540,11 +540,11 @@ ; ; BDVER2-LABEL: 
test_cvttps2pi: ; BDVER2: # %bb.0: -; BDVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] ; BDVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] -; BDVER2-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; BDVER2-NEXT: movq %mm1, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cvttps2pi: ; BTVER2: # %bb.0: @@ -613,8 +613,8 @@ ; ; BDVER2-LABEL: test_emms: ; BDVER2: # %bb.0: -; BDVER2-NEXT: emms # sched: [31:10.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: emms # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_emms: ; BTVER2: # %bb.0: @@ -673,8 +673,8 @@ ; ; BDVER2-LABEL: test_maskmovq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: maskmovq %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_maskmovq: ; BTVER2: # %bb.0: @@ -781,14 +781,14 @@ ; ; BDVER2-LABEL: test_movd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movd %edi, %mm1 # sched: [1:1.00] +; BDVER2-NEXT: movd %edi, %mm1 # sched: [10:0.50] ; BDVER2-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] -; BDVER2-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] -; BDVER2-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: movd %mm2, %ecx # sched: [2:1.00] -; BDVER2-NEXT: movd %mm0, %eax # sched: [2:1.00] -; BDVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddd %mm1, %mm2 # sched: [2:0.50] +; BDVER2-NEXT: paddd %mm2, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movd %mm2, %ecx # sched: [10:1.00] +; BDVER2-NEXT: movd %mm0, %eax # sched: [10:1.00] +; BDVER2-NEXT: movl %ecx, (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movd: ; BTVER2: # %bb.0: @@ -885,10 +885,10 @@ ; 
; BDVER2-LABEL: test_movdq2q: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] -; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movdq2q: ; BTVER2: # %bb.0: @@ -953,8 +953,8 @@ ; ; BDVER2-LABEL: test_movntq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movntq: ; BTVER2: # %bb.0: @@ -1032,9 +1032,9 @@ ; BDVER2-LABEL: test_movq: ; BDVER2: # %bb.0: ; BDVER2-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] -; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, (%rdi) # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movq: ; BTVER2: # %bb.0: @@ -1100,8 +1100,8 @@ ; ; BDVER2-LABEL: test_movq2dq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movq2dq: ; BTVER2: # %bb.0: @@ -1176,10 +1176,10 @@ ; ; BDVER2-LABEL: test_pabsb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pabsb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: pabsb %mm0, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # 
sched: [5:1.00] ; ; BTVER2-LABEL: test_pabsb: ; BTVER2: # %bb.0: @@ -1261,10 +1261,10 @@ ; ; BDVER2-LABEL: test_pabsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pabsd (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: pabsd %mm0, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pabsd: ; BTVER2: # %bb.0: @@ -1346,10 +1346,10 @@ ; ; BDVER2-LABEL: test_pabsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pabsw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: pabsw %mm0, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pabsw: ; BTVER2: # %bb.0: @@ -1431,10 +1431,10 @@ ; ; BDVER2-LABEL: test_packssdw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: packssdw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: packssdw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_packssdw: ; BTVER2: # %bb.0: @@ -1516,10 +1516,10 @@ ; ; BDVER2-LABEL: test_packsswb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: packsswb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: packsswb (%rdi), %mm0 # sched: [7:0.50] +; 
BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_packsswb: ; BTVER2: # %bb.0: @@ -1601,10 +1601,10 @@ ; ; BDVER2-LABEL: test_packuswb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: packuswb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: packuswb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_packuswb: ; BTVER2: # %bb.0: @@ -1686,10 +1686,10 @@ ; ; BDVER2-LABEL: test_paddb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: paddb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_paddb: ; BTVER2: # %bb.0: @@ -1771,10 +1771,10 @@ ; ; BDVER2-LABEL: test_paddd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddd %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: paddd (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_paddd: ; BTVER2: # %bb.0: @@ -1856,10 +1856,10 @@ ; ; BDVER2-LABEL: test_paddq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] +; BDVER2-NEXT: paddq %mm1, %mm0 # sched: [2:0.50] ; BDVER2-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] 
+; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_paddq: ; BTVER2: # %bb.0: @@ -1941,10 +1941,10 @@ ; ; BDVER2-LABEL: test_paddsb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddsb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: paddsb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_paddsb: ; BTVER2: # %bb.0: @@ -2026,10 +2026,10 @@ ; ; BDVER2-LABEL: test_paddsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddsw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: paddsw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_paddsw: ; BTVER2: # %bb.0: @@ -2111,10 +2111,10 @@ ; ; BDVER2-LABEL: test_paddusb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddusb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: paddusb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_paddusb: ; BTVER2: # %bb.0: @@ -2196,10 +2196,10 @@ ; ; BDVER2-LABEL: test_paddusw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddusw %mm1, %mm0 # 
sched: [2:0.50] +; BDVER2-NEXT: paddusw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_paddusw: ; BTVER2: # %bb.0: @@ -2281,10 +2281,10 @@ ; ; BDVER2-LABEL: test_paddw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: paddw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: paddw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_paddw: ; BTVER2: # %bb.0: @@ -2366,10 +2366,10 @@ ; ; BDVER2-LABEL: test_palignr: ; BDVER2: # %bb.0: -; BDVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] -; BDVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_palignr: ; BTVER2: # %bb.0: @@ -2451,10 +2451,10 @@ ; ; BDVER2-LABEL: test_pand: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.33] -; BDVER2-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pand %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pand (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pand: ; BTVER2: # %bb.0: @@ -2536,10 +2536,10 @@ ; ; BDVER2-LABEL: test_pandn: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] -; BDVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; 
BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pandn %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pandn (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pandn: ; BTVER2: # %bb.0: @@ -2621,10 +2621,10 @@ ; ; BDVER2-LABEL: test_pavgb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pavgb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pavgb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pavgb: ; BTVER2: # %bb.0: @@ -2706,10 +2706,10 @@ ; ; BDVER2-LABEL: test_pavgw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pavgw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pavgw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pavgw: ; BTVER2: # %bb.0: @@ -2791,10 +2791,10 @@ ; ; BDVER2-LABEL: test_pcmpeqb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pcmpeqb: ; BTVER2: # %bb.0: @@ -2876,10 +2876,10 @@ ; ; BDVER2-LABEL: test_pcmpeqd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: 
[8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pcmpeqd: ; BTVER2: # %bb.0: @@ -2961,10 +2961,10 @@ ; ; BDVER2-LABEL: test_pcmpeqw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pcmpeqw: ; BTVER2: # %bb.0: @@ -3046,10 +3046,10 @@ ; ; BDVER2-LABEL: test_pcmpgtb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pcmpgtb: ; BTVER2: # %bb.0: @@ -3131,10 +3131,10 @@ ; ; BDVER2-LABEL: test_pcmpgtd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pcmpgtd: ; BTVER2: # %bb.0: @@ -3216,10 +3216,10 @@ ; ; BDVER2-LABEL: test_pcmpgtw: ; BDVER2: # %bb.0: -; 
BDVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pcmpgtw: ; BTVER2: # %bb.0: @@ -3285,8 +3285,8 @@ ; ; BDVER2-LABEL: test_pextrw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pextrw $0, %mm0, %eax # sched: [13:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # %bb.0: @@ -3361,10 +3361,10 @@ ; ; BDVER2-LABEL: test_phaddd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] -; BDVER2-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: phaddd %mm1, %mm0 # sched: [5:0.50] +; BDVER2-NEXT: phaddd (%rdi), %mm0 # sched: [10:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # %bb.0: @@ -3446,10 +3446,10 @@ ; ; BDVER2-LABEL: test_phaddsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] -; BDVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: phaddsw %mm1, %mm0 # sched: [5:0.50] +; BDVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [10:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # %bb.0: @@ -3531,10 +3531,10 @@ ; ; BDVER2-LABEL: test_phaddw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] -; BDVER2-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] -; BDVER2-NEXT: movq 
%mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: phaddw %mm1, %mm0 # sched: [5:0.50] +; BDVER2-NEXT: phaddw (%rdi), %mm0 # sched: [10:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # %bb.0: @@ -3616,10 +3616,10 @@ ; ; BDVER2-LABEL: test_phsubd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] -; BDVER2-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: phsubd %mm1, %mm0 # sched: [5:0.50] +; BDVER2-NEXT: phsubd (%rdi), %mm0 # sched: [10:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_phsubd: ; BTVER2: # %bb.0: @@ -3701,10 +3701,10 @@ ; ; BDVER2-LABEL: test_phsubsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] -; BDVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: phsubsw %mm1, %mm0 # sched: [5:0.50] +; BDVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [10:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # %bb.0: @@ -3786,10 +3786,10 @@ ; ; BDVER2-LABEL: test_phsubw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] -; BDVER2-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: phsubw %mm1, %mm0 # sched: [5:0.50] +; BDVER2-NEXT: phsubw (%rdi), %mm0 # sched: [10:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # %bb.0: @@ -3879,11 +3879,11 @@ ; ; BDVER2-LABEL: test_pinsrw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] 
; BDVER2-NEXT: movswl (%rsi), %eax # sched: [5:0.50] -; BDVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pinsrw: ; BTVER2: # %bb.0: @@ -3968,10 +3968,10 @@ ; ; BDVER2-LABEL: test_pmaddwd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] -; BDVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmaddwd: ; BTVER2: # %bb.0: @@ -4053,10 +4053,10 @@ ; ; BDVER2-LABEL: test_pmaddubsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] -; BDVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmaddubsw: ; BTVER2: # %bb.0: @@ -4138,10 +4138,10 @@ ; ; BDVER2-LABEL: test_pmaxsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: 
test_pmaxsw: ; BTVER2: # %bb.0: @@ -4223,10 +4223,10 @@ ; ; BDVER2-LABEL: test_pmaxub: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmaxub %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmaxub: ; BTVER2: # %bb.0: @@ -4308,10 +4308,10 @@ ; ; BDVER2-LABEL: test_pminsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pminsw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pminsw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pminsw: ; BTVER2: # %bb.0: @@ -4393,10 +4393,10 @@ ; ; BDVER2-LABEL: test_pminub: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pminub %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pminub (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pminub: ; BTVER2: # %bb.0: @@ -4462,8 +4462,8 @@ ; ; BDVER2-LABEL: test_pmovmskb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmovmskb %mm0, %eax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmovmskb: ; BTVER2: # %bb.0: @@ -4538,10 +4538,10 @@ ; ; BDVER2-LABEL: test_pmulhrsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmulhrsw %mm1, 
%mm0 # sched: [5:1.00] -; BDVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmulhrsw: ; BTVER2: # %bb.0: @@ -4623,10 +4623,10 @@ ; ; BDVER2-LABEL: test_pmulhw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] -; BDVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmulhw: ; BTVER2: # %bb.0: @@ -4708,10 +4708,10 @@ ; ; BDVER2-LABEL: test_pmulhuw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] -; BDVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmulhuw: ; BTVER2: # %bb.0: @@ -4793,10 +4793,10 @@ ; ; BDVER2-LABEL: test_pmullw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] -; BDVER2-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmullw: ; BTVER2: # %bb.0: @@ -4878,10 +4878,10 @@ 
; ; BDVER2-LABEL: test_pmuludq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] -; BDVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] +; BDVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pmuludq: ; BTVER2: # %bb.0: @@ -4963,10 +4963,10 @@ ; ; BDVER2-LABEL: test_por: ; BDVER2: # %bb.0: -; BDVER2-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; BDVER2-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: por %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: por (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_por: ; BTVER2: # %bb.0: @@ -5048,10 +5048,10 @@ ; ; BDVER2-LABEL: test_psadbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] -; BDVER2-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psadbw %mm1, %mm0 # sched: [4:0.50] +; BDVER2-NEXT: psadbw (%rdi), %mm0 # sched: [9:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psadbw: ; BTVER2: # %bb.0: @@ -5133,10 +5133,10 @@ ; ; BDVER2-LABEL: test_pshufb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] -; BDVER2-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pshufb %mm1, %mm0 # sched: [3:2.00] +; BDVER2-NEXT: pshufb (%rdi), %mm0 # sched: [8:2.00] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: 
test_pshufb: ; BTVER2: # %bb.0: @@ -5218,10 +5218,10 @@ ; ; BDVER2-LABEL: test_pshufw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] -; BDVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [7:0.50] +; BDVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pshufw: ; BTVER2: # %bb.0: @@ -5303,10 +5303,10 @@ ; ; BDVER2-LABEL: test_psignb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] -; BDVER2-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psignb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psignb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psignb: ; BTVER2: # %bb.0: @@ -5388,10 +5388,10 @@ ; ; BDVER2-LABEL: test_psignd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] -; BDVER2-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psignd %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psignd (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psignd: ; BTVER2: # %bb.0: @@ -5473,10 +5473,10 @@ ; ; BDVER2-LABEL: test_psignw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] -; BDVER2-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psignw %mm1, %mm0 # sched: [2:0.50] +; 
BDVER2-NEXT: psignw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psignw: ; BTVER2: # %bb.0: @@ -5566,11 +5566,11 @@ ; ; BDVER2-LABEL: test_pslld: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pslld %mm1, %mm0 # sched: [3:0.50] +; BDVER2-NEXT: pslld (%rdi), %mm0 # sched: [8:0.50] +; BDVER2-NEXT: pslld $7, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pslld: ; BTVER2: # %bb.0: @@ -5664,11 +5664,11 @@ ; ; BDVER2-LABEL: test_psllq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psllq %mm1, %mm0 # sched: [3:0.50] +; BDVER2-NEXT: psllq (%rdi), %mm0 # sched: [8:0.50] +; BDVER2-NEXT: psllq $7, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psllq: ; BTVER2: # %bb.0: @@ -5762,11 +5762,11 @@ ; ; BDVER2-LABEL: test_psllw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psllw %mm1, %mm0 # sched: [3:0.50] +; BDVER2-NEXT: psllw (%rdi), %mm0 # sched: [8:0.50] +; BDVER2-NEXT: psllw $7, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psllw: ; 
BTVER2: # %bb.0: @@ -5860,11 +5860,11 @@ ; ; BDVER2-LABEL: test_psrad: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psrad %mm1, %mm0 # sched: [3:0.50] +; BDVER2-NEXT: psrad (%rdi), %mm0 # sched: [8:0.50] +; BDVER2-NEXT: psrad $7, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psrad: ; BTVER2: # %bb.0: @@ -5958,11 +5958,11 @@ ; ; BDVER2-LABEL: test_psraw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psraw %mm1, %mm0 # sched: [3:0.50] +; BDVER2-NEXT: psraw (%rdi), %mm0 # sched: [8:0.50] +; BDVER2-NEXT: psraw $7, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psraw: ; BTVER2: # %bb.0: @@ -6056,11 +6056,11 @@ ; ; BDVER2-LABEL: test_psrld: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psrld %mm1, %mm0 # sched: [3:0.50] +; BDVER2-NEXT: psrld (%rdi), %mm0 # sched: [8:0.50] +; BDVER2-NEXT: psrld $7, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psrld: ; BTVER2: # %bb.0: @@ -6154,11 +6154,11 @@ ; ; BDVER2-LABEL: test_psrlq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: psrlq (%rdi), %mm0 # 
sched: [6:1.00] -; BDVER2-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psrlq %mm1, %mm0 # sched: [3:0.50] +; BDVER2-NEXT: psrlq (%rdi), %mm0 # sched: [8:0.50] +; BDVER2-NEXT: psrlq $7, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psrlq: ; BTVER2: # %bb.0: @@ -6252,11 +6252,11 @@ ; ; BDVER2-LABEL: test_psrlw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] -; BDVER2-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psrlw %mm1, %mm0 # sched: [3:0.50] +; BDVER2-NEXT: psrlw (%rdi), %mm0 # sched: [8:0.50] +; BDVER2-NEXT: psrlw $7, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psrlw: ; BTVER2: # %bb.0: @@ -6342,10 +6342,10 @@ ; ; BDVER2-LABEL: test_psubb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psubb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psubb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psubb: ; BTVER2: # %bb.0: @@ -6427,10 +6427,10 @@ ; ; BDVER2-LABEL: test_psubd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psubd %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psubd (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: 
retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psubd: ; BTVER2: # %bb.0: @@ -6512,10 +6512,10 @@ ; ; BDVER2-LABEL: test_psubq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psubq %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psubq (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psubq: ; BTVER2: # %bb.0: @@ -6597,10 +6597,10 @@ ; ; BDVER2-LABEL: test_psubsb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psubsb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psubsb (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psubsb: ; BTVER2: # %bb.0: @@ -6682,10 +6682,10 @@ ; ; BDVER2-LABEL: test_psubsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psubsw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psubsw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psubsw: ; BTVER2: # %bb.0: @@ -6767,10 +6767,10 @@ ; ; BDVER2-LABEL: test_psubusb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psubusb %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psubusb (%rdi), %mm0 # sched: [7:0.50] +; 
BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psubusb: ; BTVER2: # %bb.0: @@ -6852,10 +6852,10 @@ ; ; BDVER2-LABEL: test_psubusw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psubusw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psubusw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psubusw: ; BTVER2: # %bb.0: @@ -6937,10 +6937,10 @@ ; ; BDVER2-LABEL: test_psubw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] -; BDVER2-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: psubw %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: psubw (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_psubw: ; BTVER2: # %bb.0: @@ -7022,10 +7022,10 @@ ; ; BDVER2-LABEL: test_punpckhbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] -; BDVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [2:0.50] +; BDVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_punpckhbw: ; BTVER2: # %bb.0: @@ -7107,10 +7107,10 @@ ; ; BDVER2-LABEL: 
test_punpckhdq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] -; BDVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [2:0.50] +; BDVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_punpckhdq: ; BTVER2: # %bb.0: @@ -7192,10 +7192,10 @@ ; ; BDVER2-LABEL: test_punpckhwd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; BDVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [2:0.50] +; BDVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_punpckhwd: ; BTVER2: # %bb.0: @@ -7277,10 +7277,10 @@ ; ; BDVER2-LABEL: test_punpcklbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] -; BDVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [2:0.50] +; BDVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: 
test_punpcklbw: ; BTVER2: # %bb.0: @@ -7362,10 +7362,10 @@ ; ; BDVER2-LABEL: test_punpckldq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] -; BDVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [2:0.50] +; BDVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_punpckldq: ; BTVER2: # %bb.0: @@ -7447,10 +7447,10 @@ ; ; BDVER2-LABEL: test_punpcklwd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] -; BDVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [2:0.50] +; BDVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_punpcklwd: ; BTVER2: # %bb.0: @@ -7532,10 +7532,10 @@ ; ; BDVER2-LABEL: test_pxor: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] -; BDVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; BDVER2-NEXT: movq %mm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: pxor %mm1, %mm0 # sched: [2:0.50] +; BDVER2-NEXT: pxor (%rdi), %mm0 # sched: [7:0.50] +; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pxor: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll +++ 
llvm/trunk/test/CodeGen/X86/popcnt-schedule.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -63,11 +63,11 @@ ; ; BDVER2-LABEL: test_ctpop_i16: ; BDVER2: # %bb.0: -; BDVER2-NEXT: popcntw (%rsi), %cx # sched: [9:1.00] -; BDVER2-NEXT: popcntw %di, %ax # sched: [3:1.00] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] +; BDVER2-NEXT: popcntw (%rsi), %cx # sched: [8:0.50] +; BDVER2-NEXT: popcntw %di, %ax # sched: [4:0.50] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ctpop_i16: ; BTVER2: # %bb.0: @@ -137,10 +137,10 @@ ; ; BDVER2-LABEL: test_ctpop_i32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: popcntl (%rsi), %ecx # sched: [9:1.00] -; BDVER2-NEXT: popcntl %edi, %eax # sched: [3:1.00] -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: popcntl (%rsi), %ecx # sched: [8:0.50] +; BDVER2-NEXT: popcntl %edi, %eax # sched: [4:0.50] +; BDVER2-NEXT: orl 
%ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ctpop_i32: ; BTVER2: # %bb.0: @@ -208,10 +208,10 @@ ; ; BDVER2-LABEL: test_ctpop_i64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00] -; BDVER2-NEXT: popcntq %rdi, %rax # sched: [3:1.00] -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: popcntq (%rsi), %rcx # sched: [8:0.50] +; BDVER2-NEXT: popcntq %rdi, %rax # sched: [4:0.50] +; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ctpop_i64: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll +++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+fma4 -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL @@ -40,9 +40,9 @@ ; ; BDVER2-LABEL: f32_no_estimate: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; BDVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:14.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; BDVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [9:9.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: f32_no_estimate: ; BTVER2: # %bb.0: @@ -117,7 +117,7 @@ ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: f32_one_step: ; BTVER2: # %bb.0: @@ -219,12 +219,12 @@ ; BDVER2-LABEL: f32_two_step: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] ; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: f32_two_step: ; BTVER2: # %bb.0: @@ -326,9 +326,9 @@ ; ; BDVER2-LABEL: v4f32_no_estimate: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50] -; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [5:0.50] +; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: 
[9:9.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v4f32_no_estimate: ; BTVER2: # %bb.0: @@ -403,7 +403,7 @@ ; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v4f32_one_step: ; BTVER2: # %bb.0: @@ -507,12 +507,12 @@ ; BDVER2-LABEL: v4f32_two_step: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] +; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v4f32_two_step: ; BTVER2: # %bb.0: @@ -617,9 +617,9 @@ ; ; BDVER2-LABEL: v8f32_no_estimate: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:0.50] +; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [9:19.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v8f32_no_estimate: ; BTVER2: # %bb.0: @@ -698,10 +698,10 @@ ; ; BDVER2-LABEL: v8f32_one_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] ; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # 
sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v8f32_one_step: ; BTVER2: # %bb.0: @@ -817,13 +817,13 @@ ; ; BDVER2-LABEL: v8f32_two_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v8f32_two_step: ; BTVER2: # %bb.0: @@ -936,10 +936,10 @@ ; ; BDVER2-LABEL: v16f32_no_estimate: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00] -; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:0.50] +; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [9:19.00] +; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [9:19.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v16f32_no_estimate: ; BTVER2: # %bb.0: @@ -1045,14 +1045,14 @@ ; ; BDVER2-LABEL: v16f32_one_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50] +; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [5:2.00] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # 
sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 # sched: [5:0.50] +; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v16f32_one_step: ; BTVER2: # %bb.0: @@ -1226,18 +1226,18 @@ ; ; BDVER2-LABEL: v16f32_two_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50] +; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00] +; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v16f32_two_step: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll +++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP ; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+fma4 -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL @@ -34,8 +34,8 @@ ; BDVER2-LABEL: f32_no_step_2: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: f32_no_step_2: ; BTVER2: # %bb.0: @@ -113,8 +113,8 @@ ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: f32_one_step_2: ; BTVER2: # %bb.0: @@ 
-216,9 +216,9 @@ ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] +; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:1.00] ; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: f32_one_step_2_divs: ; BTVER2: # %bb.0: @@ -336,13 +336,13 @@ ; BDVER2-LABEL: f32_two_step_2: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] ; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: f32_two_step_2: ; BTVER2: # %bb.0: @@ -465,8 +465,8 @@ ; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v4f32_one_step2: ; BTVER2: # %bb.0: @@ -570,9 +570,9 @@ ; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), 
%xmm1, %xmm0, %xmm0 # sched: [10:0.50] ; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:1.00] ; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v4f32_one_step_2_divs: ; BTVER2: # %bb.0: @@ -692,13 +692,13 @@ ; BDVER2-LABEL: v4f32_two_step2: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] +; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v4f32_two_step2: ; BTVER2: # %bb.0: @@ -826,11 +826,11 @@ ; ; BDVER2-LABEL: v8f32_one_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] ; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v8f32_one_step2: ; BTVER2: # %bb.0: @@ -940,12 +940,12 @@ ; ; BDVER2-LABEL: v8f32_one_step_2_divs: ; 
BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] +; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00] ; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00] -; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [10:2.00] +; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v8f32_one_step_2_divs: ; BTVER2: # %bb.0: @@ -1078,14 +1078,14 @@ ; ; BDVER2-LABEL: v8f32_two_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] +; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v8f32_two_step2: ; BTVER2: # %bb.0: @@ -1190,8 +1190,8 @@ ; ; BDVER2-LABEL: v8f32_no_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v8f32_no_step: ; BTVER2: # %bb.0: @@ -1249,9 +1249,9 @@ ; ; BDVER2-LABEL: v8f32_no_step2: 
; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v8f32_no_step2: ; BTVER2: # %bb.0: @@ -1361,16 +1361,16 @@ ; ; BDVER2-LABEL: v16f32_one_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50] -; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [7:2.00] +; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50] +; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [5:2.00] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 # sched: [5:0.50] +; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v16f32_one_step2: ; BTVER2: # %bb.0: @@ -1532,18 +1532,18 @@ ; ; BDVER2-LABEL: v16f32_one_step_2_divs: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50] +; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: 
[5:0.50] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00] +; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [10:2.00] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:1.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:1.00] -; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [10:2.00] +; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v16f32_one_step_2_divs: ; BTVER2: # %bb.0: @@ -1745,20 +1745,20 @@ ; ; BDVER2-LABEL: v16f32_two_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00] -; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50] +; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00] +; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50] -; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50] ; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50] ; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50] ; BDVER2-NEXT: 
vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v16f32_two_step2: ; BTVER2: # %bb.0: @@ -1904,9 +1904,9 @@ ; ; BDVER2-LABEL: v16f32_no_step: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [5:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v16f32_no_step: ; BTVER2: # %bb.0: @@ -1976,11 +1976,11 @@ ; ; BDVER2-LABEL: v16f32_no_step2: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00] -; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] -; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [5:2.00] +; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00] +; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: v16f32_no_step2: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll +++ llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc 
< %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+slow-shld | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+slow-shld | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 @@ -19,9 +19,9 @@ ; ; BDVER12-LABEL: lshift10_optsize: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33] -; BDVER12-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BDVER12-NEXT: shldq $10, %rsi, %rax # sched: [4:3.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift10_optsize: ; BTVER2: # %bb.0: # %entry @@ -47,7 +47,7 @@ ; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50] ; BDVER12-NEXT: shrq $54, %rsi # sched: [1:0.50] ; BDVER12-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift10: ; BTVER2: # %bb.0: # %entry @@ -77,9 +77,9 @@ ; ; BDVER12-LABEL: rshift10_optsize: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33] -; BDVER12-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BDVER12-NEXT: shrdq $62, %rsi, %rax # sched: [4:3.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: rshift10_optsize: ; BTVER2: # 
%bb.0: # %entry @@ -105,7 +105,7 @@ ; BDVER12: # %bb.0: # %entry ; BDVER12-NEXT: shrq $62, %rdi # sched: [1:0.50] ; BDVER12-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: rshift10: ; BTVER2: # %bb.0: # %entry @@ -135,11 +135,11 @@ ; ; BDVER12-LABEL: lshift_cl_optsize: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33] -; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33] +; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50] +; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50] ; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift_cl_optsize: ; BTVER2: # %bb.0: # %entry @@ -167,14 +167,14 @@ ; ; BDVER12-LABEL: lshift_cl: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33] -; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.33] -; BDVER12-NEXT: shlq %cl, %rdi # sched: [3:1.50] -; BDVER12-NEXT: negl %ecx # sched: [1:0.33] +; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50] +; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50] +; BDVER12-NEXT: shlq %cl, %rdi # sched: [1:0.50] +; BDVER12-NEXT: negl %ecx # sched: [1:0.50] ; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shrq %cl, %rax # sched: [3:1.50] -; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shrq %cl, %rax # sched: [1:0.50] +; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift_cl: ; BTVER2: # %bb.0: # %entry @@ -211,11 +211,11 @@ ; ; BDVER12-LABEL: rshift_cl_optsize: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33] -; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33] +; BDVER12-NEXT: movq 
%rdx, %rcx # sched: [1:0.50] +; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50] ; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: rshift_cl_optsize: ; BTVER2: # %bb.0: # %entry @@ -243,14 +243,14 @@ ; ; BDVER12-LABEL: rshift_cl: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33] -; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.33] -; BDVER12-NEXT: shrq %cl, %rdi # sched: [3:1.50] -; BDVER12-NEXT: negl %ecx # sched: [1:0.33] +; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50] +; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50] +; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50] +; BDVER12-NEXT: negl %ecx # sched: [1:0.50] ; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shlq %cl, %rax # sched: [3:1.50] -; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50] +; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: rshift_cl: ; BTVER2: # %bb.0: # %entry @@ -287,10 +287,10 @@ ; ; BDVER12-LABEL: lshift_mem_cl_optsize: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.33] +; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50] ; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [4:11.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift_mem_cl_optsize: ; BTVER2: # %bb.0: # %entry @@ -318,15 +318,15 @@ ; ; BDVER12-LABEL: lshift_mem_cl: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.33] ; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; 
BDVER12-NEXT: shlq %cl, %rax # sched: [3:1.50] -; BDVER12-NEXT: negl %ecx # sched: [1:0.33] +; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50] +; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50] +; BDVER12-NEXT: negl %ecx # sched: [1:0.50] ; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx -; BDVER12-NEXT: shrq %cl, %rdi # sched: [3:1.50] -; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.33] -; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50] +; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50] +; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift_mem_cl: ; BTVER2: # %bb.0: # %entry @@ -358,11 +358,11 @@ ; BDVER12-LABEL: lshift_mem: ; BDVER12: # %bb.0: # %entry ; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; BDVER12-NEXT: shlq $10, %rax # sched: [1:0.50] ; BDVER12-NEXT: shrq $54, %rdi # sched: [1:0.50] -; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.33] -; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shlq $10, %rax # sched: [1:0.50] +; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50] +; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift_mem: ; BTVER2: # %bb.0: # %entry @@ -389,8 +389,8 @@ ; ; BDVER12-LABEL: lshift_mem_optsize: ; BDVER12: # %bb.0: # %entry -; BDVER12-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [4:11.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift_mem_optsize: ; BTVER2: # %bb.0: # %entry @@ -418,9 +418,9 @@ ; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] ; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50] ; BDVER12-NEXT: shrq $54, %rax # sched: [1:0.50] -; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33] -; 
BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50] +; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift_mem_b: ; BTVER2: # %bb.0: # %entry @@ -450,9 +450,9 @@ ; BDVER12-LABEL: lshift_mem_b_optsize: ; BDVER12: # %bb.0: # %entry ; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] -; BDVER12-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67] -; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: shrdq $54, %rdi, %rax # sched: [4:3.00] +; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:0.50] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: lshift_mem_b_optsize: ; BTVER2: # %bb.0: # %entry Index: llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll +++ llvm/trunk/test/CodeGen/X86/schedule-x86_32.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK 
--check-prefix=ZNVER1 @@ -81,9 +81,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: aaa # sched: [100:0.33] +; BDVER2-NEXT: aaa # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_aaa: ; BTVER2: # %bb.0: @@ -181,10 +181,10 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: aad # sched: [100:0.33] -; BDVER2-NEXT: aad $16 # sched: [100:0.33] +; BDVER2-NEXT: aad # sched: [100:0.50] +; BDVER2-NEXT: aad $16 # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_aad: ; BTVER2: # %bb.0: @@ -284,10 +284,10 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: aam # sched: [100:0.33] -; BDVER2-NEXT: aam $16 # sched: [100:0.33] +; BDVER2-NEXT: aam # sched: [100:0.50] +; BDVER2-NEXT: aam $16 # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_aam: ; BTVER2: # %bb.0: @@ -379,9 +379,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: aas # sched: [100:0.33] +; BDVER2-NEXT: aas # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_aas: ; BTVER2: # %bb.0: @@ -480,9 +480,9 @@ ; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.33] +; BDVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_arpl: ; 
BTVER2: # %bb.0: @@ -644,7 +644,7 @@ ; ; BDVER2-LABEL: test_bound: ; BDVER2: # %bb.0: -; BDVER2-NEXT: pushl %esi # sched: [5:1.00] +; BDVER2-NEXT: pushl %esi # sched: [1:0.50] ; BDVER2-NEXT: .cfi_def_cfa_offset 8 ; BDVER2-NEXT: .cfi_offset %esi, -8 ; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] @@ -652,12 +652,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: bound %ax, (%esi) # sched: [100:0.33] -; BDVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.33] +; BDVER2-NEXT: bound %ax, (%esi) # sched: [100:0.50] +; BDVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: popl %esi # sched: [6:0.50] +; BDVER2-NEXT: popl %esi # sched: [5:0.50] ; BDVER2-NEXT: .cfi_def_cfa_offset 4 -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_bound: ; BTVER2: # %bb.0: @@ -767,9 +767,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: daa # sched: [100:0.33] +; BDVER2-NEXT: daa # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_daa: ; BTVER2: # %bb.0: @@ -859,9 +859,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: das # sched: [100:0.33] +; BDVER2-NEXT: das # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_das: ; BTVER2: # %bb.0: @@ -968,10 +968,10 @@ ; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: decw %ax # sched: [1:0.33] -; BDVER2-NEXT: decw (%ecx) # sched: [7:1.00] +; BDVER2-NEXT: decw %ax # sched: [1:0.50] +; BDVER2-NEXT: decw (%ecx) # sched: 
[6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_dec16: ; BTVER2: # %bb.0: @@ -1081,10 +1081,10 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: decl %eax # sched: [1:0.33] -; BDVER2-NEXT: decl (%ecx) # sched: [7:1.00] +; BDVER2-NEXT: decl %eax # sched: [1:0.50] +; BDVER2-NEXT: decl (%ecx) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_dec32: ; BTVER2: # %bb.0: @@ -1195,10 +1195,10 @@ ; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: incw %ax # sched: [1:0.33] -; BDVER2-NEXT: incw (%ecx) # sched: [7:1.00] +; BDVER2-NEXT: incw %ax # sched: [1:0.50] +; BDVER2-NEXT: incw (%ecx) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_inc16: ; BTVER2: # %bb.0: @@ -1308,10 +1308,10 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: incl %eax # sched: [1:0.33] -; BDVER2-NEXT: incl (%ecx) # sched: [7:1.00] +; BDVER2-NEXT: incl %eax # sched: [1:0.50] +; BDVER2-NEXT: incl (%ecx) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_inc32: ; BTVER2: # %bb.0: @@ -1396,9 +1396,9 @@ ; BDVER2-LABEL: test_into: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: into # sched: [100:0.33] +; BDVER2-NEXT: into # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_into: ; BTVER2: # %bb.0: @@ -1496,10 +1496,10 @@ ; BDVER2: 
# %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: JXTGT: -; BDVER2-NEXT: jcxz JXTGT # sched: [2:1.00] -; BDVER2-NEXT: jecxz JXTGT # sched: [2:1.00] +; BDVER2-NEXT: jcxz JXTGT # sched: [1:1.00] +; BDVER2-NEXT: jecxz JXTGT # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_jcxz_jecxz: ; BTVER2: # %bb.0: @@ -1584,9 +1584,9 @@ ; BDVER2-LABEL: test_leave: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: leave # sched: [7:0.67] +; BDVER2-NEXT: leave # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_leave: ; BTVER2: # %bb.0: @@ -1747,19 +1747,19 @@ ; BDVER2-LABEL: test_pop_push: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: popl %ds # sched: [100:0.33] -; BDVER2-NEXT: popl %es # sched: [100:0.33] -; BDVER2-NEXT: popl %ss # sched: [100:0.33] -; BDVER2-NEXT: popl %fs # sched: [100:0.33] -; BDVER2-NEXT: popl %gs # sched: [100:0.33] -; BDVER2-NEXT: pushl %cs # sched: [100:0.33] -; BDVER2-NEXT: pushl %ds # sched: [100:0.33] -; BDVER2-NEXT: pushl %es # sched: [100:0.33] -; BDVER2-NEXT: pushl %ss # sched: [100:0.33] -; BDVER2-NEXT: pushl %fs # sched: [100:0.33] -; BDVER2-NEXT: pushl %gs # sched: [100:0.33] +; BDVER2-NEXT: popl %ds # sched: [100:0.50] +; BDVER2-NEXT: popl %es # sched: [100:0.50] +; BDVER2-NEXT: popl %ss # sched: [100:0.50] +; BDVER2-NEXT: popl %fs # sched: [100:0.50] +; BDVER2-NEXT: popl %gs # sched: [100:0.50] +; BDVER2-NEXT: pushl %cs # sched: [100:0.50] +; BDVER2-NEXT: pushl %ds # sched: [100:0.50] +; BDVER2-NEXT: pushl %es # sched: [100:0.50] +; BDVER2-NEXT: pushl %ss # sched: [100:0.50] +; BDVER2-NEXT: pushl %fs # sched: [100:0.50] +; BDVER2-NEXT: pushl %gs # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_pop_push: ; BTVER2: # %bb.0: @@ -1922,15 +1922,15 @@ ; BDVER2-NEXT: 
movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: popw %ax # sched: [6:0.50] -; BDVER2-NEXT: popw (%ecx) # sched: [6:0.50] -; BDVER2-NEXT: pushw %ax # sched: [5:1.00] -; BDVER2-NEXT: pushw (%ecx) # sched: [5:1.00] +; BDVER2-NEXT: popw %ax # sched: [5:0.50] +; BDVER2-NEXT: popw (%ecx) # sched: [6:1.00] +; BDVER2-NEXT: pushw %ax # sched: [1:0.50] +; BDVER2-NEXT: pushw (%ecx) # sched: [6:1.00] ; BDVER2-NEXT: pushw $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: pushw $7 # sched: [1:1.00] +; BDVER2-NEXT: # sched: [1:0.50] +; BDVER2-NEXT: pushw $7 # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_pop_push_16: ; BTVER2: # %bb.0: @@ -2089,15 +2089,15 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: popl %eax # sched: [6:0.50] -; BDVER2-NEXT: popl (%ecx) # sched: [6:0.50] -; BDVER2-NEXT: pushl %eax # sched: [5:1.00] -; BDVER2-NEXT: pushl (%ecx) # sched: [5:1.00] +; BDVER2-NEXT: popl %eax # sched: [5:0.50] +; BDVER2-NEXT: popl (%ecx) # sched: [6:1.00] +; BDVER2-NEXT: pushl %eax # sched: [1:0.50] +; BDVER2-NEXT: pushl (%ecx) # sched: [6:1.00] ; BDVER2-NEXT: pushl $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: pushl $7 # sched: [1:1.00] +; BDVER2-NEXT: # sched: [1:0.50] +; BDVER2-NEXT: pushl $7 # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_pop_push_32: ; BTVER2: # %bb.0: @@ -2218,10 +2218,10 @@ ; BDVER2-NEXT: #APP ; BDVER2-NEXT: popal # sched: [5:0.50] ; BDVER2-NEXT: popfl # sched: [5:0.50] -; BDVER2-NEXT: pushal # sched: [1:1.00] -; BDVER2-NEXT: pushfl # sched: [1:1.00] +; BDVER2-NEXT: pushal # sched: [1:0.50] +; BDVER2-NEXT: pushfl # sched: 
[1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_popa_popf_pusha_pushf: ; BTVER2: # %bb.0: @@ -2344,14 +2344,14 @@ ; BDVER2-LABEL: test_ret: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; BDVER2-NEXT: retl $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: lretl # sched: [6:1.00] +; BDVER2-NEXT: # sched: [5:1.00] +; BDVER2-NEXT: lretl # sched: [5:1.00] ; BDVER2-NEXT: lretl $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_ret: ; BTVER2: # %bb.0: @@ -2440,9 +2440,9 @@ ; BDVER2-LABEL: test_salc: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: salc # sched: [1:0.33] +; BDVER2-NEXT: salc # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_salc: ; BTVER2: # %bb.0: @@ -2567,11 +2567,11 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgl %eax, %eax # sched: [2:1.00] -; BDVER2-NEXT: xchgl %ecx, %eax # sched: [2:1.00] -; BDVER2-NEXT: xchgl %eax, (%edx) # sched: [6:1.00] +; BDVER2-NEXT: xchgl %eax, %eax # sched: [1:1.00] +; BDVER2-NEXT: xchgl %ecx, %eax # sched: [1:1.00] +; BDVER2-NEXT: xchgl %eax, (%edx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_xchg_32: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll +++ llvm/trunk/test/CodeGen/X86/schedule-x86_64.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 @@ -112,14 +112,14 @@ ; BDVER2-LABEL: test_adc_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: adcb $7, %al # sched: [2:0.67] -; BDVER2-NEXT: adcb $7, %dil # sched: [2:0.67] -; BDVER2-NEXT: adcb $7, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: adcb %dl, %dil # sched: [2:0.67] -; BDVER2-NEXT: adcb %dil, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: adcb (%rsi), %dil # sched: [7:0.67] +; BDVER2-NEXT: adcb $7, %al # sched: [1:1.00] +; BDVER2-NEXT: adcb $7, %dil # sched: [1:1.00] +; BDVER2-NEXT: adcb $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: adcb %dl, %dil # sched: [1:1.00] +; BDVER2-NEXT: adcb %dil, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: adcb (%rsi), %dil # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_adc_8: ; BTVER2: # %bb.0: @@ -288,18 +288,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: adcw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: adcw $511, %di # imm = 0x1FF -; 
BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: adcw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [9:1.00] -; BDVER2-NEXT: adcw $7, %di # sched: [2:0.67] -; BDVER2-NEXT: adcw $7, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: adcw %dx, %di # sched: [2:0.67] -; BDVER2-NEXT: adcw %di, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: adcw (%rsi), %di # sched: [7:0.67] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: adcw $7, %di # sched: [1:1.00] +; BDVER2-NEXT: adcw $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: adcw %dx, %di # sched: [1:1.00] +; BDVER2-NEXT: adcw %di, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: adcw (%rsi), %di # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_adc_16: ; BTVER2: # %bb.0: @@ -478,18 +478,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: adcl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: adcl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [9:1.00] -; BDVER2-NEXT: adcl $7, %edi # sched: [2:0.67] -; BDVER2-NEXT: adcl $7, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: adcl %edx, %edi # sched: [2:0.67] -; BDVER2-NEXT: adcl %edi, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: adcl (%rsi), %edi # sched: [7:0.67] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: adcl $7, %edi # sched: [1:1.00] +; BDVER2-NEXT: adcl $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: adcl %edx, %edi # sched: [1:1.00] +; BDVER2-NEXT: adcl %edi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: adcl (%rsi), %edi # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_adc_32: ; BTVER2: # %bb.0: @@ -668,18 +668,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: adcq $665536, %rax # imm = 
0xA27C0 -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: adcq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [9:1.00] -; BDVER2-NEXT: adcq $7, %rdi # sched: [2:0.67] -; BDVER2-NEXT: adcq $7, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: adcq %rdx, %rdi # sched: [2:0.67] -; BDVER2-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: adcq (%rsi), %rdi # sched: [7:0.67] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: adcq $7, %rdi # sched: [1:1.00] +; BDVER2-NEXT: adcq $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: adcq %rdx, %rdi # sched: [1:1.00] +; BDVER2-NEXT: adcq %rdi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: adcq (%rsi), %rdi # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_adc_64: ; BTVER2: # %bb.0: @@ -818,14 +818,14 @@ ; BDVER2-LABEL: test_add_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: addb $7, %al # sched: [1:0.33] -; BDVER2-NEXT: addb $7, %dil # sched: [1:0.33] -; BDVER2-NEXT: addb $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: addb %dl, %dil # sched: [1:0.33] -; BDVER2-NEXT: addb %dil, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: addb (%rsi), %dil # sched: [6:0.50] +; BDVER2-NEXT: addb $7, %al # sched: [1:0.50] +; BDVER2-NEXT: addb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: addb $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: addb %dl, %dil # sched: [1:0.50] +; BDVER2-NEXT: addb %dil, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: addb (%rsi), %dil # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_add_8: ; BTVER2: # %bb.0: @@ -994,18 +994,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: addw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: addw 
$511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: addw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: addw $7, %di # sched: [1:0.33] -; BDVER2-NEXT: addw $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: addw %dx, %di # sched: [1:0.33] -; BDVER2-NEXT: addw %di, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: addw (%rsi), %di # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: addw $7, %di # sched: [1:0.50] +; BDVER2-NEXT: addw $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: addw %dx, %di # sched: [1:0.50] +; BDVER2-NEXT: addw %di, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: addw (%rsi), %di # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_add_16: ; BTVER2: # %bb.0: @@ -1184,18 +1184,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: addl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: addl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: addl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: addl $7, %edi # sched: [1:0.33] -; BDVER2-NEXT: addl $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: addl %edx, %edi # sched: [1:0.33] -; BDVER2-NEXT: addl %edi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: addl (%rsi), %edi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: addl $7, %edi # sched: [1:0.50] +; BDVER2-NEXT: addl $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: addl %edx, %edi # sched: [1:0.50] +; BDVER2-NEXT: addl %edi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: addl (%rsi), %edi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_add_32: ; BTVER2: # %bb.0: @@ -1374,18 +1374,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; 
BDVER2-NEXT: addq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: addq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: addq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: addq $7, %rdi # sched: [1:0.33] -; BDVER2-NEXT: addq $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: addq %rdx, %rdi # sched: [1:0.33] -; BDVER2-NEXT: addq %rdi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: addq (%rsi), %rdi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: addq $7, %rdi # sched: [1:0.50] +; BDVER2-NEXT: addq $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: addq %rdx, %rdi # sched: [1:0.50] +; BDVER2-NEXT: addq %rdi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: addq (%rsi), %rdi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_add_64: ; BTVER2: # %bb.0: @@ -1524,14 +1524,14 @@ ; BDVER2-LABEL: test_and_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: andb $7, %al # sched: [1:0.33] -; BDVER2-NEXT: andb $7, %dil # sched: [1:0.33] -; BDVER2-NEXT: andb $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: andb %dl, %dil # sched: [1:0.33] -; BDVER2-NEXT: andb %dil, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: andb (%rsi), %dil # sched: [6:0.50] +; BDVER2-NEXT: andb $7, %al # sched: [1:0.50] +; BDVER2-NEXT: andb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: andb $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: andb %dl, %dil # sched: [1:0.50] +; BDVER2-NEXT: andb %dil, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: andb (%rsi), %dil # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_and_8: ; BTVER2: # %bb.0: @@ -1700,18 +1700,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: andw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; 
BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: andw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: andw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: andw $7, %di # sched: [1:0.33] -; BDVER2-NEXT: andw $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: andw %dx, %di # sched: [1:0.33] -; BDVER2-NEXT: andw %di, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: andw (%rsi), %di # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: andw $7, %di # sched: [1:0.50] +; BDVER2-NEXT: andw $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: andw %dx, %di # sched: [1:0.50] +; BDVER2-NEXT: andw %di, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: andw (%rsi), %di # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_and_16: ; BTVER2: # %bb.0: @@ -1890,18 +1890,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: andl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: andl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: andl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: andl $7, %edi # sched: [1:0.33] -; BDVER2-NEXT: andl $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: andl %edx, %edi # sched: [1:0.33] -; BDVER2-NEXT: andl %edi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: andl (%rsi), %edi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: andl $7, %edi # sched: [1:0.50] +; BDVER2-NEXT: andl $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: andl %edx, %edi # sched: [1:0.50] +; BDVER2-NEXT: andl %edi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: andl (%rsi), %edi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_and_32: ; BTVER2: # %bb.0: @@ -2080,18 +2080,18 
@@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: andq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: andq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: andq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: andq $7, %rdi # sched: [1:0.33] -; BDVER2-NEXT: andq $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: andq %rdx, %rdi # sched: [1:0.33] -; BDVER2-NEXT: andq %rdi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: andq (%rsi), %rdi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: andq $7, %rdi # sched: [1:0.50] +; BDVER2-NEXT: andq $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: andq %rdx, %rdi # sched: [1:0.50] +; BDVER2-NEXT: andq %rdi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: andq (%rsi), %rdi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_and_64: ; BTVER2: # %bb.0: @@ -2214,12 +2214,12 @@ ; BDVER2-LABEL: test_bsf16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsfw %di, %ax # sched: [3:1.00] -; BDVER2-NEXT: bsfw (%rsi), %cx # sched: [8:1.00] +; BDVER2-NEXT: bsfw %di, %ax # sched: [3:2.00] +; BDVER2-NEXT: bsfw (%rsi), %cx # sched: [7:2.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bsf16: ; BTVER2: # %bb.0: @@ -2322,11 +2322,11 @@ ; BDVER2-LABEL: test_bsf32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsfl %edi, %eax # sched: [3:1.00] -; BDVER2-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00] +; BDVER2-NEXT: bsfl %edi, %eax # sched: [3:2.00] +; BDVER2-NEXT: bsfl (%rsi), %ecx # sched: [7:2.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orl %ecx, %eax # 
sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bsf32: ; BTVER2: # %bb.0: @@ -2427,11 +2427,11 @@ ; BDVER2-LABEL: test_bsf64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsfq %rdi, %rax # sched: [3:1.00] -; BDVER2-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00] +; BDVER2-NEXT: bsfq %rdi, %rax # sched: [3:2.00] +; BDVER2-NEXT: bsfq (%rsi), %rcx # sched: [7:2.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bsf64: ; BTVER2: # %bb.0: @@ -2541,12 +2541,12 @@ ; BDVER2-LABEL: test_bsr16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsrw %di, %ax # sched: [3:1.00] -; BDVER2-NEXT: bsrw (%rsi), %cx # sched: [8:1.00] +; BDVER2-NEXT: bsrw %di, %ax # sched: [4:2.00] +; BDVER2-NEXT: bsrw (%rsi), %cx # sched: [8:2.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] ; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bsr16: ; BTVER2: # %bb.0: @@ -2649,11 +2649,11 @@ ; BDVER2-LABEL: test_bsr32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsrl %edi, %eax # sched: [3:1.00] -; BDVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00] +; BDVER2-NEXT: bsrl %edi, %eax # sched: [4:2.00] +; BDVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:2.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bsr32: ; BTVER2: # %bb.0: @@ -2754,11 +2754,11 @@ ; BDVER2-LABEL: test_bsr64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: bsrq %rdi, %rax # 
sched: [3:1.00] -; BDVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00] +; BDVER2-NEXT: bsrq %rdi, %rax # sched: [4:2.00] +; BDVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:2.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bsr64: ; BTVER2: # %bb.0: @@ -2835,9 +2835,9 @@ ; ; BDVER2-LABEL: test_bswap32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33] +; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50] ; BDVER2-NEXT: bswapl %eax # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bswap32: ; BTVER2: # %bb.0: @@ -2904,9 +2904,9 @@ ; ; BDVER2-LABEL: test_bswap64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33] -; BDVER2-NEXT: bswapq %rax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BDVER2-NEXT: bswapq %rax # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bswap64: ; BTVER2: # %bb.0: @@ -3104,23 +3104,23 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: btw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: btcw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: btrw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: btsw %si, %di # sched: [1:0.50] -; BDVER2-NEXT: btw %si, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btcw %si, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btrw %si, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btsw %si, (%rdx) # sched: [9:1.00] +; BDVER2-NEXT: btcw %si, %di # sched: [2:0.50] +; BDVER2-NEXT: btrw %si, %di # sched: [2:0.50] +; BDVER2-NEXT: btsw %si, %di # sched: [2:0.50] +; BDVER2-NEXT: btw %si, (%rdx) # sched: [5:0.50] +; BDVER2-NEXT: btcw %si, (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: btrw %si, (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: btsw %si, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: btw $7, %di # sched: 
[1:0.50] -; BDVER2-NEXT: btcw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: btrw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: btsw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: btw $7, (%rdx) # sched: [6:0.50] +; BDVER2-NEXT: btcw $7, %di # sched: [2:0.50] +; BDVER2-NEXT: btrw $7, %di # sched: [2:0.50] +; BDVER2-NEXT: btsw $7, %di # sched: [2:0.50] +; BDVER2-NEXT: btw $7, (%rdx) # sched: [5:0.50] ; BDVER2-NEXT: btcw $7, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: btrw $7, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: btsw $7, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bt_btc_btr_bts_16: ; BTVER2: # %bb.0: @@ -3349,23 +3349,23 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: btl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: btcl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: btrl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: btsl %esi, %edi # sched: [1:0.50] -; BDVER2-NEXT: btl %esi, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btcl %esi, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btrl %esi, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btsl %esi, (%rdx) # sched: [9:1.00] +; BDVER2-NEXT: btcl %esi, %edi # sched: [2:0.50] +; BDVER2-NEXT: btrl %esi, %edi # sched: [2:0.50] +; BDVER2-NEXT: btsl %esi, %edi # sched: [2:0.50] +; BDVER2-NEXT: btl %esi, (%rdx) # sched: [5:0.50] +; BDVER2-NEXT: btcl %esi, (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: btrl %esi, (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: btsl %esi, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: btl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: btcl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: btrl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: btsl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: btl $7, (%rdx) # sched: [6:0.50] +; BDVER2-NEXT: btcl $7, %edi # sched: [2:0.50] +; BDVER2-NEXT: btrl $7, %edi # sched: [2:0.50] +; BDVER2-NEXT: btsl $7, %edi # sched: [2:0.50] +; BDVER2-NEXT: btl $7, (%rdx) # sched: [5:0.50] ; BDVER2-NEXT: btcl $7, (%rdx) # sched: 
[7:1.00] ; BDVER2-NEXT: btrl $7, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: btsl $7, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bt_btc_btr_bts_32: ; BTVER2: # %bb.0: @@ -3594,23 +3594,23 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: btq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btcq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btrq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btsq %rsi, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btq %rsi, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btcq %rsi, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btrq %rsi, (%rdx) # sched: [9:1.00] -; BDVER2-NEXT: btsq %rsi, (%rdx) # sched: [9:1.00] +; BDVER2-NEXT: btcq %rsi, %rdi # sched: [2:0.50] +; BDVER2-NEXT: btrq %rsi, %rdi # sched: [2:0.50] +; BDVER2-NEXT: btsq %rsi, %rdi # sched: [2:0.50] +; BDVER2-NEXT: btq %rsi, (%rdx) # sched: [5:0.50] +; BDVER2-NEXT: btcq %rsi, (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: btrq %rsi, (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: btsq %rsi, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: btq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btcq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btrq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btsq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: btq $7, (%rdx) # sched: [6:0.50] +; BDVER2-NEXT: btcq $7, %rdi # sched: [2:0.50] +; BDVER2-NEXT: btrq $7, %rdi # sched: [2:0.50] +; BDVER2-NEXT: btsq $7, %rdi # sched: [2:0.50] +; BDVER2-NEXT: btq $7, (%rdx) # sched: [5:0.50] ; BDVER2-NEXT: btcq $7, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: btrq $7, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: btsq $7, (%rdx) # sched: [7:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_bt_btc_btr_bts_64: ; BTVER2: # %bb.0: @@ -3761,14 +3761,14 @@ ; BDVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cbtw # sched: [1:0.33] +; 
BDVER2-NEXT: cbtw # sched: [1:0.50] ; BDVER2-NEXT: cltd # sched: [1:0.50] -; BDVER2-NEXT: cltq # sched: [1:0.33] +; BDVER2-NEXT: cltq # sched: [1:0.50] ; BDVER2-NEXT: cqto # sched: [1:0.50] -; BDVER2-NEXT: cwtd # sched: [2:1.00] -; BDVER2-NEXT: cwtl # sched: [1:0.33] +; BDVER2-NEXT: cwtd # sched: [1:0.50] +; BDVER2-NEXT: cwtl # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde: ; BTVER2: # %bb.0: @@ -3873,11 +3873,11 @@ ; BDVER2-LABEL: test_clc_cld_cmc: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: clc # sched: [1:0.25] -; BDVER2-NEXT: cld # sched: [1:0.33] -; BDVER2-NEXT: cmc # sched: [1:0.33] +; BDVER2-NEXT: clc # sched: [1:0.50] +; BDVER2-NEXT: cld # sched: [1:0.50] +; BDVER2-NEXT: cmc # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_clc_cld_cmc: ; BTVER2: # %bb.0: @@ -4000,14 +4000,14 @@ ; BDVER2-LABEL: test_cmp_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpb $7, %al # sched: [1:0.33] -; BDVER2-NEXT: cmpb $7, %dil # sched: [1:0.33] -; BDVER2-NEXT: cmpb $7, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: cmpb %dil, %dil # sched: [1:0.33] -; BDVER2-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: cmpb (%rsi), %dil # sched: [6:0.50] +; BDVER2-NEXT: cmpb $7, %al # sched: [1:0.50] +; BDVER2-NEXT: cmpb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: cmpb $7, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: cmpb %dil, %dil # sched: [1:0.50] +; BDVER2-NEXT: cmpb %dil, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: cmpb (%rsi), %dil # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmp_8: ; BTVER2: # %bb.0: @@ -4176,18 +4176,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: cmpw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # 
sched: [1:0.50] ; BDVER2-NEXT: cmpw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: cmpw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:0.50] -; BDVER2-NEXT: cmpw $7, %di # sched: [1:0.33] -; BDVER2-NEXT: cmpw $7, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: cmpw %di, %di # sched: [1:0.33] -; BDVER2-NEXT: cmpw %di, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: cmpw (%rsi), %di # sched: [6:0.50] +; BDVER2-NEXT: # sched: [5:0.50] +; BDVER2-NEXT: cmpw $7, %di # sched: [1:0.50] +; BDVER2-NEXT: cmpw $7, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: cmpw %di, %di # sched: [1:0.50] +; BDVER2-NEXT: cmpw %di, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: cmpw (%rsi), %di # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmp_16: ; BTVER2: # %bb.0: @@ -4366,18 +4366,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: cmpl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: cmpl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:0.50] -; BDVER2-NEXT: cmpl $7, %edi # sched: [1:0.33] -; BDVER2-NEXT: cmpl $7, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: cmpl %edi, %edi # sched: [1:0.33] -; BDVER2-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: cmpl (%rsi), %edi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [5:0.50] +; BDVER2-NEXT: cmpl $7, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmpl $7, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: cmpl %edi, %edi # sched: [1:0.50] +; BDVER2-NEXT: cmpl %edi, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: cmpl (%rsi), %edi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmp_32: ; BTVER2: # %bb.0: @@ -4556,18 +4556,18 @@ ; BDVER2: # 
%bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: cmpq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: cmpq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:0.50] -; BDVER2-NEXT: cmpq $7, %rdi # sched: [1:0.33] -; BDVER2-NEXT: cmpq $7, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: cmpq %rdi, %rdi # sched: [1:0.33] -; BDVER2-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [5:0.50] +; BDVER2-NEXT: cmpq $7, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmpq $7, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: cmpq %rdi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: cmpq %rdi, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: cmpq (%rsi), %rdi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmp_64: ; BTVER2: # %bb.0: @@ -4690,12 +4690,12 @@ ; BDVER2-LABEL: test_cmps: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [8:1.00] -; BDVER2-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [8:1.00] -; BDVER2-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [8:1.00] -; BDVER2-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [8:1.00] +; BDVER2-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.50] +; BDVER2-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.50] +; BDVER2-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.50] +; BDVER2-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmps: ; BTVER2: # %bb.0: @@ -4788,10 +4788,10 @@ ; BDVER2-LABEL: test_cmpxchg_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33] -; BDVER2-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00] +; BDVER2-NEXT: cmpxchgb %dil, %sil # 
sched: [3:1.00] +; BDVER2-NEXT: cmpxchgb %dil, (%rdx) # sched: [3:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmpxchg_8: ; BTVER2: # %bb.0: @@ -4879,10 +4879,10 @@ ; BDVER2-LABEL: test_cmpxchg_16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchgw %di, %si # sched: [5:1.33] -; BDVER2-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00] +; BDVER2-NEXT: cmpxchgw %di, %si # sched: [3:1.00] +; BDVER2-NEXT: cmpxchgw %di, (%rdx) # sched: [3:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmpxchg_16: ; BTVER2: # %bb.0: @@ -4970,10 +4970,10 @@ ; BDVER2-LABEL: test_cmpxchg_32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33] -; BDVER2-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00] +; BDVER2-NEXT: cmpxchgl %edi, %esi # sched: [3:1.00] +; BDVER2-NEXT: cmpxchgl %edi, (%rdx) # sched: [3:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmpxchg_32: ; BTVER2: # %bb.0: @@ -5061,10 +5061,10 @@ ; BDVER2-LABEL: test_cmpxchg_64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33] -; BDVER2-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00] +; BDVER2-NEXT: cmpxchgq %rdi, %rsi # sched: [3:1.00] +; BDVER2-NEXT: cmpxchgq %rdi, (%rdx) # sched: [3:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmpxchg_64: ; BTVER2: # %bb.0: @@ -5152,10 +5152,10 @@ ; BDVER2-LABEL: test_cmpxchg8b_cmpxchg16b: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00] -; BDVER2-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00] +; BDVER2-NEXT: cmpxchg8b (%rdi) # sched: [3:1.00] +; BDVER2-NEXT: cmpxchg16b (%rdi) # sched: [3:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: 
[1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cmpxchg8b_cmpxchg16b: ; BTVER2: # %bb.0: @@ -5236,9 +5236,9 @@ ; BDVER2-LABEL: test_cpuid: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: cpuid # sched: [100:0.33] +; BDVER2-NEXT: cpuid # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_cpuid: ; BTVER2: # %bb.0: @@ -5325,10 +5325,10 @@ ; BDVER2-LABEL: test_dec8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: decb %dil # sched: [1:0.33] -; BDVER2-NEXT: decb (%rsi) # sched: [7:1.00] +; BDVER2-NEXT: decb %dil # sched: [1:0.50] +; BDVER2-NEXT: decb (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_dec8: ; BTVER2: # %bb.0: @@ -5416,10 +5416,10 @@ ; BDVER2-LABEL: test_dec16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: decw %di # sched: [1:0.33] -; BDVER2-NEXT: decw (%rsi) # sched: [7:1.00] +; BDVER2-NEXT: decw %di # sched: [1:0.50] +; BDVER2-NEXT: decw (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_dec16: ; BTVER2: # %bb.0: @@ -5507,10 +5507,10 @@ ; BDVER2-LABEL: test_dec32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: decl %edi # sched: [1:0.33] -; BDVER2-NEXT: decl (%rsi) # sched: [7:1.00] +; BDVER2-NEXT: decl %edi # sched: [1:0.50] +; BDVER2-NEXT: decl (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_dec32: ; BTVER2: # %bb.0: @@ -5598,10 +5598,10 @@ ; BDVER2-LABEL: test_dec64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: decq %rdi # sched: [1:0.33] -; BDVER2-NEXT: decq (%rsi) # sched: [7:1.00] +; BDVER2-NEXT: decq %rdi # sched: [1:0.50] +; BDVER2-NEXT: decq (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; 
BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_dec64: ; BTVER2: # %bb.0: @@ -5756,16 +5756,16 @@ ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: divb %dil # sched: [25:10.00] -; BDVER2-NEXT: divb (%r8) # sched: [30:10.00] -; BDVER2-NEXT: divw %si # sched: [25:10.00] -; BDVER2-NEXT: divw (%r9) # sched: [30:10.00] -; BDVER2-NEXT: divl %edx # sched: [25:10.00] -; BDVER2-NEXT: divl (%rax) # sched: [30:10.00] -; BDVER2-NEXT: divq %rcx # sched: [25:10.00] -; BDVER2-NEXT: divq (%r10) # sched: [30:10.00] +; BDVER2-NEXT: divb %dil # sched: [12:12.00] +; BDVER2-NEXT: divb (%r8) # sched: [16:12.00] +; BDVER2-NEXT: divw %si # sched: [15:15.00] +; BDVER2-NEXT: divw (%r9) # sched: [19:15.00] +; BDVER2-NEXT: divl %edx # sched: [14:14.00] +; BDVER2-NEXT: divl (%rax) # sched: [18:14.00] +; BDVER2-NEXT: divq %rcx # sched: [14:14.00] +; BDVER2-NEXT: divq (%r10) # sched: [18:14.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_div: ; BTVER2: # %bb.0: @@ -5871,9 +5871,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: enter $7, $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [100:0.33] +; BDVER2-NEXT: # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_enter: ; BTVER2: # %bb.0: @@ -6028,16 +6028,16 @@ ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: idivb %dil # sched: [25:10.00] -; BDVER2-NEXT: idivb (%r8) # sched: [30:10.00] -; BDVER2-NEXT: idivw %si # sched: [25:10.00] -; BDVER2-NEXT: idivw (%r9) # sched: [30:10.00] -; BDVER2-NEXT: idivl %edx # sched: [25:10.00] -; BDVER2-NEXT: idivl (%rax) # sched: [30:10.00] -; BDVER2-NEXT: idivq %rcx # sched: 
[25:10.00] -; BDVER2-NEXT: idivq (%r10) # sched: [30:10.00] +; BDVER2-NEXT: idivb %dil # sched: [12:12.00] +; BDVER2-NEXT: idivb (%r8) # sched: [16:12.00] +; BDVER2-NEXT: idivw %si # sched: [15:17.00] +; BDVER2-NEXT: idivw (%r9) # sched: [19:17.00] +; BDVER2-NEXT: idivl %edx # sched: [14:25.00] +; BDVER2-NEXT: idivl (%rax) # sched: [18:25.00] +; BDVER2-NEXT: idivq %rcx # sched: [14:14.00] +; BDVER2-NEXT: idivq (%r10) # sched: [18:14.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_idiv: ; BTVER2: # %bb.0: @@ -6142,10 +6142,10 @@ ; BDVER2-LABEL: test_imul_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: imulb %dil # sched: [3:1.00] +; BDVER2-NEXT: imulb %dil # sched: [4:1.00] ; BDVER2-NEXT: imulb (%rsi) # sched: [8:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_imul_8: ; BTVER2: # %bb.0: @@ -6297,18 +6297,18 @@ ; BDVER2-LABEL: test_imul_16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: imulw %di # sched: [4:1.33] -; BDVER2-NEXT: imulw (%rsi) # sched: [9:1.33] -; BDVER2-NEXT: imulw %dx, %di # sched: [3:1.00] +; BDVER2-NEXT: imulw %di # sched: [4:1.00] +; BDVER2-NEXT: imulw (%rsi) # sched: [8:1.00] +; BDVER2-NEXT: imulw %dx, %di # sched: [4:1.00] ; BDVER2-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; BDVER2-NEXT: imulw $511, %di, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [4:1.00] +; BDVER2-NEXT: # sched: [5:1.00] ; BDVER2-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [8:1.00] -; BDVER2-NEXT: imulw $7, %di, %di # sched: [4:1.00] -; BDVER2-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] +; BDVER2-NEXT: # sched: [9:1.00] +; BDVER2-NEXT: imulw $7, %di, %di # sched: [5:1.00] +; BDVER2-NEXT: imulw $7, (%rsi), %di # sched: [9:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_imul_16: ; BTVER2: # 
%bb.0: @@ -6477,17 +6477,17 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: imull %edi # sched: [4:1.00] -; BDVER2-NEXT: imull (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: imull %edx, %edi # sched: [3:1.00] +; BDVER2-NEXT: imull (%rsi) # sched: [8:1.00] +; BDVER2-NEXT: imull %edx, %edi # sched: [4:1.00] ; BDVER2-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; BDVER2-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [3:1.00] +; BDVER2-NEXT: # sched: [4:1.00] ; BDVER2-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 ; BDVER2-NEXT: # sched: [8:1.00] -; BDVER2-NEXT: imull $7, %edi, %edi # sched: [3:1.00] +; BDVER2-NEXT: imull $7, %edi, %edi # sched: [4:1.00] ; BDVER2-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_imul_32: ; BTVER2: # %bb.0: @@ -6655,18 +6655,18 @@ ; BDVER2-LABEL: test_imul_64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: imulq %rdi # sched: [4:1.00] -; BDVER2-NEXT: imulq (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: imulq %rdx, %rdi # sched: [3:1.00] -; BDVER2-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] +; BDVER2-NEXT: imulq %rdi # sched: [6:4.00] +; BDVER2-NEXT: imulq (%rsi) # sched: [10:4.00] +; BDVER2-NEXT: imulq %rdx, %rdi # sched: [6:4.00] +; BDVER2-NEXT: imulq (%rsi), %rdi # sched: [10:4.00] ; BDVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [3:1.00] +; BDVER2-NEXT: # sched: [6:4.00] ; BDVER2-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [8:1.00] -; BDVER2-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00] -; BDVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00] +; BDVER2-NEXT: # sched: [10:4.00] +; BDVER2-NEXT: imulq $7, %rdi, %rdi # sched: [6:4.00] +; BDVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [10:4.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_imul_64: ; 
BTVER2: # %bb.0: @@ -6803,14 +6803,14 @@ ; BDVER2-LABEL: test_in: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: inb $7, %al # sched: [100:0.33] -; BDVER2-NEXT: inw $7, %ax # sched: [100:0.33] -; BDVER2-NEXT: inl $7, %eax # sched: [100:0.33] -; BDVER2-NEXT: inb %dx, %al # sched: [100:0.33] -; BDVER2-NEXT: inw %dx, %ax # sched: [100:0.33] -; BDVER2-NEXT: inl %dx, %eax # sched: [100:0.33] +; BDVER2-NEXT: inb $7, %al # sched: [100:0.50] +; BDVER2-NEXT: inw $7, %ax # sched: [100:0.50] +; BDVER2-NEXT: inl $7, %eax # sched: [100:0.50] +; BDVER2-NEXT: inb %dx, %al # sched: [100:0.50] +; BDVER2-NEXT: inw %dx, %ax # sched: [100:0.50] +; BDVER2-NEXT: inl %dx, %eax # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_in: ; BTVER2: # %bb.0: @@ -6907,10 +6907,10 @@ ; BDVER2-LABEL: test_inc8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: incb %dil # sched: [1:0.33] -; BDVER2-NEXT: incb (%rsi) # sched: [7:1.00] +; BDVER2-NEXT: incb %dil # sched: [1:0.50] +; BDVER2-NEXT: incb (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_inc8: ; BTVER2: # %bb.0: @@ -6998,10 +6998,10 @@ ; BDVER2-LABEL: test_inc16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: incw %di # sched: [1:0.33] -; BDVER2-NEXT: incw (%rsi) # sched: [7:1.00] +; BDVER2-NEXT: incw %di # sched: [1:0.50] +; BDVER2-NEXT: incw (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_inc16: ; BTVER2: # %bb.0: @@ -7089,10 +7089,10 @@ ; BDVER2-LABEL: test_inc32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: incl %edi # sched: [1:0.33] -; BDVER2-NEXT: incl (%rsi) # sched: [7:1.00] +; BDVER2-NEXT: incl %edi # sched: [1:0.50] +; BDVER2-NEXT: incl (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: 
[1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_inc32: ; BTVER2: # %bb.0: @@ -7180,10 +7180,10 @@ ; BDVER2-LABEL: test_inc64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: incq %rdi # sched: [1:0.33] -; BDVER2-NEXT: incq (%rsi) # sched: [7:1.00] +; BDVER2-NEXT: incq %rdi # sched: [1:0.50] +; BDVER2-NEXT: incq (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_inc64: ; BTVER2: # %bb.0: @@ -7280,11 +7280,11 @@ ; BDVER2-LABEL: test_ins: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.33] -; BDVER2-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.33] -; BDVER2-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.33] +; BDVER2-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.50] +; BDVER2-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.50] +; BDVER2-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ins: ; BTVER2: # %bb.0: @@ -7367,9 +7367,9 @@ ; BDVER2-LABEL: test_int: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: int $7 # sched: [100:0.33] +; BDVER2-NEXT: int $7 # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_int: ; BTVER2: # %bb.0: @@ -7456,10 +7456,10 @@ ; BDVER2-LABEL: test_invlpg_invlpga: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: invlpg (%rdi) # sched: [100:0.33] -; BDVER2-NEXT: invlpga %rax, %ecx # sched: [100:0.33] +; BDVER2-NEXT: invlpg (%rdi) # sched: [100:0.50] +; BDVER2-NEXT: invlpga %rax, %ecx # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_invlpg_invlpga: ; BTVER2: # %bb.0: @@ -7812,7 +7812,7 @@ ; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00] ; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00] ; 
BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_jcc: ; BTVER2: # %bb.0: @@ -7968,10 +7968,10 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: JXTGT: -; BDVER2-NEXT: jecxz JXTGT # sched: [2:1.00] -; BDVER2-NEXT: jrcxz JXTGT # sched: [2:1.00] +; BDVER2-NEXT: jecxz JXTGT # sched: [1:1.00] +; BDVER2-NEXT: jrcxz JXTGT # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_jecxz_jrcxz: ; BTVER2: # %bb.0: @@ -8064,10 +8064,10 @@ ; BDVER2-LABEL: test_lahf_sahf: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: lahf # sched: [1:0.50] -; BDVER2-NEXT: sahf # sched: [1:0.50] +; BDVER2-NEXT: lahf # sched: [2:0.50] +; BDVER2-NEXT: sahf # sched: [2:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lahf_sahf: ; BTVER2: # %bb.0: @@ -8156,9 +8156,9 @@ ; BDVER2-LABEL: test_leave: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: leave # sched: [7:0.67] +; BDVER2-NEXT: leave # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_leave: ; BTVER2: # %bb.0: @@ -8261,12 +8261,12 @@ ; BDVER2-LABEL: test_lods: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: lodsb (%rsi), %al # sched: [7:0.67] -; BDVER2-NEXT: lodsw (%rsi), %ax # sched: [7:0.67] -; BDVER2-NEXT: lodsl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: lodsq (%rsi), %rax # sched: [6:0.50] +; BDVER2-NEXT: lodsb (%rsi), %al # sched: [100:0.50] +; BDVER2-NEXT: lodsw (%rsi), %ax # sched: [100:0.50] +; BDVER2-NEXT: lodsl (%rsi), %eax # sched: [100:0.50] +; BDVER2-NEXT: lodsq (%rsi), %rax # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_lods: ; BTVER2: # %bb.0: @@ -8380,7 +8380,7 @@ ; BDVER2-NEXT: 
loope LTGT # sched: [1:1.00] ; BDVER2-NEXT: loopne LTGT # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_loop: ; BTVER2: # %bb.0: @@ -8475,10 +8475,10 @@ ; BDVER2-LABEL: test_movnti: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] +; BDVER2-NEXT: movntil %edi, (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: movntiq %rdx, (%rcx) # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movnti: ; BTVER2: # %bb.0: @@ -8583,12 +8583,12 @@ ; BDVER2-LABEL: test_movs: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: movsb (%rsi), %es:(%rdi) # sched: [8:1.00] -; BDVER2-NEXT: movsw (%rsi), %es:(%rdi) # sched: [8:1.00] -; BDVER2-NEXT: movsl (%rsi), %es:(%rdi) # sched: [8:1.00] -; BDVER2-NEXT: movsq (%rsi), %es:(%rdi) # sched: [8:1.00] +; BDVER2-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.50] +; BDVER2-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.50] +; BDVER2-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.50] +; BDVER2-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movs: ; BTVER2: # %bb.0: @@ -8692,11 +8692,11 @@ ; BDVER2-LABEL: test_movslq: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: movslq %edi, %rax # sched: [1:0.33] +; BDVER2-NEXT: movslq %edi, %rax # sched: [1:0.50] ; BDVER2-NEXT: movslq (%rsi), %rcx # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movslq: ; BTVER2: # %bb.0: @@ -8856,16 +8856,16 @@ ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] ; 
BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: mulb %dil # sched: [3:1.00] +; BDVER2-NEXT: mulb %dil # sched: [4:1.00] ; BDVER2-NEXT: mulb (%r8) # sched: [8:1.00] -; BDVER2-NEXT: mulw %si # sched: [4:1.33] -; BDVER2-NEXT: mulw (%r9) # sched: [9:1.33] +; BDVER2-NEXT: mulw %si # sched: [4:1.00] +; BDVER2-NEXT: mulw (%r9) # sched: [8:1.00] ; BDVER2-NEXT: mull %edx # sched: [4:1.00] -; BDVER2-NEXT: mull (%rax) # sched: [9:1.00] -; BDVER2-NEXT: mulq %rcx # sched: [4:1.00] -; BDVER2-NEXT: mulq (%r10) # sched: [9:1.00] +; BDVER2-NEXT: mull (%rax) # sched: [8:1.00] +; BDVER2-NEXT: mulq %rcx # sched: [6:4.00] +; BDVER2-NEXT: mulq (%r10) # sched: [10:4.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_mul: ; BTVER2: # %bb.0: @@ -9036,16 +9036,16 @@ ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: negb %dil # sched: [1:0.33] -; BDVER2-NEXT: negb (%r8) # sched: [7:1.00] -; BDVER2-NEXT: negw %si # sched: [1:0.33] -; BDVER2-NEXT: negw (%r9) # sched: [7:1.00] -; BDVER2-NEXT: negl %edx # sched: [1:0.33] -; BDVER2-NEXT: negl (%rax) # sched: [7:1.00] -; BDVER2-NEXT: negq %rcx # sched: [1:0.33] -; BDVER2-NEXT: negq (%r10) # sched: [7:1.00] +; BDVER2-NEXT: negb %dil # sched: [1:0.50] +; BDVER2-NEXT: negb (%r8) # sched: [6:1.00] +; BDVER2-NEXT: negw %si # sched: [1:0.50] +; BDVER2-NEXT: negw (%r9) # sched: [6:1.00] +; BDVER2-NEXT: negl %edx # sched: [1:0.50] +; BDVER2-NEXT: negl (%rax) # sched: [6:1.00] +; BDVER2-NEXT: negq %rcx # sched: [1:0.50] +; BDVER2-NEXT: negq (%r10) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_neg: ; BTVER2: # %bb.0: @@ -9190,15 +9190,15 @@ ; BDVER2-LABEL: test_nop: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: nop # sched: 
[1:0.25] -; BDVER2-NEXT: nopw %di # sched: [1:0.25] -; BDVER2-NEXT: nopw (%rcx) # sched: [1:0.25] -; BDVER2-NEXT: nopl %esi # sched: [1:0.25] -; BDVER2-NEXT: nopl (%r8) # sched: [1:0.25] -; BDVER2-NEXT: nopq %rdx # sched: [1:0.25] -; BDVER2-NEXT: nopq (%r9) # sched: [1:0.25] +; BDVER2-NEXT: nop # sched: [1:0.50] +; BDVER2-NEXT: nopw %di # sched: [1:0.50] +; BDVER2-NEXT: nopw (%rcx) # sched: [1:0.50] +; BDVER2-NEXT: nopl %esi # sched: [1:0.50] +; BDVER2-NEXT: nopl (%r8) # sched: [1:0.50] +; BDVER2-NEXT: nopq %rdx # sched: [1:0.50] +; BDVER2-NEXT: nopq (%r9) # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_nop: ; BTVER2: # %bb.0: @@ -9363,16 +9363,16 @@ ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50] ; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: notb %dil # sched: [1:0.33] -; BDVER2-NEXT: notb (%r8) # sched: [7:1.00] -; BDVER2-NEXT: notw %si # sched: [1:0.33] -; BDVER2-NEXT: notw (%r9) # sched: [7:1.00] -; BDVER2-NEXT: notl %edx # sched: [1:0.33] -; BDVER2-NEXT: notl (%rax) # sched: [7:1.00] -; BDVER2-NEXT: notq %rcx # sched: [1:0.33] -; BDVER2-NEXT: notq (%r10) # sched: [7:1.00] +; BDVER2-NEXT: notb %dil # sched: [1:0.50] +; BDVER2-NEXT: notb (%r8) # sched: [6:1.00] +; BDVER2-NEXT: notw %si # sched: [1:0.50] +; BDVER2-NEXT: notw (%r9) # sched: [6:1.00] +; BDVER2-NEXT: notl %edx # sched: [1:0.50] +; BDVER2-NEXT: notl (%rax) # sched: [6:1.00] +; BDVER2-NEXT: notq %rcx # sched: [1:0.50] +; BDVER2-NEXT: notq (%r10) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_not: ; BTVER2: # %bb.0: @@ -9509,14 +9509,14 @@ ; BDVER2-LABEL: test_or_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: orb $7, %al # sched: [1:0.33] -; BDVER2-NEXT: orb $7, %dil # sched: [1:0.33] -; BDVER2-NEXT: orb $7, (%rsi) # sched: [7:1.00] -; 
BDVER2-NEXT: orb %dl, %dil # sched: [1:0.33] -; BDVER2-NEXT: orb %dil, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: orb (%rsi), %dil # sched: [6:0.50] +; BDVER2-NEXT: orb $7, %al # sched: [1:0.50] +; BDVER2-NEXT: orb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: orb $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: orb %dl, %dil # sched: [1:0.50] +; BDVER2-NEXT: orb %dil, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: orb (%rsi), %dil # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_or_8: ; BTVER2: # %bb.0: @@ -9685,18 +9685,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: orw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: orw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: orw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: orw $7, %di # sched: [1:0.33] -; BDVER2-NEXT: orw $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: orw %dx, %di # sched: [1:0.33] -; BDVER2-NEXT: orw %di, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: orw (%rsi), %di # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: orw $7, %di # sched: [1:0.50] +; BDVER2-NEXT: orw $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: orw %dx, %di # sched: [1:0.50] +; BDVER2-NEXT: orw %di, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: orw (%rsi), %di # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_or_16: ; BTVER2: # %bb.0: @@ -9875,18 +9875,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: orl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: orl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: orl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: 
[7:1.00] -; BDVER2-NEXT: orl $7, %edi # sched: [1:0.33] -; BDVER2-NEXT: orl $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: orl %edx, %edi # sched: [1:0.33] -; BDVER2-NEXT: orl %edi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: orl (%rsi), %edi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: orl $7, %edi # sched: [1:0.50] +; BDVER2-NEXT: orl $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: orl %edx, %edi # sched: [1:0.50] +; BDVER2-NEXT: orl %edi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: orl (%rsi), %edi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_or_32: ; BTVER2: # %bb.0: @@ -10065,18 +10065,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: orq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: orq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: orq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: orq $7, %rdi # sched: [1:0.33] -; BDVER2-NEXT: orq $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: orq %rdx, %rdi # sched: [1:0.33] -; BDVER2-NEXT: orq %rdi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: orq (%rsi), %rdi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: orq $7, %rdi # sched: [1:0.50] +; BDVER2-NEXT: orq $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: orq %rdx, %rdi # sched: [1:0.50] +; BDVER2-NEXT: orq %rdi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: orq (%rsi), %rdi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_or_64: ; BTVER2: # %bb.0: @@ -10215,14 +10215,14 @@ ; BDVER2-LABEL: test_out: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: outb %al, $7 # sched: [100:0.33] -; BDVER2-NEXT: outw %ax, $7 # sched: [100:0.33] -; BDVER2-NEXT: outl %eax, $7 # sched: [100:0.33] -; 
BDVER2-NEXT: outb %al, %dx # sched: [100:0.33] -; BDVER2-NEXT: outw %ax, %dx # sched: [100:0.33] -; BDVER2-NEXT: outl %eax, %dx # sched: [100:0.33] +; BDVER2-NEXT: outb %al, $7 # sched: [100:0.50] +; BDVER2-NEXT: outw %ax, $7 # sched: [100:0.50] +; BDVER2-NEXT: outl %eax, $7 # sched: [100:0.50] +; BDVER2-NEXT: outb %al, %dx # sched: [100:0.50] +; BDVER2-NEXT: outw %ax, %dx # sched: [100:0.50] +; BDVER2-NEXT: outl %eax, %dx # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_out: ; BTVER2: # %bb.0: @@ -10327,11 +10327,11 @@ ; BDVER2-LABEL: test_outs: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: outsb (%rsi), %dx # sched: [100:0.33] -; BDVER2-NEXT: outsw (%rsi), %dx # sched: [100:0.33] -; BDVER2-NEXT: outsl (%rsi), %dx # sched: [100:0.33] +; BDVER2-NEXT: outsb (%rsi), %dx # sched: [100:0.50] +; BDVER2-NEXT: outsw (%rsi), %dx # sched: [100:0.50] +; BDVER2-NEXT: outsl (%rsi), %dx # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_outs: ; BTVER2: # %bb.0: @@ -10414,9 +10414,9 @@ ; BDVER2-LABEL: test_pause: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: pause # sched: [4:1.33] +; BDVER2-NEXT: pause # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pause: ; BTVER2: # %bb.0: @@ -10519,12 +10519,12 @@ ; BDVER2-LABEL: test_pop_push: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: popq %fs # sched: [100:0.33] -; BDVER2-NEXT: popq %gs # sched: [100:0.33] -; BDVER2-NEXT: pushq %fs # sched: [3:1.00] -; BDVER2-NEXT: pushq %gs # sched: [5:1.00] +; BDVER2-NEXT: popq %fs # sched: [100:0.50] +; BDVER2-NEXT: popq %gs # sched: [100:0.50] +; BDVER2-NEXT: pushq %fs # sched: [100:0.50] +; BDVER2-NEXT: pushq %gs # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] 
+; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pop_push: ; BTVER2: # %bb.0: @@ -10656,15 +10656,15 @@ ; BDVER2-LABEL: test_pop_push_16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: popw %ax # sched: [6:0.50] -; BDVER2-NEXT: popw (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: pushw %di # sched: [5:1.00] -; BDVER2-NEXT: pushw (%rsi) # sched: [5:1.00] +; BDVER2-NEXT: popw %ax # sched: [5:0.50] +; BDVER2-NEXT: popw (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: pushw %di # sched: [1:0.50] +; BDVER2-NEXT: pushw (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: pushw $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: pushw $7 # sched: [1:1.00] +; BDVER2-NEXT: # sched: [1:0.50] +; BDVER2-NEXT: pushw $7 # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pop_push_16: ; BTVER2: # %bb.0: @@ -10802,15 +10802,15 @@ ; BDVER2-LABEL: test_pop_push_64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: popq %rax # sched: [6:0.50] -; BDVER2-NEXT: popq (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: pushq %rdi # sched: [5:1.00] -; BDVER2-NEXT: pushq (%rsi) # sched: [5:1.00] +; BDVER2-NEXT: popq %rax # sched: [5:0.50] +; BDVER2-NEXT: popq (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: pushq %rdi # sched: [1:0.50] +; BDVER2-NEXT: pushq (%rsi) # sched: [6:1.00] ; BDVER2-NEXT: pushq $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [1:1.00] -; BDVER2-NEXT: pushq $7 # sched: [5:1.00] +; BDVER2-NEXT: # sched: [1:0.50] +; BDVER2-NEXT: pushq $7 # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_pop_push_64: ; BTVER2: # %bb.0: @@ -10910,9 +10910,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: popfq # sched: [5:0.50] -; BDVER2-NEXT: pushfq # sched: [5:1.00] +; BDVER2-NEXT: pushfq # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] 
; ; BTVER2-LABEL: test_popf_pushf: ; BTVER2: # %bb.0: @@ -11081,20 +11081,20 @@ ; BDVER2-LABEL: test_rcl_rcr_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rclb %dil # sched: [2:1.50] -; BDVER2-NEXT: rcrb %dil # sched: [2:1.50] -; BDVER2-NEXT: rclb (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrb (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rclb $7, %dil # sched: [5:4.00] -; BDVER2-NEXT: rcrb $7, %dil # sched: [5:4.00] -; BDVER2-NEXT: rclb $7, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrb $7, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rclb %cl, %dil # sched: [5:4.00] -; BDVER2-NEXT: rcrb %cl, %dil # sched: [5:4.00] -; BDVER2-NEXT: rclb %cl, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrb %cl, (%rdx) # sched: [11:3.50] +; BDVER2-NEXT: rclb %dil # sched: [1:0.50] +; BDVER2-NEXT: rcrb %dil # sched: [1:0.50] +; BDVER2-NEXT: rclb (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrb (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rclb $7, %dil # sched: [13:0.50] +; BDVER2-NEXT: rcrb $7, %dil # sched: [12:0.50] +; BDVER2-NEXT: rclb $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrb $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rclb %cl, %dil # sched: [12:0.50] +; BDVER2-NEXT: rcrb %cl, %dil # sched: [11:0.50] +; BDVER2-NEXT: rclb %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrb %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rcl_rcr_8: ; BTVER2: # %bb.0: @@ -11282,20 +11282,20 @@ ; BDVER2-LABEL: test_rcl_rcr_16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rclw %di # sched: [2:1.50] -; BDVER2-NEXT: rcrw %di # sched: [2:1.50] -; BDVER2-NEXT: rclw (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrw (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rclw $7, %di # sched: [5:4.00] -; BDVER2-NEXT: rcrw $7, %di # sched: [5:4.00] -; BDVER2-NEXT: rclw $7, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrw $7, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rclw %cl, %di # sched: [5:4.00] -; 
BDVER2-NEXT: rcrw %cl, %di # sched: [5:4.00] -; BDVER2-NEXT: rclw %cl, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrw %cl, (%rdx) # sched: [11:3.50] +; BDVER2-NEXT: rclw %di # sched: [1:0.50] +; BDVER2-NEXT: rcrw %di # sched: [1:0.50] +; BDVER2-NEXT: rclw (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrw (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rclw $7, %di # sched: [11:0.50] +; BDVER2-NEXT: rcrw $7, %di # sched: [10:0.50] +; BDVER2-NEXT: rclw $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrw $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rclw %cl, %di # sched: [10:0.50] +; BDVER2-NEXT: rcrw %cl, %di # sched: [9:0.50] +; BDVER2-NEXT: rclw %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrw %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rcl_rcr_16: ; BTVER2: # %bb.0: @@ -11483,20 +11483,20 @@ ; BDVER2-LABEL: test_rcl_rcr_32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rcll %edi # sched: [2:1.50] -; BDVER2-NEXT: rcrl %edi # sched: [2:1.50] -; BDVER2-NEXT: rcll (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrl (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcll $7, %edi # sched: [5:4.00] -; BDVER2-NEXT: rcrl $7, %edi # sched: [5:4.00] -; BDVER2-NEXT: rcll $7, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrl $7, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcll %cl, %edi # sched: [5:4.00] -; BDVER2-NEXT: rcrl %cl, %edi # sched: [5:4.00] -; BDVER2-NEXT: rcll %cl, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrl %cl, (%rdx) # sched: [11:3.50] +; BDVER2-NEXT: rcll %edi # sched: [1:0.50] +; BDVER2-NEXT: rcrl %edi # sched: [1:0.50] +; BDVER2-NEXT: rcll (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrl (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcll $7, %edi # sched: [8:0.50] +; BDVER2-NEXT: rcrl $7, %edi # sched: [7:0.50] +; BDVER2-NEXT: rcll $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrl $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcll %cl, %edi # sched: [7:0.50] +; BDVER2-NEXT: 
rcrl %cl, %edi # sched: [7:0.50] +; BDVER2-NEXT: rcll %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrl %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rcl_rcr_32: ; BTVER2: # %bb.0: @@ -11684,20 +11684,20 @@ ; BDVER2-LABEL: test_rcl_rcr_64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rclq %rdi # sched: [2:1.50] -; BDVER2-NEXT: rcrq %rdi # sched: [2:1.50] -; BDVER2-NEXT: rclq (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrq (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rclq $7, %rdi # sched: [5:4.00] -; BDVER2-NEXT: rcrq $7, %rdi # sched: [5:4.00] -; BDVER2-NEXT: rclq $7, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrq $7, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rclq %cl, %rdi # sched: [5:4.00] -; BDVER2-NEXT: rcrq %cl, %rdi # sched: [5:4.00] -; BDVER2-NEXT: rclq %cl, (%rdx) # sched: [11:3.50] -; BDVER2-NEXT: rcrq %cl, (%rdx) # sched: [11:3.50] +; BDVER2-NEXT: rclq %rdi # sched: [1:0.50] +; BDVER2-NEXT: rcrq %rdi # sched: [1:0.50] +; BDVER2-NEXT: rclq (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrq (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rclq $7, %rdi # sched: [8:0.50] +; BDVER2-NEXT: rcrq $7, %rdi # sched: [7:0.50] +; BDVER2-NEXT: rclq $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrq $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rclq %cl, %rdi # sched: [7:0.50] +; BDVER2-NEXT: rcrq %cl, %rdi # sched: [7:0.50] +; BDVER2-NEXT: rclq %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rcrq %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rcl_rcr_64: ; BTVER2: # %bb.0: @@ -11806,10 +11806,10 @@ ; BDVER2-LABEL: test_rdmsr_wrmsr: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rdmsr # sched: [100:0.33] -; BDVER2-NEXT: wrmsr # sched: [100:0.33] +; BDVER2-NEXT: rdmsr # sched: [100:0.50] +; BDVER2-NEXT: wrmsr # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; 
BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rdmsr_wrmsr: ; BTVER2: # %bb.0: @@ -11890,9 +11890,9 @@ ; BDVER2-LABEL: test_rdpmc: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rdpmc # sched: [100:0.33] +; BDVER2-NEXT: rdpmc # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rdpmc: ; BTVER2: # %bb.0: @@ -11979,10 +11979,10 @@ ; BDVER2-LABEL: test_rdtsc_rdtscp: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rdtsc # sched: [100:0.33] -; BDVER2-NEXT: rdtscp # sched: [100:0.33] +; BDVER2-NEXT: rdtsc # sched: [100:0.50] +; BDVER2-NEXT: rdtscp # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rdtsc_rdtscp: ; BTVER2: # %bb.0: @@ -12103,14 +12103,14 @@ ; BDVER2-LABEL: test_ret: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; BDVER2-NEXT: retq $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [6:1.00] -; BDVER2-NEXT: lretl # sched: [6:1.00] +; BDVER2-NEXT: # sched: [5:1.00] +; BDVER2-NEXT: lretl # sched: [5:1.00] ; BDVER2-NEXT: lretl $4095 # imm = 0xFFF -; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ret: ; BTVER2: # %bb.0: @@ -12287,20 +12287,20 @@ ; BDVER2-LABEL: test_rol_ror_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rolb %dil # sched: [2:1.00] -; BDVER2-NEXT: rorb %dil # sched: [2:1.00] -; BDVER2-NEXT: rolb (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rorb (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rolb $7, %dil # sched: [2:1.00] -; BDVER2-NEXT: rorb $7, %dil # sched: [2:1.00] -; BDVER2-NEXT: rolb $7, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rorb $7, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rolb %cl, %dil 
# sched: [3:1.50] -; BDVER2-NEXT: rorb %cl, %dil # sched: [3:1.50] -; BDVER2-NEXT: rolb %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: rorb %cl, (%rdx) # sched: [9:1.50] +; BDVER2-NEXT: rolb %dil # sched: [1:0.50] +; BDVER2-NEXT: rorb %dil # sched: [1:0.50] +; BDVER2-NEXT: rolb (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorb (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rolb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: rorb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: rolb $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorb $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rolb %cl, %dil # sched: [1:0.50] +; BDVER2-NEXT: rorb %cl, %dil # sched: [1:0.50] +; BDVER2-NEXT: rolb %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorb %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rol_ror_8: ; BTVER2: # %bb.0: @@ -12488,20 +12488,20 @@ ; BDVER2-LABEL: test_rol_ror_16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rolw %di # sched: [2:1.00] -; BDVER2-NEXT: rorw %di # sched: [2:1.00] -; BDVER2-NEXT: rolw (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rorw (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rolw $7, %di # sched: [2:1.00] -; BDVER2-NEXT: rorw $7, %di # sched: [2:1.00] -; BDVER2-NEXT: rolw $7, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rorw $7, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rolw %cl, %di # sched: [3:1.50] -; BDVER2-NEXT: rorw %cl, %di # sched: [3:1.50] -; BDVER2-NEXT: rolw %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: rorw %cl, (%rdx) # sched: [9:1.50] +; BDVER2-NEXT: rolw %di # sched: [1:0.50] +; BDVER2-NEXT: rorw %di # sched: [1:0.50] +; BDVER2-NEXT: rolw (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorw (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rolw $7, %di # sched: [1:0.50] +; BDVER2-NEXT: rorw $7, %di # sched: [1:0.50] +; BDVER2-NEXT: rolw $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorw $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rolw %cl, %di # sched: [1:0.50] +; 
BDVER2-NEXT: rorw %cl, %di # sched: [1:0.50] +; BDVER2-NEXT: rolw %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorw %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rol_ror_16: ; BTVER2: # %bb.0: @@ -12689,20 +12689,20 @@ ; BDVER2-LABEL: test_rol_ror_32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: roll %edi # sched: [2:1.00] -; BDVER2-NEXT: rorl %edi # sched: [2:1.00] -; BDVER2-NEXT: roll (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rorl (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: roll $7, %edi # sched: [2:1.00] -; BDVER2-NEXT: rorl $7, %edi # sched: [2:1.00] -; BDVER2-NEXT: roll $7, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rorl $7, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: roll %cl, %edi # sched: [3:1.50] -; BDVER2-NEXT: rorl %cl, %edi # sched: [3:1.50] -; BDVER2-NEXT: roll %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: rorl %cl, (%rdx) # sched: [9:1.50] +; BDVER2-NEXT: roll %edi # sched: [1:0.50] +; BDVER2-NEXT: rorl %edi # sched: [1:0.50] +; BDVER2-NEXT: roll (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorl (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: roll $7, %edi # sched: [1:0.50] +; BDVER2-NEXT: rorl $7, %edi # sched: [1:0.50] +; BDVER2-NEXT: roll $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorl $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: roll %cl, %edi # sched: [1:0.50] +; BDVER2-NEXT: rorl %cl, %edi # sched: [1:0.50] +; BDVER2-NEXT: roll %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorl %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rol_ror_32: ; BTVER2: # %bb.0: @@ -12890,20 +12890,20 @@ ; BDVER2-LABEL: test_rol_ror_64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: rolq %rdi # sched: [2:1.00] -; BDVER2-NEXT: rorq %rdi # sched: [2:1.00] -; BDVER2-NEXT: rolq (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rorq (%rdx) # sched: [8:1.00] -; 
BDVER2-NEXT: rolq $7, %rdi # sched: [2:1.00] -; BDVER2-NEXT: rorq $7, %rdi # sched: [2:1.00] -; BDVER2-NEXT: rolq $7, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rorq $7, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: rolq %cl, %rdi # sched: [3:1.50] -; BDVER2-NEXT: rorq %cl, %rdi # sched: [3:1.50] -; BDVER2-NEXT: rolq %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: rorq %cl, (%rdx) # sched: [9:1.50] +; BDVER2-NEXT: rolq %rdi # sched: [1:0.50] +; BDVER2-NEXT: rorq %rdi # sched: [1:0.50] +; BDVER2-NEXT: rolq (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorq (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rolq $7, %rdi # sched: [1:0.50] +; BDVER2-NEXT: rorq $7, %rdi # sched: [1:0.50] +; BDVER2-NEXT: rolq $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorq $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rolq %cl, %rdi # sched: [1:0.50] +; BDVER2-NEXT: rorq %cl, %rdi # sched: [1:0.50] +; BDVER2-NEXT: rolq %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: rorq %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_rol_ror_64: ; BTVER2: # %bb.0: @@ -13143,23 +13143,23 @@ ; BDVER2-NEXT: sarb %dil # sched: [1:0.50] ; BDVER2-NEXT: shlb %dil # sched: [1:0.50] ; BDVER2-NEXT: shrb %dil # sched: [1:0.50] -; BDVER2-NEXT: sarb (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shlb (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shrb (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: sarb (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlb (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrb (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: sarb $7, %dil # sched: [1:0.50] ; BDVER2-NEXT: shlb $7, %dil # sched: [1:0.50] ; BDVER2-NEXT: shrb $7, %dil # sched: [1:0.50] -; BDVER2-NEXT: sarb $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shlb $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shrb $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: sarb %cl, %dil # sched: [3:1.50] -; BDVER2-NEXT: shlb %cl, %dil # sched: [3:1.50] -; BDVER2-NEXT: shrb %cl, %dil # sched: [3:1.50] -; 
BDVER2-NEXT: sarb %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: shlb %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: shrb %cl, (%rdx) # sched: [9:1.50] +; BDVER2-NEXT: sarb $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlb $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrb $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: sarb %cl, %dil # sched: [1:0.50] +; BDVER2-NEXT: shlb %cl, %dil # sched: [1:0.50] +; BDVER2-NEXT: shrb %cl, %dil # sched: [1:0.50] +; BDVER2-NEXT: sarb %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlb %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrb %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sar_shl_shr_8: ; BTVER2: # %bb.0: @@ -13410,23 +13410,23 @@ ; BDVER2-NEXT: sarw %di # sched: [1:0.50] ; BDVER2-NEXT: shlw %di # sched: [1:0.50] ; BDVER2-NEXT: shrw %di # sched: [1:0.50] -; BDVER2-NEXT: sarw (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shlw (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shrw (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: sarw (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlw (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrw (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: sarw $7, %di # sched: [1:0.50] ; BDVER2-NEXT: shlw $7, %di # sched: [1:0.50] ; BDVER2-NEXT: shrw $7, %di # sched: [1:0.50] -; BDVER2-NEXT: sarw $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shlw $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shrw $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: sarw %cl, %di # sched: [3:1.50] -; BDVER2-NEXT: shlw %cl, %di # sched: [3:1.50] -; BDVER2-NEXT: shrw %cl, %di # sched: [3:1.50] -; BDVER2-NEXT: sarw %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: shlw %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: shrw %cl, (%rdx) # sched: [9:1.50] +; BDVER2-NEXT: sarw $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlw $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrw $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: sarw %cl, %di # sched: [1:0.50] +; BDVER2-NEXT: shlw %cl, %di 
# sched: [1:0.50] +; BDVER2-NEXT: shrw %cl, %di # sched: [1:0.50] +; BDVER2-NEXT: sarw %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlw %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrw %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sar_shl_shr_16: ; BTVER2: # %bb.0: @@ -13677,23 +13677,23 @@ ; BDVER2-NEXT: sarl %edi # sched: [1:0.50] ; BDVER2-NEXT: shll %edi # sched: [1:0.50] ; BDVER2-NEXT: shrl %edi # sched: [1:0.50] -; BDVER2-NEXT: sarl (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shll (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shrl (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: sarl (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shll (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrl (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: sarl $7, %edi # sched: [1:0.50] ; BDVER2-NEXT: shll $7, %edi # sched: [1:0.50] ; BDVER2-NEXT: shrl $7, %edi # sched: [1:0.50] -; BDVER2-NEXT: sarl $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shll $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shrl $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: sarl %cl, %edi # sched: [3:1.50] -; BDVER2-NEXT: shll %cl, %edi # sched: [3:1.50] -; BDVER2-NEXT: shrl %cl, %edi # sched: [3:1.50] -; BDVER2-NEXT: sarl %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: shll %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: shrl %cl, (%rdx) # sched: [9:1.50] +; BDVER2-NEXT: sarl $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shll $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrl $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: sarl %cl, %edi # sched: [1:0.50] +; BDVER2-NEXT: shll %cl, %edi # sched: [1:0.50] +; BDVER2-NEXT: shrl %cl, %edi # sched: [1:0.50] +; BDVER2-NEXT: sarl %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shll %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrl %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sar_shl_shr_32: ; BTVER2: 
# %bb.0: @@ -13944,23 +13944,23 @@ ; BDVER2-NEXT: sarq %rdi # sched: [1:0.50] ; BDVER2-NEXT: shlq %rdi # sched: [1:0.50] ; BDVER2-NEXT: shrq %rdi # sched: [1:0.50] -; BDVER2-NEXT: sarq (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shlq (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shrq (%rdx) # sched: [7:1.00] +; BDVER2-NEXT: sarq (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlq (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrq (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: sarq $7, %rdi # sched: [1:0.50] ; BDVER2-NEXT: shlq $7, %rdi # sched: [1:0.50] ; BDVER2-NEXT: shrq $7, %rdi # sched: [1:0.50] -; BDVER2-NEXT: sarq $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shlq $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: shrq $7, (%rdx) # sched: [7:1.00] -; BDVER2-NEXT: sarq %cl, %rdi # sched: [3:1.50] -; BDVER2-NEXT: shlq %cl, %rdi # sched: [3:1.50] -; BDVER2-NEXT: shrq %cl, %rdi # sched: [3:1.50] -; BDVER2-NEXT: sarq %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: shlq %cl, (%rdx) # sched: [9:1.50] -; BDVER2-NEXT: shrq %cl, (%rdx) # sched: [9:1.50] +; BDVER2-NEXT: sarq $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlq $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrq $7, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: sarq %cl, %rdi # sched: [1:0.50] +; BDVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] +; BDVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] +; BDVER2-NEXT: sarq %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shlq %cl, (%rdx) # sched: [5:1.00] +; BDVER2-NEXT: shrq %cl, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sar_shl_shr_64: ; BTVER2: # %bb.0: @@ -14113,14 +14113,14 @@ ; BDVER2-LABEL: test_sbb_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: sbbb $7, %al # sched: [2:0.67] -; BDVER2-NEXT: sbbb $7, %dil # sched: [2:0.67] -; BDVER2-NEXT: sbbb $7, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: sbbb %dl, %dil # sched: [2:0.67] -; BDVER2-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: sbbb 
(%rsi), %dil # sched: [7:0.67] +; BDVER2-NEXT: sbbb $7, %al # sched: [1:1.00] +; BDVER2-NEXT: sbbb $7, %dil # sched: [1:1.00] +; BDVER2-NEXT: sbbb $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: sbbb %dl, %dil # sched: [1:1.00] +; BDVER2-NEXT: sbbb %dil, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: sbbb (%rsi), %dil # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sbb_8: ; BTVER2: # %bb.0: @@ -14289,18 +14289,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: sbbw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: sbbw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: sbbw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [9:1.00] -; BDVER2-NEXT: sbbw $7, %di # sched: [2:0.67] -; BDVER2-NEXT: sbbw $7, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: sbbw %dx, %di # sched: [2:0.67] -; BDVER2-NEXT: sbbw %di, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: sbbw (%rsi), %di # sched: [7:0.67] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: sbbw $7, %di # sched: [1:1.00] +; BDVER2-NEXT: sbbw $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: sbbw %dx, %di # sched: [1:1.00] +; BDVER2-NEXT: sbbw %di, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: sbbw (%rsi), %di # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sbb_16: ; BTVER2: # %bb.0: @@ -14479,18 +14479,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: sbbl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: sbbl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [9:1.00] -; BDVER2-NEXT: sbbl $7, %edi # sched: [2:0.67] -; BDVER2-NEXT: sbbl $7, (%rsi) # 
sched: [9:1.00] -; BDVER2-NEXT: sbbl %edx, %edi # sched: [2:0.67] -; BDVER2-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: sbbl (%rsi), %edi # sched: [7:0.67] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: sbbl $7, %edi # sched: [1:1.00] +; BDVER2-NEXT: sbbl $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: sbbl %edx, %edi # sched: [1:1.00] +; BDVER2-NEXT: sbbl %edi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: sbbl (%rsi), %edi # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sbb_32: ; BTVER2: # %bb.0: @@ -14669,18 +14669,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: sbbq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: sbbq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [2:0.67] +; BDVER2-NEXT: # sched: [1:1.00] ; BDVER2-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [9:1.00] -; BDVER2-NEXT: sbbq $7, %rdi # sched: [2:0.67] -; BDVER2-NEXT: sbbq $7, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: sbbq %rdx, %rdi # sched: [2:0.67] -; BDVER2-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00] -; BDVER2-NEXT: sbbq (%rsi), %rdi # sched: [7:0.67] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: sbbq $7, %rdi # sched: [1:1.00] +; BDVER2-NEXT: sbbq $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: sbbq %rdx, %rdi # sched: [1:1.00] +; BDVER2-NEXT: sbbq %rdi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: sbbq (%rsi), %rdi # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sbb_64: ; BTVER2: # %bb.0: @@ -14803,12 +14803,12 @@ ; BDVER2-LABEL: test_scas: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: scasb %es:(%rdi), %al # sched: [2:0.67] -; BDVER2-NEXT: scasw %es:(%rdi), %ax # sched: [2:0.67] -; BDVER2-NEXT: scasl %es:(%rdi), %eax # sched: [2:0.67] -; BDVER2-NEXT: scasq %es:(%rdi), %rax # sched: 
[2:0.67] +; BDVER2-NEXT: scasb %es:(%rdi), %al # sched: [100:0.50] +; BDVER2-NEXT: scasw %es:(%rdi), %ax # sched: [100:0.50] +; BDVER2-NEXT: scasl %es:(%rdi), %eax # sched: [100:0.50] +; BDVER2-NEXT: scasq %es:(%rdi), %rax # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_scas: ; BTVER2: # %bb.0: @@ -15147,8 +15147,8 @@ ; BDVER2-NEXT: setae %dil # sched: [1:0.50] ; BDVER2-NEXT: sete %dil # sched: [1:0.50] ; BDVER2-NEXT: setne %dil # sched: [1:0.50] -; BDVER2-NEXT: setbe %dil # sched: [2:1.00] -; BDVER2-NEXT: seta %dil # sched: [2:1.00] +; BDVER2-NEXT: setbe %dil # sched: [1:0.50] +; BDVER2-NEXT: seta %dil # sched: [1:0.50] ; BDVER2-NEXT: sets %dil # sched: [1:0.50] ; BDVER2-NEXT: setns %dil # sched: [1:0.50] ; BDVER2-NEXT: setp %dil # sched: [1:0.50] @@ -15157,24 +15157,24 @@ ; BDVER2-NEXT: setge %dil # sched: [1:0.50] ; BDVER2-NEXT: setle %dil # sched: [1:0.50] ; BDVER2-NEXT: setg %dil # sched: [1:0.50] -; BDVER2-NEXT: seto (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setno (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setb (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setae (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: sete (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setne (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setbe (%rsi) # sched: [3:1.00] -; BDVER2-NEXT: seta (%rsi) # sched: [3:1.00] -; BDVER2-NEXT: sets (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setns (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setp (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setnp (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setl (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setge (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setle (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: setg (%rsi) # sched: [2:1.00] +; BDVER2-NEXT: seto (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: setno (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: setb (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: setae (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: sete (%rsi) # sched: [1:0.50] +; 
BDVER2-NEXT: setne (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: setbe (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: seta (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: sets (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: setns (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: setp (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: setnp (%rsi) # sched: [1:0.50] +; BDVER2-NEXT: setl (%rsi) # sched: [1:1.00] +; BDVER2-NEXT: setge (%rsi) # sched: [1:1.00] +; BDVER2-NEXT: setle (%rsi) # sched: [1:1.00] +; BDVER2-NEXT: setg (%rsi) # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_setcc: ; BTVER2: # %bb.0: @@ -15373,16 +15373,16 @@ ; BDVER2-LABEL: test_shld_shrd_16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: shldw %cl, %si, %di # sched: [4:1.50] -; BDVER2-NEXT: shrdw %cl, %si, %di # sched: [4:1.50] -; BDVER2-NEXT: shldw %cl, %si, (%rdx) # sched: [10:1.50] -; BDVER2-NEXT: shrdw %cl, %si, (%rdx) # sched: [10:1.50] -; BDVER2-NEXT: shldw $7, %si, %di # sched: [2:0.67] -; BDVER2-NEXT: shrdw $7, %si, %di # sched: [2:0.67] -; BDVER2-NEXT: shldw $7, %si, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: shrdw $7, %si, (%rdx) # sched: [8:1.00] +; BDVER2-NEXT: shldw %cl, %si, %di # sched: [4:4.00] +; BDVER2-NEXT: shrdw %cl, %si, %di # sched: [4:4.00] +; BDVER2-NEXT: shldw %cl, %si, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shrdw %cl, %si, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shldw $7, %si, %di # sched: [4:3.00] +; BDVER2-NEXT: shrdw $7, %si, %di # sched: [3:3.00] +; BDVER2-NEXT: shldw $7, %si, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shrdw $7, %si, (%rdx) # sched: [4:11.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_shld_shrd_16: ; BTVER2: # %bb.0: @@ -15530,16 +15530,16 @@ ; BDVER2-LABEL: test_shld_shrd_32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: shldl %cl, %esi, %edi # sched: [4:1.50] -; BDVER2-NEXT: shrdl %cl, %esi, %edi # 
sched: [4:1.50] -; BDVER2-NEXT: shldl %cl, %esi, (%rdx) # sched: [10:1.50] -; BDVER2-NEXT: shrdl %cl, %esi, (%rdx) # sched: [10:1.50] -; BDVER2-NEXT: shldl $7, %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: shrdl $7, %esi, %edi # sched: [2:0.67] -; BDVER2-NEXT: shldl $7, %esi, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: shrdl $7, %esi, (%rdx) # sched: [8:1.00] +; BDVER2-NEXT: shldl %cl, %esi, %edi # sched: [4:4.00] +; BDVER2-NEXT: shrdl %cl, %esi, %edi # sched: [4:4.00] +; BDVER2-NEXT: shldl %cl, %esi, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shrdl %cl, %esi, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shldl $7, %esi, %edi # sched: [3:3.00] +; BDVER2-NEXT: shrdl $7, %esi, %edi # sched: [4:3.00] +; BDVER2-NEXT: shldl $7, %esi, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shrdl $7, %esi, (%rdx) # sched: [4:11.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_shld_shrd_32: ; BTVER2: # %bb.0: @@ -15687,16 +15687,16 @@ ; BDVER2-LABEL: test_shld_shrd_64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50] -; BDVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50] -; BDVER2-NEXT: shldq %cl, %rsi, (%rdx) # sched: [10:1.50] -; BDVER2-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [10:1.50] -; BDVER2-NEXT: shldq $7, %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: shrdq $7, %rsi, %rdi # sched: [2:0.67] -; BDVER2-NEXT: shldq $7, %rsi, (%rdx) # sched: [8:1.00] -; BDVER2-NEXT: shrdq $7, %rsi, (%rdx) # sched: [8:1.00] +; BDVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:4.00] +; BDVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:4.00] +; BDVER2-NEXT: shldq %cl, %rsi, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shldq $7, %rsi, %rdi # sched: [4:3.00] +; BDVER2-NEXT: shrdq $7, %rsi, %rdi # sched: [4:3.00] +; BDVER2-NEXT: shldq $7, %rsi, (%rdx) # sched: [4:11.00] +; BDVER2-NEXT: shrdq $7, %rsi, (%rdx) # sched: [4:11.00] ; BDVER2-NEXT: 
#NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_shld_shrd_64: ; BTVER2: # %bb.0: @@ -15801,10 +15801,10 @@ ; BDVER2-LABEL: test_stc_std: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: stc # sched: [1:0.33] -; BDVER2-NEXT: std # sched: [1:0.33] +; BDVER2-NEXT: stc # sched: [1:0.50] +; BDVER2-NEXT: std # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_stc_std: ; BTVER2: # %bb.0: @@ -15912,12 +15912,12 @@ ; BDVER2-LABEL: test_stos: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: stosb %al, %es:(%rdi) # sched: [5:1.00] -; BDVER2-NEXT: stosw %ax, %es:(%rdi) # sched: [5:1.00] -; BDVER2-NEXT: stosl %eax, %es:(%rdi) # sched: [5:1.00] -; BDVER2-NEXT: stosq %rax, %es:(%rdi) # sched: [5:1.00] +; BDVER2-NEXT: stosb %al, %es:(%rdi) # sched: [100:0.50] +; BDVER2-NEXT: stosw %ax, %es:(%rdi) # sched: [100:0.50] +; BDVER2-NEXT: stosl %eax, %es:(%rdi) # sched: [100:0.50] +; BDVER2-NEXT: stosq %rax, %es:(%rdi) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_stos: ; BTVER2: # %bb.0: @@ -16044,14 +16044,14 @@ ; BDVER2-LABEL: test_sub_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: subb $7, %al # sched: [1:0.33] -; BDVER2-NEXT: subb $7, %dil # sched: [1:0.33] -; BDVER2-NEXT: subb $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: subb %dl, %dil # sched: [1:0.33] -; BDVER2-NEXT: subb %dil, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: subb (%rsi), %dil # sched: [6:0.50] +; BDVER2-NEXT: subb $7, %al # sched: [1:0.50] +; BDVER2-NEXT: subb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: subb $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: subb %dl, %dil # sched: [1:0.50] +; BDVER2-NEXT: subb %dil, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: subb (%rsi), %dil # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; 
BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sub_8: ; BTVER2: # %bb.0: @@ -16220,18 +16220,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: subw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: subw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: subw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: subw $7, %di # sched: [1:0.33] -; BDVER2-NEXT: subw $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: subw %dx, %di # sched: [1:0.33] -; BDVER2-NEXT: subw %di, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: subw (%rsi), %di # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: subw $7, %di # sched: [1:0.50] +; BDVER2-NEXT: subw $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: subw %dx, %di # sched: [1:0.50] +; BDVER2-NEXT: subw %di, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: subw (%rsi), %di # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sub_16: ; BTVER2: # %bb.0: @@ -16410,18 +16410,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: subl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: subl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: subl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: subl $7, %edi # sched: [1:0.33] -; BDVER2-NEXT: subl $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: subl %edx, %edi # sched: [1:0.33] -; BDVER2-NEXT: subl %edi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: subl (%rsi), %edi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: subl $7, %edi # sched: [1:0.50] +; BDVER2-NEXT: subl $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: subl %edx, %edi # sched: [1:0.50] +; BDVER2-NEXT: subl %edi, (%rsi) # sched: 
[6:1.00] +; BDVER2-NEXT: subl (%rsi), %edi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sub_32: ; BTVER2: # %bb.0: @@ -16600,18 +16600,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: subq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: subq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: subq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: subq $7, %rdi # sched: [1:0.33] -; BDVER2-NEXT: subq $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: subq %rdx, %rdi # sched: [1:0.33] -; BDVER2-NEXT: subq %rdi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: subq (%rsi), %rdi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: subq $7, %rdi # sched: [1:0.50] +; BDVER2-NEXT: subq $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: subq %rdx, %rdi # sched: [1:0.50] +; BDVER2-NEXT: subq %rdi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: subq (%rsi), %rdi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_sub_64: ; BTVER2: # %bb.0: @@ -16748,13 +16748,13 @@ ; BDVER2-LABEL: test_test_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: testb $7, %al # sched: [1:0.33] -; BDVER2-NEXT: testb $7, %dil # sched: [1:0.33] -; BDVER2-NEXT: testb $7, (%rsi) # sched: [6:0.50] -; BDVER2-NEXT: testb %dil, %dil # sched: [1:0.33] -; BDVER2-NEXT: testb %dil, (%rsi) # sched: [6:0.50] +; BDVER2-NEXT: testb $7, %al # sched: [1:0.50] +; BDVER2-NEXT: testb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: testb $7, (%rsi) # sched: [5:0.50] +; BDVER2-NEXT: testb %dil, %dil # sched: [1:0.50] +; BDVER2-NEXT: testb %dil, (%rsi) # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; 
BTVER2-LABEL: test_test_8: ; BTVER2: # %bb.0: @@ -16897,15 +16897,15 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: testw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: testw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: testw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [6:0.50] -; BDVER2-NEXT: testw %di, %di # sched: [1:0.33] -; BDVER2-NEXT: testw %di, (%rsi) # sched: [6:0.50] +; BDVER2-NEXT: # sched: [5:0.50] +; BDVER2-NEXT: testw %di, %di # sched: [1:0.50] +; BDVER2-NEXT: testw %di, (%rsi) # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_test_16: ; BTVER2: # %bb.0: @@ -17054,15 +17054,15 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: testl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: testl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: testl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:0.50] -; BDVER2-NEXT: testl %edi, %edi # sched: [1:0.33] -; BDVER2-NEXT: testl %edi, (%rsi) # sched: [6:0.50] +; BDVER2-NEXT: # sched: [5:0.50] +; BDVER2-NEXT: testl %edi, %edi # sched: [1:0.50] +; BDVER2-NEXT: testl %edi, (%rsi) # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_test_32: ; BTVER2: # %bb.0: @@ -17211,15 +17211,15 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: testq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: testq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: testq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [6:0.50] -; BDVER2-NEXT: 
testq %rdi, %rdi # sched: [1:0.33] -; BDVER2-NEXT: testq %rdi, (%rsi) # sched: [6:0.50] +; BDVER2-NEXT: # sched: [5:0.50] +; BDVER2-NEXT: testq %rdi, %rdi # sched: [1:0.50] +; BDVER2-NEXT: testq %rdi, (%rsi) # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_test_64: ; BTVER2: # %bb.0: @@ -17313,9 +17313,9 @@ ; BDVER2-LABEL: test_ud2: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: ud2 # sched: [100:0.33] +; BDVER2-NEXT: ud2 # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_ud2: ; BTVER2: # %bb.0: @@ -17411,9 +17411,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: xaddb %dil, %sil # sched: [2:1.00] -; BDVER2-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00] +; BDVER2-NEXT: xaddb %dil, (%rdx) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xadd_8: ; BTVER2: # %bb.0: @@ -17502,9 +17502,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: xaddw %di, %si # sched: [2:1.00] -; BDVER2-NEXT: xaddw %di, (%rdx) # sched: [8:1.00] +; BDVER2-NEXT: xaddw %di, (%rdx) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xadd_16: ; BTVER2: # %bb.0: @@ -17593,9 +17593,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: xaddl %edi, %esi # sched: [2:1.00] -; BDVER2-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00] +; BDVER2-NEXT: xaddl %edi, (%rdx) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xadd_32: ; BTVER2: # %bb.0: @@ -17684,9 +17684,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: xaddq %rdi, %rsi # sched: [2:1.00] -; BDVER2-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00] +; BDVER2-NEXT: xaddq %rdi, 
(%rdx) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xadd_64: ; BTVER2: # %bb.0: @@ -17775,10 +17775,10 @@ ; BDVER2-LABEL: test_xchg_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgb %sil, %dil # sched: [2:1.00] -; BDVER2-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00] +; BDVER2-NEXT: xchgb %sil, %dil # sched: [1:1.00] +; BDVER2-NEXT: xchgb %dil, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xchg_8: ; BTVER2: # %bb.0: @@ -17874,11 +17874,11 @@ ; BDVER2-LABEL: test_xchg_16: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgw %di, %ax # sched: [2:1.00] +; BDVER2-NEXT: xchgw %di, %ax # sched: [1:1.00] ; BDVER2-NEXT: xchgw %si, %di # sched: [2:1.00] -; BDVER2-NEXT: xchgw %di, (%rdx) # sched: [6:1.00] +; BDVER2-NEXT: xchgw %di, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xchg_16: ; BTVER2: # %bb.0: @@ -17976,11 +17976,11 @@ ; BDVER2-LABEL: test_xchg_32: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgl %edi, %eax # sched: [2:1.00] -; BDVER2-NEXT: xchgl %esi, %edi # sched: [2:1.00] -; BDVER2-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00] +; BDVER2-NEXT: xchgl %edi, %eax # sched: [1:1.00] +; BDVER2-NEXT: xchgl %esi, %edi # sched: [1:1.00] +; BDVER2-NEXT: xchgl %edi, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xchg_32: ; BTVER2: # %bb.0: @@ -18078,11 +18078,11 @@ ; BDVER2-LABEL: test_xchg_64: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: xchgq %rdi, %rax # sched: [2:1.00] -; BDVER2-NEXT: xchgq %rsi, %rdi # sched: [2:1.00] -; BDVER2-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00] +; BDVER2-NEXT: xchgq %rdi, %rax # sched: [1:1.00] +; 
BDVER2-NEXT: xchgq %rsi, %rdi # sched: [1:1.00] +; BDVER2-NEXT: xchgq %rdi, (%rdx) # sched: [5:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xchg_64: ; BTVER2: # %bb.0: @@ -18165,9 +18165,9 @@ ; BDVER2-LABEL: test_xlat: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: xlatb # sched: [5:0.50] +; BDVER2-NEXT: xlatb # sched: [6:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xlat: ; BTVER2: # %bb.0: @@ -18286,14 +18286,14 @@ ; BDVER2-LABEL: test_xor_8: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: xorb $7, %al # sched: [1:0.33] -; BDVER2-NEXT: xorb $7, %dil # sched: [1:0.33] -; BDVER2-NEXT: xorb $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: xorb %dl, %dil # sched: [1:0.33] -; BDVER2-NEXT: xorb %dil, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: xorb (%rsi), %dil # sched: [6:0.50] +; BDVER2-NEXT: xorb $7, %al # sched: [1:0.50] +; BDVER2-NEXT: xorb $7, %dil # sched: [1:0.50] +; BDVER2-NEXT: xorb $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: xorb %dl, %dil # sched: [1:0.50] +; BDVER2-NEXT: xorb %dil, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: xorb (%rsi), %dil # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xor_8: ; BTVER2: # %bb.0: @@ -18462,18 +18462,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: xorw $511, %ax # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: xorw $511, %di # imm = 0x1FF -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: xorw $511, (%rsi) # imm = 0x1FF -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: xorw $7, %di # sched: [1:0.33] -; BDVER2-NEXT: xorw $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: xorw %dx, %di # sched: [1:0.33] -; BDVER2-NEXT: xorw %di, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: xorw 
(%rsi), %di # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: xorw $7, %di # sched: [1:0.50] +; BDVER2-NEXT: xorw $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: xorw %dx, %di # sched: [1:0.50] +; BDVER2-NEXT: xorw %di, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: xorw (%rsi), %di # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xor_16: ; BTVER2: # %bb.0: @@ -18652,18 +18652,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: xorl $665536, %eax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: xorl $665536, %edi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: xorl $7, %edi # sched: [1:0.33] -; BDVER2-NEXT: xorl $7, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: xorl %edx, %edi # sched: [1:0.33] -; BDVER2-NEXT: xorl %edi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: xorl (%rsi), %edi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: xorl $7, %edi # sched: [1:0.50] +; BDVER2-NEXT: xorl $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: xorl %edx, %edi # sched: [1:0.50] +; BDVER2-NEXT: xorl %edi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: xorl (%rsi), %edi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xor_32: ; BTVER2: # %bb.0: @@ -18842,18 +18842,18 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP ; BDVER2-NEXT: xorq $665536, %rax # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: xorq $665536, %rdi # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [1:0.33] +; BDVER2-NEXT: # sched: [1:0.50] ; BDVER2-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: xorq $7, %rdi # sched: [1:0.33] -; BDVER2-NEXT: xorq $7, 
(%rsi) # sched: [7:1.00] -; BDVER2-NEXT: xorq %rdx, %rdi # sched: [1:0.33] -; BDVER2-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00] -; BDVER2-NEXT: xorq (%rsi), %rdi # sched: [6:0.50] +; BDVER2-NEXT: # sched: [6:1.00] +; BDVER2-NEXT: xorq $7, %rdi # sched: [1:0.50] +; BDVER2-NEXT: xorq $7, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: xorq %rdx, %rdi # sched: [1:0.50] +; BDVER2-NEXT: xorq %rdi, (%rsi) # sched: [6:1.00] +; BDVER2-NEXT: xorq (%rsi), %rdi # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_xor_64: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll +++ llvm/trunk/test/CodeGen/X86/small-byval-memcpy.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s --check-prefix=CORE2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 | FileCheck %s --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) Index: llvm/trunk/test/CodeGen/X86/sse-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll @@ -14,8 +14,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s 
--check-prefixes=CHECK,SKX-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE @@ -104,15 +104,15 @@ ; ; BDVER2-SSE-LABEL: test_addps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_addps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_addps: ; BTVER2-SSE: # %bb.0: @@ -224,15 +224,15 @@ ; ; 
BDVER2-SSE-LABEL: test_addss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_addss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_addss: ; BTVER2-SSE: # %bb.0: @@ -348,15 +348,15 @@ ; ; BDVER2-SSE-LABEL: test_andps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_andps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_andps: ; BTVER2-SSE: # %bb.0: @@ -476,15 +476,15 @@ ; ; BDVER2-SSE-LABEL: test_andnotps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] +; 
BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_andnotps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_andnotps: ; BTVER2-SSE: # %bb.0: @@ -615,17 +615,17 @@ ; ; BDVER2-SSE-LABEL: test_cmpps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] -; BDVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00] +; BDVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cmpps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BDVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00] +; BDVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cmpps: ; BTVER2-SSE: # %bb.0: @@ -745,15 +745,15 @@ ; ; BDVER2-SSE-LABEL: test_cmpss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cmpss: ; BDVER2: # 
%bb.0: -; BDVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cmpss: ; BTVER2-SSE: # %bb.0: @@ -974,31 +974,31 @@ ; ; BDVER2-SSE-LABEL: test_comiss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_comiss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33] -; 
BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] +; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] +; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_comiss: ; BTVER2-SSE: # %bb.0: @@ -1157,17 +1157,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtsi2ss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00] -; BDVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00] +; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtsi2ss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2ss: ; BTVER2-SSE: # %bb.0: @@ -1297,17 +1297,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtsi2ssq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] -; BDVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [13:1.00] +; BDVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # 
sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtsi2ssq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2ssq: ; BTVER2-SSE: # %bb.0: @@ -1437,17 +1437,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtss2si: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] -; BDVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [18:1.00] +; BDVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [13:1.00] +; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtss2si: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] -; BDVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [18:1.00] +; BDVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [13:1.00] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtss2si: ; BTVER2-SSE: # %bb.0: @@ -1580,17 +1580,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtss2siq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] -; BDVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [18:1.00] +; 
BDVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [13:1.00] +; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtss2siq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] -; BDVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [18:1.00] +; BDVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [13:1.00] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtss2siq: ; BTVER2-SSE: # %bb.0: @@ -1723,17 +1723,17 @@ ; ; BDVER2-SSE-LABEL: test_cvttss2si: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] -; BDVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [18:1.00] +; BDVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [13:1.00] +; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvttss2si: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] -; BDVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [18:1.00] +; BDVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [13:1.00] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvttss2si: ; BTVER2-SSE: # %bb.0: @@ -1863,17 +1863,17 @@ ; ; BDVER2-SSE-LABEL: test_cvttss2siq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] -; BDVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; 
BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [18:1.00] +; BDVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [13:1.00] +; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvttss2siq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] -; BDVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [18:1.00] +; BDVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [13:1.00] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvttss2siq: ; BTVER2-SSE: # %bb.0: @@ -1990,15 +1990,15 @@ ; ; BDVER2-SSE-LABEL: test_divps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00] -; BDVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [9:9.50] +; BDVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [14:9.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_divps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:14.00] -; BDVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:14.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [9:9.50] +; BDVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [14:9.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_divps: ; BTVER2-SSE: # %bb.0: @@ -2110,15 +2110,15 @@ ; ; BDVER2-SSE-LABEL: test_divss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00] -; BDVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [9:9.50] +; BDVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [14:9.50] +; 
BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_divss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:14.00] -; BDVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:14.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [9:9.50] +; BDVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [14:9.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_divss: ; BTVER2-SSE: # %bb.0: @@ -2230,15 +2230,15 @@ ; ; BDVER2-SSE-LABEL: test_ldmxcsr: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BDVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50] +; BDVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_ldmxcsr: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; BDVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50] +; BDVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_ldmxcsr: ; BTVER2-SSE: # %bb.0: @@ -2352,15 +2352,15 @@ ; ; BDVER2-SSE-LABEL: test_maxps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_maxps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # 
sched: [2:1.00] +; BDVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_maxps: ; BTVER2-SSE: # %bb.0: @@ -2473,15 +2473,15 @@ ; ; BDVER2-SSE-LABEL: test_maxss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_maxss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_maxss: ; BTVER2-SSE: # %bb.0: @@ -2594,15 +2594,15 @@ ; ; BDVER2-SSE-LABEL: test_minps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_minps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_minps: ; BTVER2-SSE: # %bb.0: @@ -2715,15 +2715,15 @@ ; ; BDVER2-SSE-LABEL: test_minss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: minss (%rdi), 
%xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_minss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_minss: ; BTVER2-SSE: # %bb.0: @@ -2849,17 +2849,17 @@ ; ; BDVER2-SSE-LABEL: test_movaps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [5:1.00] ; BDVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movaps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; BDVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movaps: ; BTVER2-SSE: # %bb.0: @@ -2970,13 +2970,13 @@ ; ; BDVER2-SSE-LABEL: test_movhlps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movhlps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 
= xmm1[1],xmm0[1] sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movhlps: ; BTVER2-SSE: # %bb.0: @@ -3111,19 +3111,19 @@ ; ; BDVER2-SSE-LABEL: test_movhps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movhps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movhps: ; BTVER2-SSE: # %bb.0: @@ -3249,15 +3249,15 @@ ; ; BDVER2-SSE-LABEL: test_movlhps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # 
sched: [5:1.00] ; ; BDVER2-LABEL: test_movlhps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] +; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movlhps: ; BTVER2-SSE: # %bb.0: @@ -3395,19 +3395,19 @@ ; ; BDVER2-SSE-LABEL: test_movlps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movlps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movlps: ; BTVER2-SSE: # %bb.0: @@ -3520,13 +3520,13 @@ ; ; BDVER2-SSE-LABEL: test_movmskps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; 
BDVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movmskps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovmskps %xmm0, %eax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movmskps: ; BTVER2-SSE: # %bb.0: @@ -3626,13 +3626,13 @@ ; ; BDVER2-SSE-LABEL: test_movntps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movntps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movntps: ; BTVER2-SSE: # %bb.0: @@ -3751,17 +3751,17 @@ ; ; BDVER2-SSE-LABEL: test_movss_mem: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; BDVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; BDVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movss_mem: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; BDVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; BDVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: 
vmovss %xmm0, (%rsi) # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movss_mem: ; BTVER2-SSE: # %bb.0: @@ -3870,13 +3870,13 @@ ; ; BDVER2-SSE-LABEL: test_movss_reg: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movss_reg: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movss_reg: ; BTVER2-SSE: # %bb.0: @@ -3995,17 +3995,17 @@ ; ; BDVER2-SSE-LABEL: test_movups: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [5:1.00] ; BDVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movups: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; BDVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movups: ; BTVER2-SSE: # %bb.0: @@ -4122,14 +4122,14 @@ ; BDVER2-SSE-LABEL: test_mulps: ; BDVER2-SSE: # %bb.0: ; BDVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; 
BDVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_mulps: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_mulps: ; BTVER2-SSE: # %bb.0: @@ -4242,14 +4242,14 @@ ; BDVER2-SSE-LABEL: test_mulss: ; BDVER2-SSE: # %bb.0: ; BDVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_mulss: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_mulss: ; BTVER2-SSE: # %bb.0: @@ -4365,15 +4365,15 @@ ; ; BDVER2-SSE-LABEL: test_orps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_orps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_orps: ; BTVER2-SSE: # %bb.0: @@ 
-4547,7 +4547,7 @@ ; BDVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] ; BDVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] ; BDVER2-SSE-NEXT: #NO_APP -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_prefetch: ; BDVER2: # %bb.0: @@ -4557,7 +4557,7 @@ ; BDVER2-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] ; BDVER2-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_prefetch: ; BTVER2-SSE: # %bb.0: @@ -4699,16 +4699,16 @@ ; BDVER2-SSE-LABEL: test_rcpps: ; BDVER2-SSE: # %bb.0: ; BDVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_rcpps: ; BDVER2: # %bb.0: +; BDVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] ; BDVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_rcpps: ; BTVER2-SSE: # %bb.0: @@ -4854,19 +4854,19 @@ ; ; BDVER2-SSE-LABEL: test_rcpss: ; BDVER2-SSE: # %bb.0: +; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; BDVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] ; BDVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # 
sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_rcpss: ; BDVER2: # %bb.0: +; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] ; BDVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_rcpss: ; BTVER2-SSE: # %bb.0: @@ -5006,16 +5006,16 @@ ; BDVER2-SSE-LABEL: test_rsqrtps: ; BDVER2-SSE: # %bb.0: ; BDVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_rsqrtps: ; BDVER2: # %bb.0: +; BDVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] ; BDVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_rsqrtps: ; BTVER2-SSE: # %bb.0: @@ -5161,19 +5161,19 @@ ; ; BDVER2-SSE-LABEL: test_rsqrtss: ; BDVER2-SSE: # %bb.0: +; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; BDVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] ; BDVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; 
BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_rsqrtss: ; BDVER2: # %bb.0: +; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; BDVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] ; BDVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_rsqrtss: ; BTVER2-SSE: # %bb.0: @@ -5290,13 +5290,13 @@ ; ; BDVER2-SSE-LABEL: test_sfence: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sfence # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: sfence # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_sfence: ; BDVER2: # %bb.0: -; BDVER2-NEXT: sfence # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: sfence # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_sfence: ; BTVER2-SSE: # %bb.0: @@ -5416,17 +5416,17 @@ ; ; BDVER2-SSE-LABEL: test_shufps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [2:0.50] +; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:0.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_shufps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; BDVER2-NEXT: vshufps 
{{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [2:0.50] +; BDVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:0.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_shufps: ; BTVER2-SSE: # %bb.0: @@ -5557,17 +5557,17 @@ ; ; BDVER2-SSE-LABEL: test_sqrtps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00] -; BDVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [9:10.50] +; BDVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [14:10.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_sqrtps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:14.00] -; BDVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:14.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:10.50] +; BDVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [9:10.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtps: ; BTVER2-SSE: # %bb.0: @@ -5713,19 +5713,19 @@ ; ; BDVER2-SSE-LABEL: test_sqrtss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00] -; BDVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] -; BDVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50] +; BDVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [9:10.50] 
+; BDVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [9:10.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_sqrtss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] -; BDVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; BDVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:14.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50] +; BDVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:10.50] +; BDVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:10.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtss: ; BTVER2-SSE: # %bb.0: @@ -5847,15 +5847,15 @@ ; ; BDVER2-SSE-LABEL: test_stmxcsr: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; BDVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:0.50] ; BDVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_stmxcsr: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; BDVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:0.50] ; BDVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_stmxcsr: ; BTVER2-SSE: # %bb.0: @@ -5969,15 +5969,15 @@ ; ; BDVER2-SSE-LABEL: test_subps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_subps: ; 
BDVER2: # %bb.0: -; BDVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_subps: ; BTVER2-SSE: # %bb.0: @@ -6089,15 +6089,15 @@ ; ; BDVER2-SSE-LABEL: test_subss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_subss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_subss: ; BTVER2-SSE: # %bb.0: @@ -6313,31 +6313,31 @@ ; ; BDVER2-SSE-LABEL: test_ucomiss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; 
BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_ucomiss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] +; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] +; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_ucomiss: ; BTVER2-SSE: # %bb.0: @@ -6496,17 +6496,17 @@ ; ; BDVER2-SSE-LABEL: test_unpckhps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] +; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_unpckhps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: 
[1:1.00] -; BDVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] +; BDVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_unpckhps: ; BTVER2-SSE: # %bb.0: @@ -6636,17 +6636,17 @@ ; ; BDVER2-SSE-LABEL: test_unpcklps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] +; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_unpcklps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BDVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] +; BDVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_unpcklps: ; BTVER2-SSE: # %bb.0: @@ -6767,15 +6767,15 @@ ; ; BDVER2-SSE-LABEL: test_xorps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] -; 
BDVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_xorps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_xorps: ; BTVER2-SSE: # %bb.0: @@ -6923,19 +6923,19 @@ ; ; BDVER2-SSE-LABEL: test_fnop: ; BDVER2-SSE: # %bb.0: +; BDVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25] ; BDVER2-SSE-NEXT: #APP -; BDVER2-SSE-NEXT: nop # sched: [1:0.25] +; BDVER2-SSE-NEXT: nop # sched: [1:0.50] ; BDVER2-SSE-NEXT: #NO_APP -; BDVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_fnop: ; BDVER2: # %bb.0: +; BDVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: nop # sched: [1:0.25] +; BDVER2-NEXT: nop # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_fnop: ; BTVER2-SSE: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll @@ -14,8 +14,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s 
--check-prefixes=CHECK,SKX-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE @@ -102,15 +102,15 @@ ; ; BDVER2-SSE-LABEL: test_addpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_addpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_addpd: ; BTVER2-SSE: # %bb.0: @@ -222,15 +222,15 @@ ; ; 
BDVER2-SSE-LABEL: test_addsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_addsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_addsd: ; BTVER2-SSE: # %bb.0: @@ -355,17 +355,17 @@ ; ; BDVER2-SSE-LABEL: test_andpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_andpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_andpd: ; BTVER2-SSE: # %bb.0: @@ -499,17 +499,17 @@ ; ; BDVER2-SSE-LABEL: test_andnotpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: 
[1:1.00] -; BDVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_andnotpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_andnotpd: ; BTVER2-SSE: # %bb.0: @@ -625,13 +625,13 @@ ; ; BDVER2-SSE-LABEL: test_clflush: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: clflush (%rdi) # sched: [5:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_clflush: ; BDVER2: # %bb.0: -; BDVER2-NEXT: clflush (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: clflush (%rdi) # sched: [5:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_clflush: ; BTVER2-SSE: # %bb.0: @@ -751,17 +751,17 @@ ; ; BDVER2-SSE-LABEL: test_cmppd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; BDVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00] +; BDVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: 
retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cmppd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BDVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00] +; BDVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cmppd: ; BTVER2-SSE: # %bb.0: @@ -880,15 +880,15 @@ ; ; BDVER2-SSE-LABEL: test_cmpsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cmpsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cmpsd: ; BTVER2-SSE: # %bb.0: @@ -1109,31 +1109,31 @@ ; ; BDVER2-SSE-LABEL: test_comisd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] ; 
BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_comisd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] +; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] +; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_comisd: ; BTVER2-SSE: # %bb.0: @@ -1294,17 +1294,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtdq2pd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [8:1.00] +; BDVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [13:1.00] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtdq2pd: ; BDVER2: # %bb.0: -; 
BDVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [13:1.00] +; BDVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtdq2pd: ; BTVER2-SSE: # %bb.0: @@ -1437,17 +1437,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtdq2ps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; BDVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:1.00] ; BDVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtdq2ps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] ; BDVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtdq2ps: ; BTVER2-SSE: # %bb.0: @@ -1579,17 +1579,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtpd2dq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [8:1.00] +; BDVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [13:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtpd2dq: ; 
BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [13:1.00] +; BDVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtpd2dq: ; BTVER2-SSE: # %bb.0: @@ -1722,17 +1722,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtpd2ps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [8:1.00] +; BDVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [13:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtpd2ps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [13:1.00] +; BDVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtpd2ps: ; BTVER2-SSE: # %bb.0: @@ -1864,17 +1864,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtps2dq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BDVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:1.00] ; BDVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: 
[2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtps2dq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] ; BDVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtps2dq: ; BTVER2-SSE: # %bb.0: @@ -2006,17 +2006,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtps2pd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; BDVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [8:1.00] +; BDVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [13:1.00] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtps2pd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [13:1.00] +; BDVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtps2pd: ; BTVER2-SSE: # %bb.0: @@ -2148,17 +2148,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtsd2si: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] -; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [18:1.00] +; BDVER2-SSE-NEXT: cvtsd2si 
%xmm0, %ecx # sched: [13:1.00] +; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtsd2si: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] -; BDVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [10:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [18:1.00] +; BDVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [13:1.00] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsd2si: ; BTVER2-SSE: # %bb.0: @@ -2291,17 +2291,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtsd2siq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] -; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [18:1.00] +; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [13:1.00] +; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtsd2siq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] -; BDVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [18:1.00] +; BDVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [13:1.00] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsd2siq: ; BTVER2-SSE: # %bb.0: @@ -2449,18 +2449,18 @@ ; BDVER2-SSE-LABEL: test_cvtsd2ss: ; BDVER2-SSE: # %bb.0: ; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] +; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] ; BDVER2-SSE-NEXT: cvtsd2ss 
%xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtsd2ss: ; BDVER2: # %bb.0: +; BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; BDVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; BDVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsd2ss: ; BTVER2-SSE: # %bb.0: @@ -2594,17 +2594,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtsi2sd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] ; BDVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] +; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtsi2sd: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] ; BDVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2sd: ; BTVER2-SSE: # %bb.0: @@ -2734,17 +2734,17 @@ ; ; BDVER2-SSE-LABEL: test_cvtsi2sdq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] +; BDVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [13:1.00] ; BDVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: 
addsd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtsi2sdq: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] ; BDVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2sdq: ; BTVER2-SSE: # %bb.0: @@ -2890,19 +2890,19 @@ ; ; BDVER2-SSE-LABEL: test_cvtss2sd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00] +; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvtss2sd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; BDVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; BDVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00] +; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: 
test_cvtss2sd: ; BTVER2-SSE: # %bb.0: @@ -3038,17 +3038,17 @@ ; ; BDVER2-SSE-LABEL: test_cvttpd2dq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [8:1.00] +; BDVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [13:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvttpd2dq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [13:1.00] +; BDVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvttpd2dq: ; BTVER2-SSE: # %bb.0: @@ -3181,17 +3181,17 @@ ; ; BDVER2-SSE-LABEL: test_cvttps2dq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BDVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:1.00] ; BDVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvttps2dq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] ; BDVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq 
# sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvttps2dq: ; BTVER2-SSE: # %bb.0: @@ -3321,17 +3321,17 @@ ; ; BDVER2-SSE-LABEL: test_cvttsd2si: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] -; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [18:1.00] +; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [13:1.00] +; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvttsd2si: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] -; BDVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [10:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [18:1.00] +; BDVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [13:1.00] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvttsd2si: ; BTVER2-SSE: # %bb.0: @@ -3461,17 +3461,17 @@ ; ; BDVER2-SSE-LABEL: test_cvttsd2siq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] -; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [18:1.00] +; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [13:1.00] +; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_cvttsd2siq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] -; BDVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [18:1.00] +; BDVER2-NEXT: vcvttsd2si 
%xmm0, %rcx # sched: [13:1.00] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_cvttsd2siq: ; BTVER2-SSE: # %bb.0: @@ -3588,15 +3588,15 @@ ; ; BDVER2-SSE-LABEL: test_divpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00] -; BDVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [9:9.50] +; BDVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [14:9.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_divpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00] -; BDVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [9:9.50] +; BDVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:9.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_divpd: ; BTVER2-SSE: # %bb.0: @@ -3708,15 +3708,15 @@ ; ; BDVER2-SSE-LABEL: test_divsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00] -; BDVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [9:9.50] +; BDVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [14:9.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_divsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00] -; BDVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [9:9.50] +; BDVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:9.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_divsd: ; BTVER2-SSE: # %bb.0: @@ -3821,13 +3821,13 @@ ; ; BDVER2-SSE-LABEL: test_lfence: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: lfence # sched: [1:1.00] -; 
BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: lfence # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_lfence: ; BDVER2: # %bb.0: -; BDVER2-NEXT: lfence # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: lfence # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_lfence: ; BTVER2-SSE: # %bb.0: @@ -3927,13 +3927,13 @@ ; ; BDVER2-SSE-LABEL: test_mfence: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mfence # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: mfence # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_mfence: ; BDVER2: # %bb.0: -; BDVER2-NEXT: mfence # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: mfence # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_mfence: ; BTVER2-SSE: # %bb.0: @@ -4032,12 +4032,12 @@ ; BDVER2-SSE-LABEL: test_maskmovdqu: ; BDVER2-SSE: # %bb.0: ; BDVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_maskmovdqu: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_maskmovdqu: ; BTVER2-SSE: # %bb.0: @@ -4144,15 +4144,15 @@ ; ; BDVER2-SSE-LABEL: test_maxpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_maxpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # 
sched: [1:1.00] +; BDVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_maxpd: ; BTVER2-SSE: # %bb.0: @@ -4265,15 +4265,15 @@ ; ; BDVER2-SSE-LABEL: test_maxsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_maxsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_maxsd: ; BTVER2-SSE: # %bb.0: @@ -4386,15 +4386,15 @@ ; ; BDVER2-SSE-LABEL: test_minpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_minpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_minpd: ; BTVER2-SSE: # %bb.0: @@ -4507,15 +4507,15 @@ ; ; BDVER2-SSE-LABEL: test_minsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minsd 
%xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_minsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_minsd: ; BTVER2-SSE: # %bb.0: @@ -4641,17 +4641,17 @@ ; ; BDVER2-SSE-LABEL: test_movapd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] ; BDVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movapd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; BDVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movapd: ; BTVER2-SSE: # %bb.0: @@ -4780,17 +4780,17 @@ ; ; BDVER2-SSE-LABEL: test_movdqa: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: movdqa %xmm0, 
(%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movdqa: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movdqa: ; BTVER2-SSE: # %bb.0: @@ -4919,17 +4919,17 @@ ; ; BDVER2-SSE-LABEL: test_movdqu: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movdqu: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movdqu: ; BTVER2-SSE: # %bb.0: @@ -5097,23 +5097,23 @@ ; ; BDVER2-SSE-LABEL: test_movd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; BDVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; BDVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BDVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; BDVER2-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; BDVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: 
[1:1.00] +; BDVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; BDVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [10:0.50] +; BDVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movd %xmm2, %eax # sched: [10:1.00] +; BDVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; BDVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BDVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; BDVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovd %edi, %xmm1 # sched: [10:0.50] +; BDVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] +; BDVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vmovd %xmm0, %eax # sched: [10:1.00] +; BDVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movd: ; BTVER2-SSE: # %bb.0: @@ -5298,23 +5298,23 @@ ; ; BDVER2-SSE-LABEL: test_movd_64: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; BDVER2-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; BDVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; BDVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [10:0.50] +; BDVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movq %xmm2, 
%rax # sched: [10:1.00] +; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movd_64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; BDVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BDVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; BDVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovq %rdi, %xmm1 # sched: [10:0.50] +; BDVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vmovq %xmm0, %rax # sched: [10:1.00] +; BDVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movd_64: ; BTVER2-SSE: # %bb.0: @@ -5474,19 +5474,19 @@ ; ; BDVER2-SSE-LABEL: test_movhpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movhpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: vmovapd %xmm1, 
%xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movhpd: ; BTVER2-SSE: # %bb.0: @@ -5636,19 +5636,19 @@ ; ; BDVER2-SSE-LABEL: test_movlpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movlpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movlpd: ; BTVER2-SSE: # %bb.0: @@ -5760,13 +5760,13 @@ ; ; BDVER2-SSE-LABEL: test_movmskpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [10:1.00] +; 
BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movmskpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movmskpd: ; BTVER2-SSE: # %bb.0: @@ -5875,15 +5875,15 @@ ; ; BDVER2-SSE-LABEL: test_movntdqa: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movntdqa: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movntdqa: ; BTVER2-SSE: # %bb.0: @@ -5994,15 +5994,15 @@ ; ; BDVER2-SSE-LABEL: test_movntpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movntpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movntpd: ; BTVER2-SSE: # %bb.0: 
@@ -6126,17 +6126,17 @@ ; ; BDVER2-SSE-LABEL: test_movq_mem: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movq_mem: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movq_mem: ; BTVER2-SSE: # %bb.0: @@ -6256,15 +6256,15 @@ ; ; BDVER2-SSE-LABEL: test_movq_reg: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movq_reg: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; BDVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movq_reg: ; BTVER2-SSE: # %bb.0: @@ -6388,17 +6388,17 @@ ; ; 
BDVER2-SSE-LABEL: test_movsd_mem: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; BDVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; BDVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movsd_mem: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; BDVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; BDVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movsd_mem: ; BTVER2-SSE: # %bb.0: @@ -6513,14 +6513,14 @@ ; ; BDVER2-SSE-LABEL: test_movsd_reg: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [2:0.50] +; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movsd_reg: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movsd_reg: ; BTVER2-SSE: # %bb.0: @@ -6641,17 +6641,17 @@ ; ; BDVER2-SSE-LABEL: test_movupd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; 
BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] ; BDVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movupd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; BDVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; BDVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movupd: ; BTVER2-SSE: # %bb.0: @@ -6768,14 +6768,14 @@ ; BDVER2-SSE-LABEL: test_mulpd: ; BDVER2-SSE: # %bb.0: ; BDVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_mulpd: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_mulpd: ; BTVER2-SSE: # %bb.0: @@ -6888,14 +6888,14 @@ ; BDVER2-SSE-LABEL: test_mulsd: ; BDVER2-SSE: # %bb.0: ; BDVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_mulsd: ; BDVER2: # %bb.0: ; BDVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: 
[11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_mulsd: ; BTVER2-SSE: # %bb.0: @@ -7020,17 +7020,17 @@ ; ; BDVER2-SSE-LABEL: test_orpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_orpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_orpd: ; BTVER2-SSE: # %bb.0: @@ -7155,15 +7155,15 @@ ; ; BDVER2-SSE-LABEL: test_packssdw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_packssdw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_packssdw: ; BTVER2-SSE: # %bb.0: @@ 
-7281,15 +7281,15 @@ ; ; BDVER2-SSE-LABEL: test_packsswb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_packsswb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_packsswb: ; BTVER2-SSE: # %bb.0: @@ -7407,15 +7407,15 @@ ; ; BDVER2-SSE-LABEL: test_packuswb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_packuswb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_packuswb: ; BTVER2-SSE: # %bb.0: @@ -7533,15 +7533,15 @@ ; ; BDVER2-SSE-LABEL: test_paddb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_paddb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpaddb (%rdi), 
%xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_paddb: ; BTVER2-SSE: # %bb.0: @@ -7657,15 +7657,15 @@ ; ; BDVER2-SSE-LABEL: test_paddd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_paddd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_paddd: ; BTVER2-SSE: # %bb.0: @@ -7777,15 +7777,15 @@ ; ; BDVER2-SSE-LABEL: test_paddq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_paddq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_paddq: ; BTVER2-SSE: # %bb.0: @@ -7901,15 +7901,15 @@ ; ; BDVER2-SSE-LABEL: test_paddsb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_paddsb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddsb %xmm1, 
%xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_paddsb: ; BTVER2-SSE: # %bb.0: @@ -8026,15 +8026,15 @@ ; ; BDVER2-SSE-LABEL: test_paddsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_paddsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_paddsw: ; BTVER2-SSE: # %bb.0: @@ -8151,15 +8151,15 @@ ; ; BDVER2-SSE-LABEL: test_paddusb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_paddusb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_paddusb: ; BTVER2-SSE: # %bb.0: @@ -8276,15 +8276,15 @@ ; ; BDVER2-SSE-LABEL: test_paddusw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; 
BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_paddusw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_paddusw: ; BTVER2-SSE: # %bb.0: @@ -8401,15 +8401,15 @@ ; ; BDVER2-SSE-LABEL: test_paddw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_paddw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_paddw: ; BTVER2-SSE: # %bb.0: @@ -8534,17 +8534,17 @@ ; ; BDVER2-SSE-LABEL: test_pand: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; BDVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pand: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BDVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: 
[2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pand: ; BTVER2-SSE: # %bb.0: @@ -8690,19 +8690,19 @@ ; ; BDVER2-SSE-LABEL: test_pandn: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; BDVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pandn: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BDVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pandn: ; BTVER2-SSE: # %bb.0: @@ -8829,15 +8829,15 @@ ; ; BDVER2-SSE-LABEL: test_pavgb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pavgb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pavgb: ; BTVER2-SSE: # %bb.0: @@ -8963,15 
+8963,15 @@ ; ; BDVER2-SSE-LABEL: test_pavgw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pavgw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pavgw: ; BTVER2-SSE: # %bb.0: @@ -9108,17 +9108,17 @@ ; ; BDVER2-SSE-LABEL: test_pcmpeqb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpeqb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcomeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-NEXT: vpcomeqb %xmm1, %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-NEXT: vpcomeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpeqb: ; BTVER2-SSE: # %bb.0: @@ -9251,17 +9251,17 @@ ; ; BDVER2-SSE-LABEL: test_pcmpeqd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: 
[1:1.00] +; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpeqd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcomeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-NEXT: vpcomeqd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpeqd: ; BTVER2-SSE: # %bb.0: @@ -9394,17 +9394,17 @@ ; ; BDVER2-SSE-LABEL: test_pcmpeqw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpeqw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcomeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-NEXT: vpcomeqw %xmm1, %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-NEXT: vpcomeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpeqw: ; BTVER2-SSE: # %bb.0: @@ -9543,18 +9543,18 @@ ; ; BDVER2-SSE-LABEL: test_pcmpgtb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BDVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pcmpgtb 
%xmm1, %xmm2 # sched: [2:0.50] +; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpgtb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcomgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-NEXT: vpcomgtb %xmm1, %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-NEXT: vpcomgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpgtb: ; BTVER2-SSE: # %bb.0: @@ -9695,18 +9695,18 @@ ; ; BDVER2-SSE-LABEL: test_pcmpgtd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BDVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [2:0.50] +; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpgtd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcomgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-NEXT: vpcomgtd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-NEXT: vpcomeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpgtd: ; BTVER2-SSE: # %bb.0: @@ -9847,18 +9847,18 @@ ; ; BDVER2-SSE-LABEL: test_pcmpgtw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BDVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: 
[7:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [2:0.50] +; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpgtw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcomgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-NEXT: vpcomgtw %xmm1, %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-NEXT: vpcomgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpgtw: ; BTVER2-SSE: # %bb.0: @@ -9978,15 +9978,15 @@ ; ; BDVER2-SSE-LABEL: test_pextrw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] +; BDVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [13:1.00] ; BDVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pextrw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] +; BDVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [13:1.00] ; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pextrw: ; BTVER2-SSE: # %bb.0: @@ -10100,15 +10100,15 @@ ; ; BDVER2-SSE-LABEL: test_pinsrw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pinsrw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] 
-; BDVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pinsrw: ; BTVER2-SSE: # %bb.0: @@ -10220,15 +10220,15 @@ ; ; BDVER2-SSE-LABEL: test_pmaddwd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmaddwd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmaddwd: ; BTVER2-SSE: # %bb.0: @@ -10346,15 +10346,15 @@ ; ; BDVER2-SSE-LABEL: test_pmaxsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmaxsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmaxsw: ; BTVER2-SSE: # %bb.0: @@ -10471,15 +10471,15 @@ ; ; BDVER2-SSE-LABEL: test_pmaxub: ; BDVER2-SSE: # %bb.0: -; 
BDVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmaxub: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmaxub: ; BTVER2-SSE: # %bb.0: @@ -10596,15 +10596,15 @@ ; ; BDVER2-SSE-LABEL: test_pminsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pminsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pminsw: ; BTVER2-SSE: # %bb.0: @@ -10721,15 +10721,15 @@ ; ; BDVER2-SSE-LABEL: test_pminub: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pminub: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: 
[5:1.00] ; ; BTVER2-SSE-LABEL: test_pminub: ; BTVER2-SSE: # %bb.0: @@ -10831,13 +10831,13 @@ ; ; BDVER2-SSE-LABEL: test_pmovmskb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [13:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovmskb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [13:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovmskb: ; BTVER2-SSE: # %bb.0: @@ -10944,15 +10944,15 @@ ; ; BDVER2-SSE-LABEL: test_pmulhuw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmulhuw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmulhuw: ; BTVER2-SSE: # %bb.0: @@ -11065,15 +11065,15 @@ ; ; BDVER2-SSE-LABEL: test_pmulhw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmulhw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulhw %xmm1, 
%xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmulhw: ; BTVER2-SSE: # %bb.0: @@ -11186,15 +11186,15 @@ ; ; BDVER2-SSE-LABEL: test_pmullw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmullw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmullw: ; BTVER2-SSE: # %bb.0: @@ -11306,15 +11306,15 @@ ; ; BDVER2-SSE-LABEL: test_pmuludq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmuludq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; 
BTVER2-SSE-LABEL: test_pmuludq: ; BTVER2-SSE: # %bb.0: @@ -11441,17 +11441,17 @@ ; ; BDVER2-SSE-LABEL: test_por: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_por: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_por: ; BTVER2-SSE: # %bb.0: @@ -11568,15 +11568,15 @@ ; ; BDVER2-SSE-LABEL: test_psadbw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [4:0.50] +; BDVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psadbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; BDVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psadbw: ; BTVER2-SSE: # %bb.0: @@ -11706,17 +11706,17 @@ ; ; BDVER2-SSE-LABEL: test_pshufd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] +; BDVER2-SSE-NEXT: 
pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [2:0.50] ; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pshufd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] ; BDVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [2:0.50] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pshufd: ; BTVER2-SSE: # %bb.0: @@ -11849,17 +11849,17 @@ ; ; BDVER2-SSE-LABEL: test_pshufhw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] +; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50] ; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pshufhw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] ; BDVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50] +; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pshufhw: ; BTVER2-SSE: # %bb.0: @@ -11992,17 +11992,17 @@ ; ; BDVER2-SSE-LABEL: test_pshuflw: ; BDVER2-SSE: # %bb.0: 
-; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] +; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50] ; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pshuflw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] ; BDVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50] +; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pshuflw: ; BTVER2-SSE: # %bb.0: @@ -12132,17 +12132,17 @@ ; ; BDVER2-SSE-LABEL: test_pslld: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; BDVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:0.50] +; BDVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pslld: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; 
BTVER2-SSE-LABEL: test_pslld: ; BTVER2-SSE: # %bb.0: @@ -12254,13 +12254,13 @@ ; ; BDVER2-SSE-LABEL: test_pslldq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pslldq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pslldq: ; BTVER2-SSE: # %bb.0: @@ -12379,17 +12379,17 @@ ; ; BDVER2-SSE-LABEL: test_psllq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; BDVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:0.50] +; BDVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psllq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psllq: ; BTVER2-SSE: # %bb.0: @@ -12521,17 +12521,17 @@ ; ; BDVER2-SSE-LABEL: test_psllw: ; BDVER2-SSE: # %bb.0: -; 
BDVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; BDVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:0.50] +; BDVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psllw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psllw: ; BTVER2-SSE: # %bb.0: @@ -12663,17 +12663,17 @@ ; ; BDVER2-SSE-LABEL: test_psrad: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; BDVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:0.50] +; BDVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psrad: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psrad: ; 
BTVER2-SSE: # %bb.0: @@ -12805,17 +12805,17 @@ ; ; BDVER2-SSE-LABEL: test_psraw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; BDVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:0.50] +; BDVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psraw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psraw: ; BTVER2-SSE: # %bb.0: @@ -12947,17 +12947,17 @@ ; ; BDVER2-SSE-LABEL: test_psrld: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; BDVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:0.50] +; BDVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psrld: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER2-NEXT: vpsrld $2, 
%xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psrld: ; BTVER2-SSE: # %bb.0: @@ -13069,13 +13069,13 @@ ; ; BDVER2-SSE-LABEL: test_psrldq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psrldq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psrldq: ; BTVER2-SSE: # %bb.0: @@ -13194,17 +13194,17 @@ ; ; BDVER2-SSE-LABEL: test_psrlq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; BDVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:0.50] +; BDVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psrlq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psrlq: ; BTVER2-SSE: # %bb.0: @@ 
-13336,17 +13336,17 @@ ; ; BDVER2-SSE-LABEL: test_psrlw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; BDVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:0.50] +; BDVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psrlw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psrlw: ; BTVER2-SSE: # %bb.0: @@ -13469,15 +13469,15 @@ ; ; BDVER2-SSE-LABEL: test_psubb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psubb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psubb: ; BTVER2-SSE: # %bb.0: @@ -13593,15 +13593,15 @@ ; ; BDVER2-SSE-LABEL: test_psubd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psubd (%rdi), %xmm0 # 
sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psubd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psubd: ; BTVER2-SSE: # %bb.0: @@ -13713,15 +13713,15 @@ ; ; BDVER2-SSE-LABEL: test_psubq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psubq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psubq: ; BTVER2-SSE: # %bb.0: @@ -13837,15 +13837,15 @@ ; ; BDVER2-SSE-LABEL: test_psubsb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psubsb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psubsb: ; BTVER2-SSE: # %bb.0: @@ -13962,15 +13962,15 @@ ; ; BDVER2-SSE-LABEL: test_psubsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: 
psubsw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psubsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psubsw: ; BTVER2-SSE: # %bb.0: @@ -14087,15 +14087,15 @@ ; ; BDVER2-SSE-LABEL: test_psubusb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psubusb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psubusb: ; BTVER2-SSE: # %bb.0: @@ -14212,15 +14212,15 @@ ; ; BDVER2-SSE-LABEL: test_psubusw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psubusw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: 
[5:1.00] ; ; BTVER2-SSE-LABEL: test_psubusw: ; BTVER2-SSE: # %bb.0: @@ -14337,15 +14337,15 @@ ; ; BDVER2-SSE-LABEL: test_psubw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psubw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psubw: ; BTVER2-SSE: # %bb.0: @@ -14461,15 +14461,15 @@ ; ; BDVER2-SSE-LABEL: test_punpckhbw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] +; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50] ; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_punpckhbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] +; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50] ; 
BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_punpckhbw: ; BTVER2-SSE: # %bb.0: @@ -14596,17 +14596,17 @@ ; ; BDVER2-SSE-LABEL: test_punpckhdq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] ; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_punpckhdq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] ; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_punpckhdq: ; BTVER2-SSE: # %bb.0: @@ -14736,17 +14736,17 @@ ; ; BDVER2-SSE-LABEL: test_punpckhqdq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] ; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] 
; ; BDVER2-LABEL: test_punpckhqdq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] ; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_punpckhqdq: ; BTVER2-SSE: # %bb.0: @@ -14867,15 +14867,15 @@ ; ; BDVER2-SSE-LABEL: test_punpckhwd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] ; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_punpckhwd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] ; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_punpckhwd: ; BTVER2-SSE: # %bb.0: @@ -14991,15 +14991,15 @@ ; ; BDVER2-SSE-LABEL: test_punpcklbw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] ; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_punpcklbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] ; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_punpcklbw: ; BTVER2-SSE: # %bb.0: @@ -15126,17 +15126,17 @@ ; ; BDVER2-SSE-LABEL: test_punpckldq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] ; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_punpckldq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] ; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = 
xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_punpckldq: ; BTVER2-SSE: # %bb.0: @@ -15266,17 +15266,17 @@ ; ; BDVER2-SSE-LABEL: test_punpcklqdq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] ; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_punpcklqdq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] ; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_punpcklqdq: ; BTVER2-SSE: # %bb.0: @@ -15397,15 +15397,15 @@ ; ; BDVER2-SSE-LABEL: test_punpcklwd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] ; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_punpcklwd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklwd 
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] ; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_punpcklwd: ; BTVER2-SSE: # %bb.0: @@ -15530,17 +15530,17 @@ ; ; BDVER2-SSE-LABEL: test_pxor: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; BDVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pxor: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; BDVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pxor: ; BTVER2-SSE: # %bb.0: @@ -15670,17 +15670,17 @@ ; ; BDVER2-SSE-LABEL: test_shufpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50] +; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; 
BDVER2-LABEL: test_shufpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; BDVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50] +; BDVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_shufpd: ; BTVER2-SSE: # %bb.0: @@ -15811,17 +15811,17 @@ ; ; BDVER2-SSE-LABEL: test_sqrtpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] -; BDVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [9:13.50] +; BDVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [14:13.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_sqrtpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:21.00] -; BDVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:21.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [14:13.50] +; BDVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [9:13.50] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtpd: ; BTVER2-SSE: # %bb.0: @@ -15967,19 +15967,19 @@ ; ; BDVER2-SSE-LABEL: test_sqrtsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00] -; BDVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; BDVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # 
sched: [1:1.00] +; BDVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] +; BDVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [9:13.50] +; BDVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [9:13.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_sqrtsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00] -; BDVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; BDVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50] +; BDVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [9:13.50] +; BDVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [9:13.50] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtsd: ; BTVER2-SSE: # %bb.0: @@ -16101,15 +16101,15 @@ ; ; BDVER2-SSE-LABEL: test_subpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_subpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_subpd: ; BTVER2-SSE: # %bb.0: @@ -16221,15 +16221,15 @@ ; ; BDVER2-SSE-LABEL: test_subsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] -; 
BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_subsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_subsd: ; BTVER2-SSE: # %bb.0: @@ -16445,31 +16445,31 @@ ; ; BDVER2-SSE-LABEL: test_ucomisd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_ucomisd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] 
+; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-NEXT: setnp %al # sched: [1:0.50] ; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] +; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] +; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_ucomisd: ; BTVER2-SSE: # %bb.0: @@ -16628,17 +16628,17 @@ ; ; BDVER2-SSE-LABEL: test_unpckhpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] +; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_unpckhpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] +; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_unpckhpd: ; BTVER2-SSE: # %bb.0: @@ -16776,18 +16776,18 @@ ; ; BDVER2-SSE-LABEL: test_unpcklpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} 
xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] +; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50] +; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [2:0.50] +; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_unpcklpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [2:0.50] +; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_unpcklpd: ; BTVER2-SSE: # %bb.0: @@ -16919,17 +16919,17 @@ ; ; BDVER2-SSE-LABEL: test_xorpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_xorpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: 
vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_xorpd: ; BTVER2-SSE: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll @@ -14,9 +14,9 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+sse3 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+sse3 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s 
--check-prefixes=CHECK,ZNVER1-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 @@ -102,15 +102,15 @@ ; ; BDVER2-SSE-LABEL: test_addsubpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_addsubpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_addsubpd: ; BTVER2-SSE: # %bb.0: @@ -223,15 +223,15 @@ ; ; BDVER2-SSE-LABEL: test_addsubps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_addsubps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_addsubps: ; BTVER2-SSE: # %bb.0: @@ -344,15 +344,15 @@ ; ; BDVER2-SSE-LABEL: test_haddpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: haddpd 
%xmm1, %xmm0 # sched: [5:2.00] -; BDVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [11:1.00] +; BDVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [16:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_haddpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00] +; BDVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_haddpd: ; BTVER2-SSE: # %bb.0: @@ -465,15 +465,15 @@ ; ; BDVER2-SSE-LABEL: test_haddps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] -; BDVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [11:1.00] +; BDVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [16:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_haddps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] +; BDVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_haddps: ; BTVER2-SSE: # %bb.0: @@ -586,15 +586,15 @@ ; ; BDVER2-SSE-LABEL: test_hsubpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] -; BDVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [11:1.00] +; BDVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [16:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: 
test_hsubpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00] +; BDVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_hsubpd: ; BTVER2-SSE: # %bb.0: @@ -707,15 +707,15 @@ ; ; BDVER2-SSE-LABEL: test_hsubps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] -; BDVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [11:1.00] +; BDVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [16:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_hsubps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; BDVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] +; BDVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_hsubps: ; BTVER2-SSE: # %bb.0: @@ -817,13 +817,13 @@ ; ; BDVER2-SSE-LABEL: test_lddqu: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_lddqu: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_lddqu: ; BTVER2-SSE: # %bb.0: @@ -943,17 +943,17 @@ ; ; BDVER2-SSE-LABEL: test_monitor: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33] ; BDVER2-SSE-NEXT: leaq 
(%rdi), %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: monitor # sched: [100:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BDVER2-SSE-NEXT: monitor # sched: [100:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_monitor: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %esi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] -; BDVER2-NEXT: monitor # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BDVER2-NEXT: monitor # sched: [100:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_monitor: ; BTVER2-SSE: # %bb.0: @@ -1082,17 +1082,17 @@ ; ; BDVER2-SSE-LABEL: test_movddup: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] -; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50] -; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [2:0.50] +; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [7:0.50] +; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movddup: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; BDVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50] -; BDVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [7:0.50] +; BDVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [2:0.50] +; BDVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movddup: ; BTVER2-SSE: # %bb.0: @@ -1223,17 +1223,17 @@ ; ; BDVER2-SSE-LABEL: test_movshdup: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] -; 
BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [2:0.50] +; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [7:0.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movshdup: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; BDVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [7:0.50] +; BDVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [2:0.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movshdup: ; BTVER2-SSE: # %bb.0: @@ -1364,17 +1364,17 @@ ; ; BDVER2-SSE-LABEL: test_movsldup: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] -; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [2:0.50] +; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [7:0.50] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movsldup: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; BDVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [7:0.50] +; BDVER2-NEXT: vmovsldup 
{{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [2:0.50] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movsldup: ; BTVER2-SSE: # %bb.0: @@ -1504,17 +1504,17 @@ ; ; BDVER2-SSE-LABEL: test_mwait: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33] -; BDVER2-SSE-NEXT: mwait # sched: [100:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50] +; BDVER2-SSE-NEXT: mwait # sched: [100:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_mwait: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %esi, %eax # sched: [1:0.33] -; BDVER2-NEXT: movl %edi, %ecx # sched: [1:0.33] -; BDVER2-NEXT: mwait # sched: [100:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movl %esi, %eax # sched: [1:0.50] +; BDVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] +; BDVER2-NEXT: mwait # sched: [100:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_mwait: ; BTVER2-SSE: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll @@ -13,8 +13,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=+sse4.1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE @@ -107,17 +107,17 @@ ; ; BDVER2-SSE-LABEL: test_blendpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] +; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [2:0.50] ; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_blendpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50] +; BDVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [2:0.50] ; BDVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_blendpd: ; BTVER2-SSE: # %bb.0: @@ -240,17 +240,17 @@ ; ; BDVER2-SSE-LABEL: test_blendps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] 
sched: [1:0.50] +; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [2:0.50] ; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_blendps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [2:0.50] ; BDVER2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_blendps: ; BTVER2-SSE: # %bb.0: @@ -382,18 +382,18 @@ ; ; BDVER2-SSE-LABEL: test_blendvpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] -; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; BDVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; BDVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50] +; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BDVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BDVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_blendvpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vblendvpd %xmm2, 
%xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BDVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_blendvpd: ; BTVER2-SSE: # %bb.0: @@ -527,18 +527,18 @@ ; ; BDVER2-SSE-LABEL: test_blendvps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] -; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; BDVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; BDVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] +; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BDVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BDVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_blendvps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BDVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_blendvps: ; BTVER2-SSE: # %bb.0: @@ -651,15 +651,15 @@ ; ; BDVER2-SSE-LABEL: test_dppd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [15:1.50] +; BDVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [20:1.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_dppd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; 
BDVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [15:1.50] +; BDVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [20:1.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_dppd: ; BTVER2-SSE: # %bb.0: @@ -766,15 +766,15 @@ ; ; BDVER2-SSE-LABEL: test_dpps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00] -; BDVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [18:2.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [25:1.50] +; BDVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [30:1.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_dpps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] -; BDVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [25:1.50] +; BDVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [30:1.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_dpps: ; BTVER2-SSE: # %bb.0: @@ -881,15 +881,15 @@ ; ; BDVER2-SSE-LABEL: test_extractps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] -; BDVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [13:1.00] +; BDVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_extractps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] -; BDVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [13:1.00] +; BDVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; 
BTVER2-SSE-LABEL: test_extractps: ; BTVER2-SSE: # %bb.0: @@ -997,15 +997,15 @@ ; ; BDVER2-SSE-LABEL: test_insertps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [2:0.50] +; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_insertps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [2:0.50] +; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_insertps: ; BTVER2-SSE: # %bb.0: @@ -1100,13 +1100,13 @@ ; ; BDVER2-SSE-LABEL: test_movntdqa: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_movntdqa: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_movntdqa: ; BTVER2-SSE: # %bb.0: @@ -1207,15 +1207,15 @@ ; ; BDVER2-SSE-LABEL: test_mpsadbw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [9:2.00] +; BDVER2-SSE-NEXT: 
mpsadbw $7, (%rdi), %xmm0 # sched: [14:2.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_mpsadbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [9:2.00] +; BDVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_mpsadbw: ; BTVER2-SSE: # %bb.0: @@ -1323,15 +1323,15 @@ ; ; BDVER2-SSE-LABEL: test_packusdw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_packusdw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_packusdw: ; BTVER2-SSE: # %bb.0: @@ -1460,18 +1460,18 @@ ; ; BDVER2-SSE-LABEL: test_pblendvb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] -; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00] -; BDVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] -; BDVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [2:0.50] +; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BDVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BDVER2-SSE-NEXT: movdqa 
%xmm3, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pblendvb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; BDVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pblendvb: ; BTVER2-SSE: # %bb.0: @@ -1596,17 +1596,17 @@ ; ; BDVER2-SSE-LABEL: test_pblendw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] +; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [2:0.50] ; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pblendw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] +; BDVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [2:0.50] ; BDVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pblendw: ; BTVER2-SSE: # %bb.0: @@ -1717,15 +1717,15 @@ ; ; BDVER2-SSE-LABEL: test_pcmpeqq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpeqq 
%xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpeqq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcomeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpcomeqq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpcomeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpeqq: ; BTVER2-SSE: # %bb.0: @@ -1833,15 +1833,15 @@ ; ; BDVER2-SSE-LABEL: test_pextrb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] -; BDVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [13:1.00] +; BDVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pextrb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] -; BDVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [13:1.00] +; BDVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pextrb: ; BTVER2-SSE: # %bb.0: @@ -1960,17 +1960,17 @@ ; ; BDVER2-SSE-LABEL: test_pextrd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] -; BDVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [13:1.00] +; BDVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pextrd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddd 
%xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] -; BDVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [13:1.00] +; BDVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pextrd: ; BTVER2-SSE: # %bb.0: @@ -2081,15 +2081,15 @@ ; ; BDVER2-SSE-LABEL: test_pextrq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] -; BDVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [13:1.00] +; BDVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pextrq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] -; BDVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [13:1.00] +; BDVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pextrq: ; BTVER2-SSE: # %bb.0: @@ -2195,15 +2195,15 @@ ; ; BDVER2-SSE-LABEL: test_pextrw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] -; BDVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [13:1.00] +; BDVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pextrw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] -; BDVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [13:1.00] 
+; BDVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [13:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pextrw: ; BTVER2-SSE: # %bb.0: @@ -2310,15 +2310,15 @@ ; ; BDVER2-SSE-LABEL: test_phminposuw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_phminposuw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_phminposuw: ; BTVER2-SSE: # %bb.0: @@ -2425,15 +2425,15 @@ ; ; BDVER2-SSE-LABEL: test_pinsrb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pinsrb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pinsrb: ; BTVER2-SSE: # %bb.0: @@ -2539,15 +2539,15 @@ ; ; BDVER2-SSE-LABEL: test_pinsrd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrd $1, %edi, 
%xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pinsrd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pinsrd: ; BTVER2-SSE: # %bb.0: @@ -2665,17 +2665,17 @@ ; ; BDVER2-SSE-LABEL: test_pinsrq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:0.50] +; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pinsrq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:0.50] +; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pinsrq: ; BTVER2-SSE: # %bb.0: @@ -2786,15 +2786,15 @@ ; ; BDVER2-SSE-LABEL: test_pmaxsb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; 
BDVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmaxsb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmaxsb: ; BTVER2-SSE: # %bb.0: @@ -2901,15 +2901,15 @@ ; ; BDVER2-SSE-LABEL: test_pmaxsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmaxsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmaxsd: ; BTVER2-SSE: # %bb.0: @@ -3016,15 +3016,15 @@ ; ; BDVER2-SSE-LABEL: test_pmaxud: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmaxud: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmaxud: ; BTVER2-SSE: # %bb.0: 
@@ -3131,15 +3131,15 @@ ; ; BDVER2-SSE-LABEL: test_pmaxuw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmaxuw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmaxuw: ; BTVER2-SSE: # %bb.0: @@ -3246,15 +3246,15 @@ ; ; BDVER2-SSE-LABEL: test_pminsb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pminsb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pminsb: ; BTVER2-SSE: # %bb.0: @@ -3361,15 +3361,15 @@ ; ; BDVER2-SSE-LABEL: test_pminsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pminsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: 
[7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pminsd: ; BTVER2-SSE: # %bb.0: @@ -3476,15 +3476,15 @@ ; ; BDVER2-SSE-LABEL: test_pminud: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pminud: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pminud: ; BTVER2-SSE: # %bb.0: @@ -3591,15 +3591,15 @@ ; ; BDVER2-SSE-LABEL: test_pminuw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pminuw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pminuw: ; BTVER2-SSE: # %bb.0: @@ -3719,17 +3719,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovsxbw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] +; 
BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovsxbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] ; BDVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovsxbw: ; BTVER2-SSE: # %bb.0: @@ -3854,17 +3854,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovsxbd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovsxbd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] ; BDVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovsxbd: ; BTVER2-SSE: # %bb.0: @@ -3989,17 +3989,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovsxbq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovsxbq: ; BDVER2: # %bb.0: -; 
BDVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] ; BDVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovsxbq: ; BTVER2-SSE: # %bb.0: @@ -4124,17 +4124,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovsxdq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovsxdq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] ; BDVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovsxdq: ; BTVER2-SSE: # %bb.0: @@ -4259,17 +4259,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovsxwd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovsxwd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] ; BDVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: 
[7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovsxwd: ; BTVER2-SSE: # %bb.0: @@ -4394,17 +4394,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovsxwq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovsxwq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] ; BDVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovsxwq: ; BTVER2-SSE: # %bb.0: @@ -4529,17 +4529,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovzxbw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] 
+; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovzxbw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] ; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [2:0.50] +; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovzxbw: ; BTVER2-SSE: # %bb.0: @@ -4664,17 +4664,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovzxbd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovzxbd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] ; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq 
# sched: [1:1.00] +; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [2:0.50] +; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovzxbd: ; BTVER2-SSE: # %bb.0: @@ -4799,17 +4799,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovzxbq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovzxbq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] ; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovzxbq: ; BTVER2-SSE: # %bb.0: @@ -4934,17 +4934,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovzxdq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} 
xmm1 = xmm0[0],zero,xmm0[1],zero sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovzxdq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] ; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovzxdq: ; BTVER2-SSE: # %bb.0: @@ -5069,17 +5069,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovzxwd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovzxwd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] ; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [2:0.50] +; 
BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovzxwd: ; BTVER2-SSE: # %bb.0: @@ -5204,17 +5204,17 @@ ; ; BDVER2-SSE-LABEL: test_pmovzxwq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] +; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [2:0.50] ; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmovzxwq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] ; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [2:0.50] +; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmovzxwq: ; BTVER2-SSE: # %bb.0: @@ -5338,17 +5338,17 @@ ; ; BDVER2-SSE-LABEL: test_pmuldq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [9:1.00] +; BDVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmuldq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: 
[5:1.00] -; BDVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [9:1.00] +; BDVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmuldq: ; BTVER2-SSE: # %bb.0: @@ -5460,15 +5460,15 @@ ; ; BDVER2-SSE-LABEL: test_pmulld: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:2.00] +; BDVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [10:2.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmulld: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BDVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmulld: ; BTVER2-SSE: # %bb.0: @@ -5622,23 +5622,23 @@ ; ; BDVER2-SSE-LABEL: test_ptest: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-SSE-NEXT: setb %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-SSE-NEXT: setb %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; 
; BDVER2-LABEL: test_ptest: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] +; BDVER2-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00] ; BDVER2-NEXT: setb %al # sched: [1:0.50] -; BDVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] +; BDVER2-NEXT: vptest (%rdi), %xmm0 # sched: [6:1.00] ; BDVER2-NEXT: setb %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33] -; BDVER2-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] +; BDVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_ptest: ; BTVER2-SSE: # %bb.0: @@ -5776,17 +5776,17 @@ ; ; BDVER2-SSE-LABEL: test_roundpd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] +; BDVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00] ; BDVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_roundpd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] ; BDVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_roundpd: ; BTVER2-SSE: # %bb.0: @@ -5912,17 +5912,17 @@ ; ; BDVER2-SSE-LABEL: test_roundps: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] +; BDVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00] ; BDVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # 
sched: [1:1.00] +; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_roundps: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] ; BDVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_roundps: ; BTVER2-SSE: # %bb.0: @@ -6053,18 +6053,18 @@ ; ; BDVER2-SSE-LABEL: test_roundsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; BDVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] +; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] ; BDVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00] +; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_roundsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] +; BDVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] ; BDVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_roundsd: ; BTVER2-SSE: # %bb.0: @@ -6197,18 +6197,18 @@ ; ; BDVER2-SSE-LABEL: test_roundss: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] -; BDVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] +; BDVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] ; BDVER2-SSE-NEXT: roundss $7, 
(%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00] +; BDVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [5:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_roundss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] +; BDVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] ; BDVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_roundss: ; BTVER2-SSE: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll @@ -13,8 +13,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=+sse4.2,+pclmul -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 
-mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE @@ -107,17 +107,17 @@ ; ; BDVER2-SSE-LABEL: crc32_32_8: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00] +; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [7:2.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: crc32_32_8: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33] -; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00] +; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: crc32_32_8: ; BTVER2-SSE: # %bb.0: @@ -240,17 +240,17 @@ ; ; BDVER2-SSE-LABEL: crc32_32_16: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00] -; BDVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: crc32w %si, %eax # sched: [5:2.00] +; BDVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [7:2.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: crc32_32_16: ; 
BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33] -; BDVER2-NEXT: crc32w %si, %eax # sched: [3:1.00] -; BDVER2-NEXT: crc32w (%rdx), %eax # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BDVER2-NEXT: crc32w %si, %eax # sched: [5:2.00] +; BDVER2-NEXT: crc32w (%rdx), %eax # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: crc32_32_16: ; BTVER2-SSE: # %bb.0: @@ -373,17 +373,17 @@ ; ; BDVER2-SSE-LABEL: crc32_32_32: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; BDVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: crc32l %esi, %eax # sched: [6:2.00] +; BDVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [7:2.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: crc32_32_32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33] -; BDVER2-NEXT: crc32l %esi, %eax # sched: [3:1.00] -; BDVER2-NEXT: crc32l (%rdx), %eax # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50] +; BDVER2-NEXT: crc32l %esi, %eax # sched: [6:2.00] +; BDVER2-NEXT: crc32l (%rdx), %eax # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: crc32_32_32: ; BTVER2-SSE: # %bb.0: @@ -506,17 +506,17 @@ ; ; BDVER2-SSE-LABEL: crc32_64_8: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33] -; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00] +; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [7:2.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: crc32_64_8: ; 
BDVER2: # %bb.0: -; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33] -; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00] -; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00] +; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: crc32_64_8: ; BTVER2-SSE: # %bb.0: @@ -639,17 +639,17 @@ ; ; BDVER2-SSE-LABEL: crc32_64_64: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33] -; BDVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; BDVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BDVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [10:2.00] +; BDVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [7:2.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: crc32_64_64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33] -; BDVER2-NEXT: crc32q %rsi, %rax # sched: [3:1.00] -; BDVER2-NEXT: crc32q (%rdx), %rax # sched: [8:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BDVER2-NEXT: crc32q %rsi, %rax # sched: [10:2.00] +; BDVER2-NEXT: crc32q (%rdx), %rax # sched: [7:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: crc32_64_64: ; BTVER2-SSE: # %bb.0: @@ -844,29 +844,29 @@ ; ; BDVER2-SSE-LABEL: test_pcmpestri: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33] -; BDVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67] -; BDVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.33] -; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33] -; BDVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] +; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] +; 
BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [15:4.00] +; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] +; BDVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [20:4.50] ; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx ; BDVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpestri: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33] -; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33] -; BDVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67] -; BDVER2-NEXT: movl %ecx, %esi # sched: [1:0.33] -; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33] -; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33] -; BDVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] +; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50] +; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50] +; BDVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [15:4.00] +; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50] +; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50] +; BDVER2-NEXT: movl %ecx, %esi # sched: [1:0.50] +; BDVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [20:4.50] ; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx ; BDVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpestri: ; BTVER2-SSE: # %bb.0: @@ -1050,23 +1050,23 @@ ; ; BDVER2-SSE-LABEL: test_pcmpestrm: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33] -; BDVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67] -; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33] -; BDVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: 
[11:2.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00] +; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [15:4.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpestrm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33] -; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33] -; BDVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67] -; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33] -; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33] -; BDVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50] +; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50] +; BDVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00] +; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50] +; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50] +; BDVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [15:4.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpestrm: ; BTVER2-SSE: # %bb.0: @@ -1225,21 +1225,21 @@ ; ; BDVER2-SSE-LABEL: test_pcmpistri: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; BDVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.33] -; BDVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] +; BDVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [14:1.00] +; BDVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.50] +; BDVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [19:1.00] ; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx ; BDVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpistri: ; BDVER2: # %bb.0: -; BDVER2-NEXT: 
vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] -; BDVER2-NEXT: movl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] +; BDVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [14:1.00] +; BDVER2-NEXT: movl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [19:1.00] ; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx ; BDVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpistri: ; BTVER2-SSE: # %bb.0: @@ -1359,15 +1359,15 @@ ; ; BDVER2-SSE-LABEL: test_pcmpistrm: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; BDVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [6:1.00] +; BDVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpistrm: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; BDVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [6:1.00] +; BDVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpistrm: ; BTVER2-SSE: # %bb.0: @@ -1474,15 +1474,15 @@ ; ; BDVER2-SSE-LABEL: test_pcmpgtq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [7:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pcmpgtq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; 
BDVER2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpcomgtq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pcmpgtq: ; BTVER2-SSE: # %bb.0: @@ -1590,15 +1590,15 @@ ; ; BDVER2-SSE-LABEL: test_pclmulqdq: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00] -; BDVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [12:1.00] +; BDVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [17:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pclmulqdq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00] -; BDVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [13:1.00] +; BDVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [17:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pclmulqdq: ; BTVER2-SSE: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse4a-schedule.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2 ; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1 @@ -12,8 +12,8 @@ ; ; BDVER2-LABEL: test_extrq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: extrq %xmm1, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_extrq: ; BTVER2: # %bb.0: @@ -37,8 +37,8 @@ ; ; BDVER2-LABEL: test_extrqi: ; BDVER2: # %bb.0: -; BDVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: extrq $2, $3, %xmm0 # sched: [3:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_extrqi: ; BTVER2: # %bb.0: @@ -62,8 +62,8 @@ ; ; BDVER2-LABEL: test_insertq: ; BDVER2: # %bb.0: -; BDVER2-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: insertq %xmm1, %xmm0 # sched: [3:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_insertq: ; BTVER2: # %bb.0: @@ -87,8 +87,8 @@ ; ; BDVER2-LABEL: test_insertqi: ; BDVER2: # %bb.0: -; BDVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [3:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_insertqi: ; BTVER2: # %bb.0: @@ -112,8 +112,8 @@ ; ; BDVER2-LABEL: test_movntsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [3:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movntsd: ; BTVER2: # %bb.0: @@ -137,8 +137,8 @@ ; ; BDVER2-LABEL: test_movntss: ; BDVER2: # %bb.0: -; BDVER2-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: movntss %xmm0, (%rdi) # sched: [3:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-LABEL: test_movntss: ; BTVER2: # %bb.0: Index: 
llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll +++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll @@ -14,8 +14,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+ssse3 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE @@ -117,17 +117,17 @@ ; ; BDVER2-SSE-LABEL: test_pabsb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; 
BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pabsb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] ; BDVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pabsb: ; BTVER2-SSE: # %bb.0: @@ -260,17 +260,17 @@ ; ; BDVER2-SSE-LABEL: test_pabsd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pabsd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] ; BDVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pabsd: ; BTVER2-SSE: # %bb.0: @@ -403,17 +403,17 @@ ; ; BDVER2-SSE-LABEL: test_pabsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] +; BDVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [2:0.50] ; BDVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pabsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpabsw %xmm0, %xmm0 
# sched: [1:0.50] ; BDVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pabsw: ; BTVER2-SSE: # %bb.0: @@ -541,16 +541,16 @@ ; ; BDVER2-SSE-LABEL: test_palignr: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] +; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [2:0.50] ; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [2:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_palignr: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] +; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [2:0.50] ; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_palignr: ; BTVER2-SSE: # %bb.0: @@ -664,15 +664,15 @@ ; ; BDVER2-SSE-LABEL: test_phaddd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] -; BDVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [10:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_phaddd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # 
sched: [3:1.50] -; BDVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_phaddd: ; BTVER2-SSE: # %bb.0: @@ -785,15 +785,15 @@ ; ; BDVER2-SSE-LABEL: test_phaddsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] -; BDVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [10:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_phaddsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; BDVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_phaddsw: ; BTVER2-SSE: # %bb.0: @@ -906,15 +906,15 @@ ; ; BDVER2-SSE-LABEL: test_phaddw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50] -; BDVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [10:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_phaddw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; BDVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: 
test_phaddw: ; BTVER2-SSE: # %bb.0: @@ -1027,15 +1027,15 @@ ; ; BDVER2-SSE-LABEL: test_phsubd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] -; BDVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [10:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_phsubd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; BDVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_phsubd: ; BTVER2-SSE: # %bb.0: @@ -1148,15 +1148,15 @@ ; ; BDVER2-SSE-LABEL: test_phsubsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50] -; BDVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [10:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_phsubsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; BDVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_phsubsw: ; BTVER2-SSE: # %bb.0: @@ -1269,15 +1269,15 @@ ; ; BDVER2-SSE-LABEL: test_phsubw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50] -; BDVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: phsubw 
%xmm1, %xmm0 # sched: [5:0.50] +; BDVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [10:0.50] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_phsubw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; BDVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [5:0.50] +; BDVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_phsubw: ; BTVER2-SSE: # %bb.0: @@ -1390,15 +1390,15 @@ ; ; BDVER2-SSE-LABEL: test_pmaddubsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmaddubsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmaddubsw: ; BTVER2-SSE: # %bb.0: @@ -1512,15 +1512,15 @@ ; ; BDVER2-SSE-LABEL: test_pmulhrsw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00] +; BDVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [9:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pmulhrsw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vpmulhrsw 
(%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pmulhrsw: ; BTVER2-SSE: # %bb.0: @@ -1633,15 +1633,15 @@ ; ; BDVER2-SSE-LABEL: test_pshufb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [3:2.00] +; BDVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [8:2.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_pshufb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BDVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_pshufb: ; BTVER2-SSE: # %bb.0: @@ -1758,15 +1758,15 @@ ; ; BDVER2-SSE-LABEL: test_psignb: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psignb: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psignb: ; BTVER2-SSE: # %bb.0: @@ -1883,15 +1883,15 @@ ; ; BDVER2-SSE-LABEL: test_psignd: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] 
+; BDVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psignd: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psignd: ; BTVER2-SSE: # %bb.0: @@ -2008,15 +2008,15 @@ ; ; BDVER2-SSE-LABEL: test_psignw: ; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] +; BDVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [2:0.50] ; BDVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [1:1.00] +; BDVER2-SSE-NEXT: retq # sched: [5:1.00] ; ; BDVER2-LABEL: test_psignw: ; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BTVER2-SSE-LABEL: test_psignw: ; BTVER2-SSE: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/tbm-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/tbm-schedule.ll +++ llvm/trunk/test/CodeGen/X86/tbm-schedule.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER 
--check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4 @@ -16,12 +16,12 @@ ; ; BDVER2-LABEL: test_x86_tbm_bextri_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; BDVER2-NEXT: # sched: [2:1.00] ; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: # sched: [6:0.50] +; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 +; BDVER2-NEXT: # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_bextri_u32: ; BDVER3: # %bb.0: @@ -57,12 +57,12 @@ ; ; BDVER2-LABEL: test_x86_tbm_bextri_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 -; BDVER2-NEXT: # sched: [2:1.00] ; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04 -; BDVER2-NEXT: # sched: [7:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: # sched: [6:0.50] +; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 +; BDVER2-NEXT: # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_bextri_u64: ; BDVER3: # %bb.0: @@ -96,10 +96,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blcfill_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcfilll %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcfilll %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: 
test_x86_tbm_blcfill_u32: ; BDVER3: # %bb.0: @@ -133,10 +133,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blcfill_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcfillq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blcfill_u64: ; BDVER3: # %bb.0: @@ -170,10 +170,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blci_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcil %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: blcil (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcil %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blci_u32: ; BDVER3: # %bb.0: @@ -209,10 +209,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blci_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blciq %rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: blciq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blciq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blci_u64: ; BDVER3: # %bb.0: @@ -248,10 +248,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blcic_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcicl %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: blcicl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcicl %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blcic_u32: ; BDVER3: # %bb.0: @@ -287,10 +287,10 @@ ; ; 
BDVER2-LABEL: test_x86_tbm_blcic_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcicq %rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: blcicq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcicq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blcic_u64: ; BDVER3: # %bb.0: @@ -326,10 +326,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blcmsk_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcmskl %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcmskl %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blcmsk_u32: ; BDVER3: # %bb.0: @@ -363,10 +363,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blcmsk_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcmskq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blcmsk_u64: ; BDVER3: # %bb.0: @@ -400,10 +400,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blcs_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcsl %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: blcsl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcsl %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blcs_u32: ; BDVER3: # %bb.0: @@ -437,10 +437,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blcs_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blcsq 
%rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: blcsq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blcsq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blcs_u64: ; BDVER3: # %bb.0: @@ -474,10 +474,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blsfill_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blsfilll %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsfilll %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blsfill_u32: ; BDVER3: # %bb.0: @@ -511,10 +511,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blsfill_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsfillq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blsfill_u64: ; BDVER3: # %bb.0: @@ -548,10 +548,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blsic_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blsicl %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: blsicl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsicl %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blsic_u32: ; BDVER3: # %bb.0: @@ -587,10 +587,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_blsic_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: blsicq %rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: blsicq (%rsi), %rax # 
sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: blsicq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_blsic_u64: ; BDVER3: # %bb.0: @@ -626,10 +626,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_t1mskc_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: t1mskcl %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_t1mskc_u32: ; BDVER3: # %bb.0: @@ -665,10 +665,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_t1mskc_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: t1mskcq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_t1mskc_u64: ; BDVER3: # %bb.0: @@ -704,10 +704,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_tzmsk_u32: ; BDVER2: # %bb.0: -; BDVER2-NEXT: tzmskl %edi, %ecx # sched: [1:0.33] ; BDVER2-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33] -; BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: tzmskl %edi, %ecx # sched: [2:0.50] +; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_tzmsk_u32: ; BDVER3: # %bb.0: @@ -743,10 +743,10 @@ ; ; BDVER2-LABEL: test_x86_tbm_tzmsk_u64: ; BDVER2: # %bb.0: -; BDVER2-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33] ; BDVER2-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33] -; 
BDVER2-NEXT: retq # sched: [1:1.00] +; BDVER2-NEXT: tzmskq %rdi, %rcx # sched: [2:0.50] +; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BDVER2-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_x86_tbm_tzmsk_u64: ; BDVER3: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/wide-fma-contraction.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/wide-fma-contraction.ll +++ llvm/trunk/test/CodeGen/X86/wide-fma-contraction.ll @@ -30,8 +30,8 @@ ; CHECK-NOFMA-NEXT: andl $-32, %esp ; CHECK-NOFMA-NEXT: subl $32, %esp ; CHECK-NOFMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 -; CHECK-NOFMA-NEXT: vaddps 8(%ebp), %ymm0, %ymm0 ; CHECK-NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1 +; CHECK-NOFMA-NEXT: vaddps 8(%ebp), %ymm0, %ymm0 ; CHECK-NOFMA-NEXT: vaddps 40(%ebp), %ymm1, %ymm1 ; CHECK-NOFMA-NEXT: movl %ebp, %esp ; CHECK-NOFMA-NEXT: popl %ebp Index: llvm/trunk/test/CodeGen/X86/x87-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/x87-schedule.ll +++ llvm/trunk/test/CodeGen/X86/x87-schedule.ll @@ -8,7 +8,7 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s 
--check-prefix=CHECK --check-prefix=ZNVER1 @@ -72,9 +72,9 @@ ; BDVER2-LABEL: test_f2xm1: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: f2xm1 # sched: [100:0.33] +; BDVER2-NEXT: f2xm1 # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_f2xm1: ; BTVER2: # %bb.0: @@ -155,7 +155,7 @@ ; BDVER2-NEXT: #APP ; BDVER2-NEXT: fabs # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fabs: ; BTVER2: # %bb.0: @@ -276,12 +276,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fadd %st(0), %st(1) # sched: [3:1.00] -; BDVER2-NEXT: fadd %st(2) # sched: [3:1.00] +; BDVER2-NEXT: fadd %st(0), %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fadd %st(2) # sched: [5:1.00] ; BDVER2-NEXT: fadds (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: faddl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fadd: ; BTVER2: # %bb.0: @@ -412,12 +412,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: faddp %st(1) # sched: [3:1.00] -; BDVER2-NEXT: faddp %st(2) # sched: [3:1.00] -; BDVER2-NEXT: fiadds (%ecx) # sched: [13:2.00] -; BDVER2-NEXT: fiaddl (%eax) # sched: [13:2.00] +; BDVER2-NEXT: faddp %st(1) # sched: [5:1.00] +; BDVER2-NEXT: faddp %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fiadds (%ecx) # sched: [10:1.00] +; BDVER2-NEXT: fiaddl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_faddp_fiadd: ; BTVER2: # %bb.0: @@ -523,10 +523,10 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: 
[5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fbld (%eax) # sched: [100:0.33] -; BDVER2-NEXT: fbstp (%eax) # sched: [100:0.33] +; BDVER2-NEXT: fbld (%eax) # sched: [100:0.50] +; BDVER2-NEXT: fbstp (%eax) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fbld_fbstp: ; BTVER2: # %bb.0: @@ -611,7 +611,7 @@ ; BDVER2-NEXT: #APP ; BDVER2-NEXT: fchs # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fchs: ; BTVER2: # %bb.0: @@ -698,10 +698,10 @@ ; BDVER2-LABEL: test_fclex: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.33] -; BDVER2-NEXT: fnclex # sched: [100:0.33] +; BDVER2-NEXT: wait # sched: [100:0.50] +; BDVER2-NEXT: fnclex # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fclex: ; BTVER2: # %bb.0: @@ -782,9 +782,9 @@ ; BDVER2-LABEL: test_fnclex: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fnclex # sched: [100:0.33] +; BDVER2-NEXT: fnclex # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fnclex: ; BTVER2: # %bb.0: @@ -919,16 +919,16 @@ ; BDVER2-LABEL: test_fcmov: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcmovb %st(1), %st(0) # sched: [3:2.00] -; BDVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [3:2.00] -; BDVER2-NEXT: fcmove %st(1), %st(0) # sched: [3:2.00] -; BDVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [3:2.00] -; BDVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:2.00] -; BDVER2-NEXT: fcmovne %st(1), %st(0) # sched: [3:2.00] -; BDVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [3:2.00] -; BDVER2-NEXT: fcmovu %st(1), %st(0) # sched: [3:2.00] +; BDVER2-NEXT: fcmovb %st(1), %st(0) # sched: [1:1.00] +; BDVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [1:1.00] +; 
BDVER2-NEXT: fcmove %st(1), %st(0) # sched: [1:1.00] +; BDVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [1:1.00] +; BDVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [1:1.00] +; BDVER2-NEXT: fcmovne %st(1), %st(0) # sched: [1:1.00] +; BDVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [1:1.00] +; BDVER2-NEXT: fcmovu %st(1), %st(0) # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fcmov: ; BTVER2: # %bb.0: @@ -1065,10 +1065,10 @@ ; BDVER2-NEXT: #APP ; BDVER2-NEXT: fcom %st(1) # sched: [1:1.00] ; BDVER2-NEXT: fcom %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fcoms (%ecx) # sched: [8:1.00] -; BDVER2-NEXT: fcoml (%eax) # sched: [8:1.00] +; BDVER2-NEXT: fcoms (%ecx) # sched: [6:1.00] +; BDVER2-NEXT: fcoml (%eax) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fcom: ; BTVER2: # %bb.0: @@ -1209,11 +1209,11 @@ ; BDVER2-NEXT: #APP ; BDVER2-NEXT: fcomp %st(1) # sched: [1:1.00] ; BDVER2-NEXT: fcomp %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fcomps (%ecx) # sched: [8:1.00] -; BDVER2-NEXT: fcompl (%eax) # sched: [8:1.00] -; BDVER2-NEXT: fcompp # sched: [100:0.33] +; BDVER2-NEXT: fcomps (%ecx) # sched: [6:1.00] +; BDVER2-NEXT: fcompl (%eax) # sched: [6:1.00] +; BDVER2-NEXT: fcompp # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fcomp_fcompp: ; BTVER2: # %bb.0: @@ -1312,10 +1312,10 @@ ; BDVER2-LABEL: test_fcomi_fcomip: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcomi %st(3) # sched: [3:1.00] -; BDVER2-NEXT: fcompi %st(3) # sched: [3:1.00] +; BDVER2-NEXT: fcomi %st(3) # sched: [1:1.00] +; BDVER2-NEXT: fcompi %st(3) # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fcomi_fcomip: ; BTVER2: # %bb.0: @@ -1396,9 +1396,9 @@ ; 
BDVER2-LABEL: test_fcos: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fcos # sched: [100:0.33] +; BDVER2-NEXT: fcos # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fcos: ; BTVER2: # %bb.0: @@ -1477,9 +1477,9 @@ ; BDVER2-LABEL: test_fdecstp: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdecstp # sched: [1:1.00] +; BDVER2-NEXT: fdecstp # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fdecstp: ; BTVER2: # %bb.0: @@ -1600,12 +1600,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdiv %st(0), %st(1) # sched: [14:14.00] -; BDVER2-NEXT: fdiv %st(2) # sched: [14:14.00] -; BDVER2-NEXT: fdivs (%ecx) # sched: [31:1.00] -; BDVER2-NEXT: fdivl (%eax) # sched: [31:1.00] +; BDVER2-NEXT: fdiv %st(0), %st(1) # sched: [9:9.50] +; BDVER2-NEXT: fdiv %st(2) # sched: [9:9.50] +; BDVER2-NEXT: fdivs (%ecx) # sched: [14:9.50] +; BDVER2-NEXT: fdivl (%eax) # sched: [14:9.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fdiv: ; BTVER2: # %bb.0: @@ -1736,12 +1736,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivp %st(1) # sched: [14:14.00] -; BDVER2-NEXT: fdivp %st(2) # sched: [14:14.00] -; BDVER2-NEXT: fidivs (%ecx) # sched: [34:1.00] -; BDVER2-NEXT: fidivl (%eax) # sched: [34:1.00] +; BDVER2-NEXT: fdivp %st(1) # sched: [9:9.50] +; BDVER2-NEXT: fdivp %st(2) # sched: [9:9.50] +; BDVER2-NEXT: fidivs (%ecx) # sched: [14:9.50] +; BDVER2-NEXT: fidivl (%eax) # sched: [14:9.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; 
BTVER2-LABEL: test_fdivp_fidiv: ; BTVER2: # %bb.0: @@ -1872,12 +1872,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivr %st(0), %st(1) # sched: [14:14.00] -; BDVER2-NEXT: fdivr %st(2) # sched: [14:14.00] -; BDVER2-NEXT: fdivrs (%ecx) # sched: [31:1.00] -; BDVER2-NEXT: fdivrl (%eax) # sched: [31:1.00] +; BDVER2-NEXT: fdivr %st(0), %st(1) # sched: [9:9.50] +; BDVER2-NEXT: fdivr %st(2) # sched: [9:9.50] +; BDVER2-NEXT: fdivrs (%ecx) # sched: [14:9.50] +; BDVER2-NEXT: fdivrl (%eax) # sched: [14:9.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fdivr: ; BTVER2: # %bb.0: @@ -2008,12 +2008,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fdivrp %st(1) # sched: [14:14.00] -; BDVER2-NEXT: fdivrp %st(2) # sched: [14:14.00] -; BDVER2-NEXT: fidivrs (%ecx) # sched: [34:1.00] -; BDVER2-NEXT: fidivrl (%eax) # sched: [34:1.00] +; BDVER2-NEXT: fdivrp %st(1) # sched: [9:9.50] +; BDVER2-NEXT: fdivrp %st(2) # sched: [9:9.50] +; BDVER2-NEXT: fidivrs (%ecx) # sched: [14:9.50] +; BDVER2-NEXT: fidivrl (%eax) # sched: [14:9.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fdivrp_fidivr: ; BTVER2: # %bb.0: @@ -2102,9 +2102,9 @@ ; BDVER2-LABEL: test_ffree: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: ffree %st(0) # sched: [1:1.00] +; BDVER2-NEXT: ffree %st(0) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_ffree: ; BTVER2: # %bb.0: @@ -2225,12 +2225,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; 
BDVER2-NEXT: ficoms (%ecx) # sched: [11:2.00] -; BDVER2-NEXT: ficoml (%eax) # sched: [11:2.00] -; BDVER2-NEXT: ficomps (%ecx) # sched: [11:2.00] -; BDVER2-NEXT: ficompl (%eax) # sched: [11:2.00] +; BDVER2-NEXT: ficoms (%ecx) # sched: [6:1.00] +; BDVER2-NEXT: ficoml (%eax) # sched: [6:1.00] +; BDVER2-NEXT: ficomps (%ecx) # sched: [6:1.00] +; BDVER2-NEXT: ficompl (%eax) # sched: [6:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_ficom: ; BTVER2: # %bb.0: @@ -2362,11 +2362,11 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: filds (%edx) # sched: [10:1.00] -; BDVER2-NEXT: fildl (%ecx) # sched: [10:1.00] -; BDVER2-NEXT: fildll (%eax) # sched: [10:1.00] +; BDVER2-NEXT: filds (%edx) # sched: [5:0.50] +; BDVER2-NEXT: fildl (%ecx) # sched: [5:0.50] +; BDVER2-NEXT: fildll (%eax) # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fild: ; BTVER2: # %bb.0: @@ -2455,9 +2455,9 @@ ; BDVER2-LABEL: test_fincstp: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fincstp # sched: [1:1.00] +; BDVER2-NEXT: fincstp # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fincstp: ; BTVER2: # %bb.0: @@ -2544,10 +2544,10 @@ ; BDVER2-LABEL: test_finit: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.33] -; BDVER2-NEXT: fninit # sched: [5:1.33] +; BDVER2-NEXT: wait # sched: [100:0.50] +; BDVER2-NEXT: fninit # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_finit: ; BTVER2: # %bb.0: @@ -2628,9 +2628,9 @@ ; BDVER2-LABEL: test_fninit: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fninit # sched: 
[5:1.33] +; BDVER2-NEXT: fninit # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fninit: ; BTVER2: # %bb.0: @@ -2792,16 +2792,16 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fists (%edx) # sched: [9:1.00] -; BDVER2-NEXT: fistl (%ecx) # sched: [9:1.00] -; BDVER2-NEXT: fistps (%edx) # sched: [9:1.00] -; BDVER2-NEXT: fistpl (%ecx) # sched: [9:1.00] -; BDVER2-NEXT: fistpll (%eax) # sched: [9:1.00] -; BDVER2-NEXT: fisttps (%edx) # sched: [5:1.00] -; BDVER2-NEXT: fisttpl (%ecx) # sched: [5:1.00] -; BDVER2-NEXT: fisttpll (%eax) # sched: [5:1.00] +; BDVER2-NEXT: fists (%edx) # sched: [1:0.50] +; BDVER2-NEXT: fistl (%ecx) # sched: [1:0.50] +; BDVER2-NEXT: fistps (%edx) # sched: [1:0.50] +; BDVER2-NEXT: fistpl (%ecx) # sched: [1:0.50] +; BDVER2-NEXT: fistpll (%eax) # sched: [1:0.50] +; BDVER2-NEXT: fisttps (%edx) # sched: [1:0.50] +; BDVER2-NEXT: fisttpl (%ecx) # sched: [1:0.50] +; BDVER2-NEXT: fisttpll (%eax) # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fist_fistp_fisttp: ; BTVER2: # %bb.0: @@ -2951,12 +2951,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fld %st(0) # sched: [1:1.00] -; BDVER2-NEXT: flds (%edx) # sched: [9:1.00] -; BDVER2-NEXT: fldl (%ecx) # sched: [9:1.00] -; BDVER2-NEXT: fldt (%eax) # sched: [9:1.00] +; BDVER2-NEXT: fld %st(0) # sched: [1:0.50] +; BDVER2-NEXT: flds (%edx) # sched: [5:0.50] +; BDVER2-NEXT: fldl (%ecx) # sched: [5:0.50] +; BDVER2-NEXT: fldt (%eax) # sched: [5:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fld: ; BTVER2: # %bb.0: @@ -3064,10 
+3064,10 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fldcw (%eax) # sched: [8:2.00] -; BDVER2-NEXT: fldenv (%eax) # sched: [100:0.33] +; BDVER2-NEXT: fldcw (%eax) # sched: [5:0.50] +; BDVER2-NEXT: fldenv (%eax) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fldcw_fldenv: ; BTVER2: # %bb.0: @@ -3198,15 +3198,15 @@ ; BDVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fld1 # sched: [1:1.00] -; BDVER2-NEXT: fldl2e # sched: [1:1.00] -; BDVER2-NEXT: fldl2t # sched: [1:1.00] -; BDVER2-NEXT: fldlg2 # sched: [1:1.00] -; BDVER2-NEXT: fldln2 # sched: [1:1.00] -; BDVER2-NEXT: fldpi # sched: [1:1.00] -; BDVER2-NEXT: fldz # sched: [1:1.00] +; BDVER2-NEXT: fld1 # sched: [3:1.00] +; BDVER2-NEXT: fldl2e # sched: [3:1.00] +; BDVER2-NEXT: fldl2t # sched: [3:1.00] +; BDVER2-NEXT: fldlg2 # sched: [3:1.00] +; BDVER2-NEXT: fldln2 # sched: [3:1.00] +; BDVER2-NEXT: fldpi # sched: [3:1.00] +; BDVER2-NEXT: fldz # sched: [3:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz: ; BTVER2: # %bb.0: @@ -3341,10 +3341,10 @@ ; BDVER2-NEXT: #APP ; BDVER2-NEXT: fmul %st(0), %st(1) # sched: [5:1.00] ; BDVER2-NEXT: fmul %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fmuls (%ecx) # sched: [12:1.00] -; BDVER2-NEXT: fmull (%eax) # sched: [12:1.00] +; BDVER2-NEXT: fmuls (%ecx) # sched: [10:1.00] +; BDVER2-NEXT: fmull (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fmul: ; BTVER2: # %bb.0: @@ -3477,10 +3477,10 @@ ; BDVER2-NEXT: #APP ; BDVER2-NEXT: fmulp %st(1) # sched: [5:1.00] ; BDVER2-NEXT: fmulp %st(2) # sched: [5:1.00] -; BDVER2-NEXT: fimuls (%ecx) # 
sched: [15:1.00] -; BDVER2-NEXT: fimull (%eax) # sched: [15:1.00] +; BDVER2-NEXT: fimuls (%ecx) # sched: [10:1.00] +; BDVER2-NEXT: fimull (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fmulp_fimul: ; BTVER2: # %bb.0: @@ -3569,9 +3569,9 @@ ; BDVER2-LABEL: test_fnop: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fnop # sched: [1:1.00] +; BDVER2-NEXT: fnop # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fnop: ; BTVER2: # %bb.0: @@ -3650,9 +3650,9 @@ ; BDVER2-LABEL: test_fpatan: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fpatan # sched: [100:0.33] +; BDVER2-NEXT: fpatan # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fpatan: ; BTVER2: # %bb.0: @@ -3739,10 +3739,10 @@ ; BDVER2-LABEL: test_fprem_fprem1: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fprem # sched: [100:0.33] -; BDVER2-NEXT: fprem1 # sched: [100:0.33] +; BDVER2-NEXT: fprem # sched: [100:0.50] +; BDVER2-NEXT: fprem1 # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fprem_fprem1: ; BTVER2: # %bb.0: @@ -3823,9 +3823,9 @@ ; BDVER2-LABEL: test_fptan: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fptan # sched: [100:0.33] +; BDVER2-NEXT: fptan # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fptan: ; BTVER2: # %bb.0: @@ -3904,9 +3904,9 @@ ; BDVER2-LABEL: test_frndint: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: frndint # sched: [100:0.33] +; BDVER2-NEXT: frndint # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; 
BTVER2-LABEL: test_frndint: ; BTVER2: # %bb.0: @@ -3994,9 +3994,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: frstor (%eax) # sched: [100:0.33] +; BDVER2-NEXT: frstor (%eax) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_frstor: ; BTVER2: # %bb.0: @@ -4094,10 +4094,10 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.33] -; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.33] +; BDVER2-NEXT: wait # sched: [100:0.50] +; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fsave: ; BTVER2: # %bb.0: @@ -4189,9 +4189,9 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.33] +; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fnsave: ; BTVER2: # %bb.0: @@ -4272,9 +4272,9 @@ ; BDVER2-LABEL: test_fscale: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fscale # sched: [100:0.33] +; BDVER2-NEXT: fscale # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fscale: ; BTVER2: # %bb.0: @@ -4353,9 +4353,9 @@ ; BDVER2-LABEL: test_fsin: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsin # sched: [100:0.33] +; BDVER2-NEXT: fsin # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fsin: ; BTVER2: # %bb.0: @@ -4434,9 +4434,9 @@ ; BDVER2-LABEL: test_fsincos: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; 
BDVER2-NEXT: fsincos # sched: [100:0.33] +; BDVER2-NEXT: fsincos # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fsincos: ; BTVER2: # %bb.0: @@ -4515,9 +4515,9 @@ ; BDVER2-LABEL: test_fsqrt: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsqrt # sched: [24:24.00] +; BDVER2-NEXT: fsqrt # sched: [1:17.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fsqrt: ; BTVER2: # %bb.0: @@ -4671,15 +4671,15 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fst %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fsts (%edx) # sched: [6:1.00] -; BDVER2-NEXT: fstl (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: fstp %st(0) # sched: [1:1.00] -; BDVER2-NEXT: fstpl (%edx) # sched: [6:1.00] -; BDVER2-NEXT: fstpl (%ecx) # sched: [6:1.00] -; BDVER2-NEXT: fstpt (%eax) # sched: [6:1.00] +; BDVER2-NEXT: fst %st(0) # sched: [1:0.50] +; BDVER2-NEXT: fsts (%edx) # sched: [1:0.50] +; BDVER2-NEXT: fstl (%ecx) # sched: [1:0.50] +; BDVER2-NEXT: fstp %st(0) # sched: [1:0.50] +; BDVER2-NEXT: fstpl (%edx) # sched: [1:0.50] +; BDVER2-NEXT: fstpl (%ecx) # sched: [1:0.50] +; BDVER2-NEXT: fstpt (%eax) # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fst_fstp: ; BTVER2: # %bb.0: @@ -4825,14 +4825,14 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.33] -; BDVER2-NEXT: fnstcw (%eax) # sched: [7:1.00] -; BDVER2-NEXT: wait # sched: [100:0.33] -; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.33] -; BDVER2-NEXT: wait # sched: [100:0.33] -; BDVER2-NEXT: fnstsw (%eax) # sched: [7:1.00] +; BDVER2-NEXT: wait # sched: [100:0.50] +; BDVER2-NEXT: fnstcw (%eax) # sched: 
[1:0.50] +; BDVER2-NEXT: wait # sched: [100:0.50] +; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.50] +; BDVER2-NEXT: wait # sched: [100:0.50] +; BDVER2-NEXT: fnstsw (%eax) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fstcw_fstenv_fstsw: ; BTVER2: # %bb.0: @@ -4948,11 +4948,11 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fnstcw (%eax) # sched: [7:1.00] -; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.33] -; BDVER2-NEXT: fnstsw (%eax) # sched: [7:1.00] +; BDVER2-NEXT: fnstcw (%eax) # sched: [1:0.50] +; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.50] +; BDVER2-NEXT: fnstsw (%eax) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fnstcw_fnstenv_fnstsw: ; BTVER2: # %bb.0: @@ -5079,12 +5079,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsub %st(0), %st(1) # sched: [3:1.00] -; BDVER2-NEXT: fsub %st(2) # sched: [3:1.00] +; BDVER2-NEXT: fsub %st(0), %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fsub %st(2) # sched: [5:1.00] ; BDVER2-NEXT: fsubs (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fsubl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fsub: ; BTVER2: # %bb.0: @@ -5215,12 +5215,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubp %st(1) # sched: [3:1.00] -; BDVER2-NEXT: fsubp %st(2) # sched: [3:1.00] -; BDVER2-NEXT: fisubs (%ecx) # sched: [13:2.00] -; BDVER2-NEXT: fisubl (%eax) # sched: [13:2.00] +; BDVER2-NEXT: fsubp %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fsubp %st(2) # sched: 
[5:1.00] +; BDVER2-NEXT: fisubs (%ecx) # sched: [10:1.00] +; BDVER2-NEXT: fisubl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fsubp_fisub: ; BTVER2: # %bb.0: @@ -5351,12 +5351,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00] -; BDVER2-NEXT: fsubr %st(2) # sched: [3:1.00] +; BDVER2-NEXT: fsubr %st(0), %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fsubr %st(2) # sched: [5:1.00] ; BDVER2-NEXT: fsubrs (%ecx) # sched: [10:1.00] ; BDVER2-NEXT: fsubrl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fsubr: ; BTVER2: # %bb.0: @@ -5487,12 +5487,12 @@ ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fsubrp %st(1) # sched: [3:1.00] -; BDVER2-NEXT: fsubrp %st(2) # sched: [3:1.00] -; BDVER2-NEXT: fisubrs (%ecx) # sched: [13:2.00] -; BDVER2-NEXT: fisubrl (%eax) # sched: [13:2.00] +; BDVER2-NEXT: fsubrp %st(1) # sched: [5:1.00] +; BDVER2-NEXT: fsubrp %st(2) # sched: [5:1.00] +; BDVER2-NEXT: fisubrs (%ecx) # sched: [10:1.00] +; BDVER2-NEXT: fisubrl (%eax) # sched: [10:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fsubrp_fisubr: ; BTVER2: # %bb.0: @@ -5581,9 +5581,9 @@ ; BDVER2-LABEL: test_ftst: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: ftst # sched: [3:1.00] +; BDVER2-NEXT: ftst # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_ftst: ; BTVER2: # %bb.0: @@ -5698,9 +5698,9 @@ ; BDVER2-NEXT: fucom %st(3) # sched: [1:1.00] ; BDVER2-NEXT: fucomp 
%st(1) # sched: [1:1.00] ; BDVER2-NEXT: fucomp %st(3) # sched: [1:1.00] -; BDVER2-NEXT: fucompp # sched: [3:1.00] +; BDVER2-NEXT: fucompp # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fucom_fucomp_fucompp: ; BTVER2: # %bb.0: @@ -5795,10 +5795,10 @@ ; BDVER2-LABEL: test_fucomi_fucomip: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fucomi %st(3) # sched: [3:1.00] -; BDVER2-NEXT: fucompi %st(3) # sched: [3:1.00] +; BDVER2-NEXT: fucomi %st(3) # sched: [1:1.00] +; BDVER2-NEXT: fucompi %st(3) # sched: [1:1.00] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fucomi_fucomip: ; BTVER2: # %bb.0: @@ -5879,9 +5879,9 @@ ; BDVER2-LABEL: test_fwait: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: wait # sched: [100:0.33] +; BDVER2-NEXT: wait # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fwait: ; BTVER2: # %bb.0: @@ -5960,9 +5960,9 @@ ; BDVER2-LABEL: test_fxam: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fxam # sched: [100:0.33] +; BDVER2-NEXT: fxam # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fxam: ; BTVER2: # %bb.0: @@ -6049,10 +6049,10 @@ ; BDVER2-LABEL: test_fxch: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fxch %st(1) # sched: [1:0.33] -; BDVER2-NEXT: fxch %st(3) # sched: [1:0.33] +; BDVER2-NEXT: fxch %st(1) # sched: [1:0.50] +; BDVER2-NEXT: fxch %st(3) # sched: [1:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fxch: ; BTVER2: # %bb.0: @@ -6150,10 +6150,10 @@ ; BDVER2: # %bb.0: ; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50] ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fxrstor (%eax) # 
sched: [5:2.00] -; BDVER2-NEXT: fxsave (%eax) # sched: [100:0.33] +; BDVER2-NEXT: fxrstor (%eax) # sched: [100:0.50] +; BDVER2-NEXT: fxsave (%eax) # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fxrstor_fxsave: ; BTVER2: # %bb.0: @@ -6236,9 +6236,9 @@ ; BDVER2-LABEL: test_fxtract: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fxtract # sched: [100:0.33] +; BDVER2-NEXT: fxtract # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fxtract: ; BTVER2: # %bb.0: @@ -6317,9 +6317,9 @@ ; BDVER2-LABEL: test_fyl2x: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fyl2x # sched: [100:0.33] +; BDVER2-NEXT: fyl2x # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fyl2x: ; BTVER2: # %bb.0: @@ -6398,9 +6398,9 @@ ; BDVER2-LABEL: test_fyl2xp1: ; BDVER2: # %bb.0: ; BDVER2-NEXT: #APP -; BDVER2-NEXT: fyl2xp1 # sched: [100:0.33] +; BDVER2-NEXT: fyl2xp1 # sched: [100:0.50] ; BDVER2-NEXT: #NO_APP -; BDVER2-NEXT: retl # sched: [6:1.00] +; BDVER2-NEXT: retl # sched: [5:1.00] ; ; BTVER2-LABEL: test_fyl2xp1: ; BTVER2: # %bb.0: Index: llvm/trunk/test/CodeGen/X86/xop-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/xop-schedule.ll +++ llvm/trunk/test/CodeGen/X86/xop-schedule.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 
-mattr=+xop | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4 @@ -20,13 +20,13 @@ ; BDVER12-LABEL: test_vfrczpd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfrczpd %xmm0, %xmm0 # sched: [3:1.00] -; BDVER12-NEXT: vfrczpd %ymm1, %ymm1 # sched: [3:1.00] -; BDVER12-NEXT: vfrczpd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER12-NEXT: vfrczpd (%rsi), %ymm1 # sched: [10:1.00] +; BDVER12-NEXT: vfrczpd %xmm0, %xmm0 # sched: [10:1.00] +; BDVER12-NEXT: vfrczpd %ymm1, %ymm1 # sched: [10:2.00] +; BDVER12-NEXT: vfrczpd (%rdi), %xmm0 # sched: [15:1.00] +; BDVER12-NEXT: vfrczpd (%rsi), %ymm1 # sched: [15:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vfrczpd: ; BDVER3: # %bb.0: @@ -68,13 +68,13 @@ ; BDVER12-LABEL: test_vfrczps: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfrczps %xmm0, %xmm0 # sched: [3:1.00] -; BDVER12-NEXT: vfrczps %ymm1, %ymm1 # sched: [3:1.00] -; BDVER12-NEXT: vfrczps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER12-NEXT: vfrczps (%rsi), %ymm1 # sched: [10:1.00] +; BDVER12-NEXT: vfrczps %xmm0, %xmm0 # sched: [10:1.00] +; BDVER12-NEXT: vfrczps %ymm1, %ymm1 # sched: [10:2.00] +; BDVER12-NEXT: vfrczps (%rdi), %xmm0 # sched: [15:1.00] +; BDVER12-NEXT: vfrczps 
(%rsi), %ymm1 # sched: [15:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vfrczps: ; BDVER3: # %bb.0: @@ -113,10 +113,10 @@ ; BDVER12-LABEL: test_vfrczsd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfrczsd %xmm0, %xmm0 # sched: [3:1.00] -; BDVER12-NEXT: vfrczsd (%rdi), %xmm0 # sched: [9:1.00] +; BDVER12-NEXT: vfrczsd %xmm0, %xmm0 # sched: [10:1.00] +; BDVER12-NEXT: vfrczsd (%rdi), %xmm0 # sched: [15:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vfrczsd: ; BDVER3: # %bb.0: @@ -149,10 +149,10 @@ ; BDVER12-LABEL: test_vfrczss: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vfrczss %xmm0, %xmm0 # sched: [3:1.00] -; BDVER12-NEXT: vfrczss (%rdi), %xmm0 # sched: [9:1.00] +; BDVER12-NEXT: vfrczss %xmm0, %xmm0 # sched: [10:1.00] +; BDVER12-NEXT: vfrczss (%rdi), %xmm0 # sched: [15:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vfrczss: ; BDVER3: # %bb.0: @@ -186,11 +186,11 @@ ; BDVER12-LABEL: test_vpcmov_128: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER12-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER12-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpcmov_128: ; BDVER3: # %bb.0: @@ -227,12 +227,12 @@ ; BDVER12-LABEL: test_vpcmov_256: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER12-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: 
[8:1.00] -; BDVER12-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; BDVER12-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.50] +; BDVER12-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [7:1.00] +; BDVER12-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpcmov_256: ; BDVER3: # %bb.0: @@ -275,16 +275,16 @@ ; BDVER12-LABEL: test_vpcom: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER12-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER12-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER12-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER12-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER12-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpcom: ; BDVER3: # %bb.0: @@ -335,16 +335,16 @@ ; BDVER12-LABEL: test_vpcomu: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER12-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER12-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER12-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; BDVER12-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: 
[2:0.50] +; BDVER12-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50] ; BDVER12-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpcomu: ; BDVER3: # %bb.0: @@ -390,11 +390,11 @@ ; BDVER12-LABEL: test_vpermil2pd_128: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BDVER12-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BDVER12-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BDVER12-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [8:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpermil2pd_128: ; BDVER3: # %bb.0: @@ -431,12 +431,12 @@ ; BDVER12-LABEL: test_vpermil2pd_256: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER12-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER12-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] +; BDVER12-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] +; BDVER12-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] +; BDVER12-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:3.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] 
-; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpermil2pd_256: ; BDVER3: # %bb.0: @@ -474,11 +474,11 @@ ; BDVER12-LABEL: test_vpermil2ps_128: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00] +; BDVER12-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BDVER12-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; BDVER12-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [8:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpermil2ps_128: ; BDVER3: # %bb.0: @@ -515,12 +515,12 @@ ; BDVER12-LABEL: test_vpermil2ps_256: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; BDVER12-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; BDVER12-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00] +; BDVER12-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] +; BDVER12-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] +; BDVER12-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:3.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: vzeroupper # sched: [100:0.33] -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: vzeroupper # sched: [46:4.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpermil2ps_256: ; BDVER3: # %bb.0: @@ -557,10 +557,10 @@ ; BDVER12-LABEL: test_vphaddbd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddbd %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddbd (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddbd %xmm0, %xmm0 # sched: 
[2:0.50] +; BDVER12-NEXT: vphaddbd (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddbd: ; BDVER3: # %bb.0: @@ -593,10 +593,10 @@ ; BDVER12-LABEL: test_vphaddbq: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddbq %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddbq (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddbq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphaddbq (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddbq: ; BDVER3: # %bb.0: @@ -629,10 +629,10 @@ ; BDVER12-LABEL: test_vphaddbw: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddbw %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddbw (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddbw %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphaddbw (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddbw: ; BDVER3: # %bb.0: @@ -665,10 +665,10 @@ ; BDVER12-LABEL: test_vphadddq: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphadddq %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphadddq (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphadddq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphadddq (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphadddq: ; BDVER3: # %bb.0: @@ -701,10 +701,10 @@ ; BDVER12-LABEL: test_vphaddubd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddubd %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddubd (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddubd %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphaddubd (%rdi), %xmm0 # sched: [7:0.50] ; 
BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddubd: ; BDVER3: # %bb.0: @@ -737,10 +737,10 @@ ; BDVER12-LABEL: test_vphaddubq: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddubq %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddubq (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddubq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphaddubq (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddubq: ; BDVER3: # %bb.0: @@ -773,10 +773,10 @@ ; BDVER12-LABEL: test_vphaddubw: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddubw %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddubw (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddubw %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphaddubw (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddubw: ; BDVER3: # %bb.0: @@ -809,10 +809,10 @@ ; BDVER12-LABEL: test_vphaddudq: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddudq %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddudq (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddudq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphaddudq (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddudq: ; BDVER3: # %bb.0: @@ -845,10 +845,10 @@ ; BDVER12-LABEL: test_vphadduwd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphadduwd %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphadduwd (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphadduwd %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphadduwd (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # 
sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphadduwd: ; BDVER3: # %bb.0: @@ -881,10 +881,10 @@ ; BDVER12-LABEL: test_vphadduwq: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphadduwq %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphadduwq (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphadduwq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphadduwq (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphadduwq: ; BDVER3: # %bb.0: @@ -917,10 +917,10 @@ ; BDVER12-LABEL: test_vphaddwd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddwd %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddwd (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddwd %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphaddwd (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddwd: ; BDVER3: # %bb.0: @@ -953,10 +953,10 @@ ; BDVER12-LABEL: test_vphaddwq: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphaddwq %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphaddwq (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphaddwq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphaddwq (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphaddwq: ; BDVER3: # %bb.0: @@ -989,10 +989,10 @@ ; BDVER12-LABEL: test_vphsubbw: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphsubbw %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphsubbw (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphsubbw %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphsubbw (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; 
BDVER3-LABEL: test_vphsubbw: ; BDVER3: # %bb.0: @@ -1025,10 +1025,10 @@ ; BDVER12-LABEL: test_vphsubdq: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphsubdq %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphsubdq (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphsubdq %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphsubdq (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphsubdq: ; BDVER3: # %bb.0: @@ -1061,10 +1061,10 @@ ; BDVER12-LABEL: test_vphsubwd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vphsubwd %xmm0, %xmm0 # sched: [3:1.50] -; BDVER12-NEXT: vphsubwd (%rdi), %xmm0 # sched: [9:1.50] +; BDVER12-NEXT: vphsubwd %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vphsubwd (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vphsubwd: ; BDVER3: # %bb.0: @@ -1097,10 +1097,10 @@ ; BDVER12-LABEL: test_vpmacsdd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BDVER12-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacsdd: ; BDVER3: # %bb.0: @@ -1133,10 +1133,10 @@ ; BDVER12-LABEL: test_vpmacsdqh: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] +; BDVER12-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: 
[1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacsdqh: ; BDVER3: # %bb.0: @@ -1169,10 +1169,10 @@ ; BDVER12-LABEL: test_vpmacsdql: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] +; BDVER12-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacsdql: ; BDVER3: # %bb.0: @@ -1205,10 +1205,10 @@ ; BDVER12-LABEL: test_vpmacssdd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; BDVER12-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacssdd: ; BDVER3: # %bb.0: @@ -1241,10 +1241,10 @@ ; BDVER12-LABEL: test_vpmacssdqh: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] +; BDVER12-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacssdqh: ; BDVER3: # %bb.0: @@ -1277,10 +1277,10 @@ ; BDVER12-LABEL: test_vpmacssdql: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; 
BDVER12-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] +; BDVER12-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacssdql: ; BDVER3: # %bb.0: @@ -1313,10 +1313,10 @@ ; BDVER12-LABEL: test_vpmacsswd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER12-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacsswd: ; BDVER3: # %bb.0: @@ -1349,10 +1349,10 @@ ; BDVER12-LABEL: test_vpmacssww: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER12-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacssww: ; BDVER3: # %bb.0: @@ -1385,10 +1385,10 @@ ; BDVER12-LABEL: test_vpmacswd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER12-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacswd: ; BDVER3: # %bb.0: @@ -1421,10 +1421,10 @@ ; BDVER12-LABEL: test_vpmacsww: ; 
BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER12-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmacsww: ; BDVER3: # %bb.0: @@ -1457,10 +1457,10 @@ ; BDVER12-LABEL: test_vpmadcsswd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER12-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmadcsswd: ; BDVER3: # %bb.0: @@ -1493,10 +1493,10 @@ ; BDVER12-LABEL: test_vpmadcswd: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER12-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; BDVER12-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; BDVER12-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpmadcswd: ; BDVER3: # %bb.0: @@ -1530,11 +1530,11 @@ ; BDVER12-LABEL: test_vpperm: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BDVER12-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50] -; BDVER12-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; BDVER12-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] +; BDVER12-NEXT: vpperm (%rdi), %xmm1, %xmm0, 
%xmm0 # sched: [8:2.00] +; BDVER12-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpperm: ; BDVER3: # %bb.0: @@ -1587,28 +1587,28 @@ ; BDVER12-LABEL: test_vprot: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vprotb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:1.00] +; BDVER12-NEXT: vprotb %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: 
[8:0.50] +; BDVER12-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [2:0.50] +; BDVER12-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:0.50] +; BDVER12-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:0.50] +; BDVER12-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:0.50] +; BDVER12-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vprot: ; BDVER3: # %bb.0: @@ -1687,20 +1687,20 @@ ; BDVER12-LABEL: test_vpsha: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpshab %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [7:1.00] +; BDVER12-NEXT: vpshab %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; 
BDVER12-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [8:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpsha: ; BDVER3: # %bb.0: @@ -1763,20 +1763,20 @@ ; BDVER12-LABEL: test_vpshl: ; BDVER12: # %bb.0: ; BDVER12-NEXT: #APP -; BDVER12-NEXT: vpshlb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BDVER12-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [7:1.00] -; BDVER12-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [7:1.00] +; BDVER12-NEXT: vpshlb %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] +; BDVER12-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 
+; BDVER12-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [8:0.50] +; BDVER12-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [8:0.50] ; BDVER12-NEXT: #NO_APP -; BDVER12-NEXT: retq # sched: [1:1.00] +; BDVER12-NEXT: retq # sched: [5:1.00] ; ; BDVER3-LABEL: test_vpshl: ; BDVER3: # %bb.0: Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/add-sequence.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/add-sequence.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/add-sequence.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1000 -timeline < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1000 -timeline < %s | FileCheck %s add %eax, %ecx add %esi, %eax @@ -7,13 +7,13 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 3000 -# CHECK-NEXT: Total Cycles: 1004 +# CHECK-NEXT: Total Cycles: 1504 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 2.99 -# CHECK-NEXT: IPC: 2.99 -# CHECK-NEXT: Block RThroughput: 1.0 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.99 +# CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -24,64 +24,76 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 1 0.33 addl %eax, %ecx -# CHECK-NEXT: 1 1 0.33 addl %esi, %eax -# CHECK-NEXT: 1 1 0.33 addl %eax, %edx +# CHECK-NEXT: 1 1 0.50 addl %eax, %ecx +# CHECK-NEXT: 1 1 0.50 addl %esi, %eax +# CHECK-NEXT: 1 1 0.50 addl %eax, %edx # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - 
SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - 1.50 1.50 - - - - - - - - - - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - addl %eax, %ecx -# CHECK-NEXT: - - - - - 1.00 - - addl %esi, %eax -# CHECK-NEXT: - - 1.00 - - - - - addl %eax, %edx +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - addl %eax, %ecx +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - addl %esi, %eax +# CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - addl %eax, %edx # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . . addl %eax, %ecx -# CHECK-NEXT: [0,1] DeER . . . addl %esi, %eax -# CHECK-NEXT: [0,2] D=eER. . . addl %eax, %edx -# CHECK-NEXT: [1,0] D=eER. . . addl %eax, %ecx -# CHECK-NEXT: [1,1] .DeER. . . 
addl %esi, %eax -# CHECK-NEXT: [1,2] .D=eER . . addl %eax, %edx -# CHECK-NEXT: [2,0] .D=eER . . addl %eax, %ecx -# CHECK-NEXT: [2,1] .D=eER . . addl %esi, %eax -# CHECK-NEXT: [2,2] . D=eER . . addl %eax, %edx -# CHECK-NEXT: [3,0] . D=eER . . addl %eax, %ecx -# CHECK-NEXT: [3,1] . D=eER . . addl %esi, %eax -# CHECK-NEXT: [3,2] . D==eER . . addl %eax, %edx -# CHECK-NEXT: [4,0] . D=eER . . addl %eax, %ecx -# CHECK-NEXT: [4,1] . D=eER . . addl %esi, %eax -# CHECK-NEXT: [4,2] . D==eER . . addl %eax, %edx -# CHECK-NEXT: [5,0] . D==eER . . addl %eax, %ecx -# CHECK-NEXT: [5,1] . D=eER . . addl %esi, %eax -# CHECK-NEXT: [5,2] . D==eER. . addl %eax, %edx -# CHECK-NEXT: [6,0] . D==eER. . addl %eax, %ecx -# CHECK-NEXT: [6,1] . D==eER. . addl %esi, %eax -# CHECK-NEXT: [6,2] . D==eER . addl %eax, %edx -# CHECK-NEXT: [7,0] . D==eER . addl %eax, %ecx -# CHECK-NEXT: [7,1] . D==eER . addl %esi, %eax -# CHECK-NEXT: [7,2] . D===eER . addl %eax, %edx -# CHECK-NEXT: [8,0] . .D==eER . addl %eax, %ecx -# CHECK-NEXT: [8,1] . .D==eER . addl %esi, %eax -# CHECK-NEXT: [8,2] . .D===eER. addl %eax, %edx -# CHECK-NEXT: [9,0] . .D===eER. addl %eax, %ecx -# CHECK-NEXT: [9,1] . . D==eER. addl %esi, %eax -# CHECK-NEXT: [9,2] . . D===eER addl %eax, %edx +# CHECK: [0,0] DeER . . . . addl %eax, %ecx +# CHECK-NEXT: [0,1] DeER . . . . addl %esi, %eax +# CHECK-NEXT: [0,2] D=eER. . . . addl %eax, %edx +# CHECK-NEXT: [1,0] D==eER . . . addl %eax, %ecx +# CHECK-NEXT: [1,1] .DeE-R . . . addl %esi, %eax +# CHECK-NEXT: [1,2] .D=eER . . . addl %eax, %edx +# CHECK-NEXT: [2,0] .D==eER . . . addl %eax, %ecx +# CHECK-NEXT: [2,1] .D==eER . . . addl %esi, %eax +# CHECK-NEXT: [2,2] . D==eER . . . addl %eax, %edx +# CHECK-NEXT: [3,0] . D===eER . . . addl %eax, %ecx +# CHECK-NEXT: [3,1] . D==eE-R . . . addl %esi, %eax +# CHECK-NEXT: [3,2] . D===eER . . . addl %eax, %edx +# CHECK-NEXT: [4,0] . D===eER. . . addl %eax, %ecx +# CHECK-NEXT: [4,1] . D===eER. . . addl %esi, %eax +# CHECK-NEXT: [4,2] . D====eER . . 
addl %eax, %edx +# CHECK-NEXT: [5,0] . D=====eER . . addl %eax, %ecx +# CHECK-NEXT: [5,1] . D===eE-R . . addl %esi, %eax +# CHECK-NEXT: [5,2] . D====eER . . addl %eax, %edx +# CHECK-NEXT: [6,0] . D=====eER . . addl %eax, %ecx +# CHECK-NEXT: [6,1] . D=====eER . . addl %esi, %eax +# CHECK-NEXT: [6,2] . D=====eER . . addl %eax, %edx +# CHECK-NEXT: [7,0] . D======eER. . addl %eax, %ecx +# CHECK-NEXT: [7,1] . D=====eE-R. . addl %esi, %eax +# CHECK-NEXT: [7,2] . D======eER. . addl %eax, %edx +# CHECK-NEXT: [8,0] . .D======eER . addl %eax, %ecx +# CHECK-NEXT: [8,1] . .D======eER . addl %esi, %eax +# CHECK-NEXT: [8,2] . .D=======eER. addl %eax, %edx +# CHECK-NEXT: [9,0] . .D========eER addl %eax, %ecx +# CHECK-NEXT: [9,1] . . D======eE-R addl %esi, %eax +# CHECK-NEXT: [9,2] . . D=======eER addl %eax, %edx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -90,6 +102,6 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 10 2.5 0.1 0.0 addl %eax, %ecx -# CHECK-NEXT: 1. 10 2.2 0.1 0.0 addl %esi, %eax -# CHECK-NEXT: 2. 10 3.0 0.0 0.0 addl %eax, %edx +# CHECK-NEXT: 0. 10 5.0 0.6 0.0 addl %eax, %ecx +# CHECK-NEXT: 1. 10 4.2 0.5 0.5 addl %esi, %eax +# CHECK-NEXT: 2. 
10 5.0 0.0 0.0 addl %eax, %edx Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s ## Sets register RAX. imulq $5, %rcx, %rax @@ -15,13 +15,13 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 803 -# CHECK-NEXT: Total uOps: 400 +# CHECK-NEXT: Total Cycles: 702 +# CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.50 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: uOps Per Cycle: 1.42 +# CHECK-NEXT: IPC: 0.57 +# CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -32,23 +32,23 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 imulq $5, %rcx, %rax -# CHECK-NEXT: 1 3 1.00 lzcntl %ecx, %eax -# CHECK-NEXT: 1 1 0.33 andq %rcx, %rax -# CHECK-NEXT: 1 3 1.00 bsfq %rax, %rcx +# CHECK-NEXT: 1 6 4.00 imulq $5, %rcx, %rax +# CHECK-NEXT: 2 2 0.50 lzcntl %ecx, %eax +# CHECK-NEXT: 1 1 0.50 andq %rcx, %rax +# CHECK-NEXT: 6 3 2.00 bsfq %rax, %rcx # CHECK: Timeline view: -# CHECK-NEXT: 012345678 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER . . . imulq $5, %rcx, %rax -# CHECK-NEXT: [0,1] D=eeeER . . . lzcntl %ecx, %eax -# CHECK-NEXT: [0,2] D====eER . . . andq %rcx, %rax -# CHECK-NEXT: [0,3] D=====eeeER . . 
bsfq %rax, %rcx -# CHECK-NEXT: [1,0] .D=======eeeER . . imulq $5, %rcx, %rax -# CHECK-NEXT: [1,1] .D========eeeER. . lzcntl %ecx, %eax -# CHECK-NEXT: [1,2] .D===========eER . andq %rcx, %rax -# CHECK-NEXT: [1,3] .D============eeeER bsfq %rax, %rcx +# CHECK: [0,0] DeeeeeeER . . imulq $5, %rcx, %rax +# CHECK-NEXT: [0,1] DeeE----R . . lzcntl %ecx, %eax +# CHECK-NEXT: [0,2] D==eE---R . . andq %rcx, %rax +# CHECK-NEXT: [0,3] .D==eeeER . . bsfq %rax, %rcx +# CHECK-NEXT: [1,0] . D====eeeeeeER. imulq $5, %rcx, %rax +# CHECK-NEXT: [1,1] . D====eeE---R. lzcntl %ecx, %eax +# CHECK-NEXT: [1,2] . D======eE--R. andq %rcx, %rax +# CHECK-NEXT: [1,3] . D======eeeER bsfq %rax, %rcx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -57,7 +57,7 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 4.5 0.5 0.0 imulq $5, %rcx, %rax -# CHECK-NEXT: 1. 2 5.5 1.5 0.0 lzcntl %ecx, %eax -# CHECK-NEXT: 2. 2 8.5 0.0 0.0 andq %rcx, %rax -# CHECK-NEXT: 3. 2 9.5 0.0 0.0 bsfq %rax, %rcx +# CHECK-NEXT: 0. 2 3.0 0.5 0.0 imulq $5, %rcx, %rax +# CHECK-NEXT: 1. 2 3.0 1.0 3.5 lzcntl %ecx, %eax +# CHECK-NEXT: 2. 2 5.0 0.0 2.5 andq %rcx, %rax +# CHECK-NEXT: 3. 
2 5.0 0.0 0.0 bsfq %rax, %rcx Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s # In this test, the VDIVPS takes 38 cycles to write to register YMM3. The first # VADDPS does not depend on the VDIVPS (the WAW dependency is eliminated at @@ -33,13 +33,13 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1800 -# CHECK-NEXT: Total Cycles: 2804 -# CHECK-NEXT: Total uOps: 2000 +# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total uOps: 3400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.71 -# CHECK-NEXT: IPC: 0.64 -# CHECK-NEXT: Block RThroughput: 28.0 +# CHECK-NEXT: uOps Per Cycle: 0.85 +# CHECK-NEXT: IPC: 0.45 +# CHECK-NEXT: Block RThroughput: 31.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -50,65 +50,63 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 3 29 28.00 vdivps %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm3 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# 
CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 1 1 1.00 vandps %xmm4, %xmm1, %xmm0 +# CHECK-NEXT: 2 9 19.00 vdivps %ymm0, %ymm1, %ymm3 +# CHECK-NEXT: 1 5 1.00 vaddps %xmm0, %xmm1, %xmm3 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 2 5 2.00 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 1 2 0.50 vandps %xmm4, %xmm1, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 +# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 -# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: [0,1] DeeeE--------------------------R . . . . . . vaddps %xmm0, %xmm1, %xmm3 -# CHECK-NEXT: [0,2] .D==eeeE-----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,3] .D===eeeE----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,4] .D====eeeE---------------------R . . 
. . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,5] .D=====eeeE--------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,6] . D=====eeeE-------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,7] . D======eeeE------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,8] . D=======eeeE-----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,9] . D========eeeE----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,10] . D========eeeE---------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,11] . D=========eeeE--------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,12] . D==========eeeE-------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,13] . D===========eeeE------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,14] . D===========eeeE-----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,15] . D============eeeE----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,16] . D=============eeeE---------R . . . . . . vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,17] . D================eE--------R . . . . . . vandps %xmm4, %xmm1, %xmm0 -# CHECK-NEXT: [1,0] . D=======================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeER vdivps %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: [1,1] . D================eeeE---------------------------------R vaddps %xmm0, %xmm1, %xmm3 -# CHECK-NEXT: [1,2] . .D==================eeeE------------------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,3] . .D===================eeeE-----------------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,4] . .D====================eeeE----------------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,5] . .D=====================eeeE---------------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,6] . . 
D=====================eeeE--------------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,7] . . D======================eeeE-------------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,8] . . D=======================eeeE------------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,9] . . D========================eeeE-----------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,10] . . D========================eeeE----------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,11] . . D=========================eeeE---------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,12] . . D==========================eeeE--------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,13] . . D===========================eeeE-------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,14] . . D===========================eeeE------------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,15] . . D============================eeeE-----------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,16] . . D=============================eeeE----------------R vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,17] . . D================================eE---------------R vandps %xmm4, %xmm1, %xmm0 +# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . . . . . . . vdivps %ymm0, %ymm1, %ymm3 +# CHECK-NEXT: [0,1] DeeeeeE----R . . . . . . . . . . . . . . vaddps %xmm0, %xmm1, %xmm3 +# CHECK-NEXT: [0,2] .D====eeeeeER . . . . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,3] .D======eeeeeER. . . . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,4] . D=======eeeeeER . . . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,5] . D=========eeeeeER . . . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,6] . D==========eeeeeER . . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,7] . D============eeeeeER . . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,8] . 
D=============eeeeeER. . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,9] . D===============eeeeeER . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,10] . D================eeeeeER . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,11] . D==================eeeeeER . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,12] . .D===================eeeeeER . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,13] . .D=====================eeeeeER. . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,14] . . D======================eeeeeER . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,15] . . D========================eeeeeER . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,16] . . D=========================eeeeeER . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,17] . . D==============================eeER . . . . . . . . vandps %xmm4, %xmm1, %xmm0 +# CHECK-NEXT: [1,0] . . D===============================eeeeeeeeeER . . . . . . vdivps %ymm0, %ymm1, %ymm3 +# CHECK-NEXT: [1,1] . . D===============================eeeeeE----R . . . . . . vaddps %xmm0, %xmm1, %xmm3 +# CHECK-NEXT: [1,2] . . D===================================eeeeeER . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,3] . . D=====================================eeeeeER. . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,4] . . .D======================================eeeeeER . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,5] . . .D========================================eeeeeER . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,6] . . . D=========================================eeeeeER . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,7] . . . D===========================================eeeeeER . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,8] . . . D============================================eeeeeER. . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,9] . . 
. D==============================================eeeeeER . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,10] . . . D===============================================eeeeeER . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,11] . . . D=================================================eeeeeER . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,12] . . . D==================================================eeeeeER . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,13] . . . D====================================================eeeeeER. . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,14] . . . .D=====================================================eeeeeER . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,15] . . . .D=======================================================eeeeeER. vaddps %ymm3, %ymm1, %ymm4 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -117,21 +115,21 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 12.5 4.0 0.0 vdivps %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: 1. 2 9.0 0.5 29.5 vaddps %xmm0, %xmm1, %xmm3 -# CHECK-NEXT: 2. 2 11.0 0.0 26.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 3. 2 12.0 1.0 25.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 4. 2 13.0 2.0 24.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 5. 2 14.0 3.0 23.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 6. 2 14.0 4.0 22.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 7. 2 15.0 5.0 21.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 8. 2 16.0 6.0 20.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 9. 2 17.0 7.0 19.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 10. 2 17.0 8.0 18.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 11. 2 18.0 9.0 17.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 12. 2 19.0 10.0 16.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 13. 2 20.0 11.0 15.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 14. 2 20.0 12.0 14.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 15. 
2 21.0 13.0 13.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 16. 2 22.0 14.0 12.5 vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: 17. 2 25.0 0.0 11.5 vandps %xmm4, %xmm1, %xmm0 +# CHECK-NEXT: 0. 2 16.5 0.5 0.0 vdivps %ymm0, %ymm1, %ymm3 +# CHECK-NEXT: 1. 2 16.5 0.5 4.0 vaddps %xmm0, %xmm1, %xmm3 +# CHECK-NEXT: 2. 2 20.5 0.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 3. 2 22.5 2.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 4. 2 23.5 4.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 5. 2 25.5 6.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 6. 2 26.5 8.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 7. 2 28.5 10.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 8. 2 29.5 12.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 9. 2 31.5 14.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 10. 2 32.5 16.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 11. 2 34.5 18.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 12. 2 35.5 20.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 13. 2 37.5 22.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 14. 2 38.5 23.5 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 15. 2 40.5 25.5 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 16. 2 41.5 27.0 0.0 vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: 17. 2 46.5 0.0 0.0 vandps %xmm4, %xmm1, %xmm0 Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s # Perf stat reports an IPC of 1.97 for this block of code. 
@@ -11,13 +11,13 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 3000 -# CHECK-NEXT: Total Cycles: 4503 -# CHECK-NEXT: Total uOps: 4500 +# CHECK-NEXT: Total Cycles: 1504 +# CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 0.67 -# CHECK-NEXT: Block RThroughput: 0.8 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.99 +# CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -28,38 +28,49 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 1 0.33 cmpl %eax, %eax -# CHECK-NEXT: 2 2 0.67 cmovael %ebx, %eax +# CHECK-NEXT: 1 1 0.50 cmpl %eax, %eax +# CHECK-NEXT: 1 1 0.50 cmovael %ebx, %eax # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] 
[5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - - - 1.00 - - cmpl %eax, %eax -# CHECK-NEXT: - - 1.00 1.00 - - - - cmovael %ebx, %eax +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - 1.00 - - - - - - - - - - - - - - cmpl %eax, %eax +# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - cmovael %ebx, %eax # CHECK: Timeline view: -# CHECK-NEXT: 01 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 0123456 -# CHECK: [0,0] DeER . .. cmpl %eax, %eax -# CHECK-NEXT: [0,1] D=eeER .. cmovael %ebx, %eax -# CHECK-NEXT: [1,0] D===eER .. cmpl %eax, %eax -# CHECK-NEXT: [1,1] .D===eeER .. cmovael %ebx, %eax -# CHECK-NEXT: [2,0] .D=====eER.. cmpl %eax, %eax -# CHECK-NEXT: [2,1] . D=====eeER cmovael %ebx, %eax +# CHECK: [0,0] DeER .. cmpl %eax, %eax +# CHECK-NEXT: [0,1] D=eER.. cmovael %ebx, %eax +# CHECK-NEXT: [1,0] DeE-R.. cmpl %eax, %eax +# CHECK-NEXT: [1,1] D==eER. cmovael %ebx, %eax +# CHECK-NEXT: [2,0] .DeE-R. cmpl %eax, %eax +# CHECK-NEXT: [2,1] .D==eER cmovael %ebx, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -68,5 +79,5 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 3.7 0.3 0.0 cmpl %eax, %eax -# CHECK-NEXT: 1. 3 4.0 0.0 0.0 cmovael %ebx, %eax +# CHECK-NEXT: 0. 3 1.0 1.0 0.7 cmpl %eax, %eax +# CHECK-NEXT: 1. 
3 2.7 0.0 0.0 cmovael %ebx, %eax Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s # perf stat reports an IPC of 2.00 for this block of code. @@ -14,12 +14,12 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 6000 -# CHECK-NEXT: Total Cycles: 6003 +# CHECK-NEXT: Total Cycles: 3005 # CHECK-NEXT: Total uOps: 6000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 2.00 +# CHECK-NEXT: IPC: 2.00 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -31,48 +31,60 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: 1 2 0.50 vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1 2 0.50 vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 1 2 0.50 vpcmpeqq %xmm3, %xmm3, %xmm0 # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - 
PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 2.00 - 2.00 - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - - - - - - 2.00 2.00 - - 2.00 2.00 - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: - - - 1.00 - - - - vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: - - - 1.00 - - - - vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - 1.00 - - - - - vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - 1.00 - - - - vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 1.00 - - - 1.00 - - - - - vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - 1.00 - - - - vpcmpeqq %xmm3, %xmm3, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . . vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [0,1] D=eER. . . vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [0,2] D==eER . . vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [0,3] D===eER . . 
vpcmpeqq %xmm3, %xmm3, %xmm0 -# CHECK-NEXT: [1,0] .D===eER . . vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [1,1] .D====eER . . vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [1,2] .D=====eER. . vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [1,3] .D======eER . vpcmpeqq %xmm3, %xmm3, %xmm0 -# CHECK-NEXT: [2,0] . D======eER . vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [2,1] . D=======eER . vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [2,2] . D========eER. vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [2,3] . D=========eER vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK: [0,0] DeeER. . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] D=eeER . vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] DeeE-R . vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,3] D==eeER . vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [1,0] .DeeE-R . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [1,1] .D==eeER . vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] .D=eeE-R . vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,3] .D===eeER . vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [2,0] . D=eeE-R . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [2,1] . D===eeER. vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [2,2] . D==eeE-R. vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,3] . D====eeER vpcmpeqq %xmm3, %xmm3, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -81,7 +93,7 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 4.0 0.3 0.0 vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: 1. 3 5.0 0.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: 2. 3 6.0 0.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 3. 3 7.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: 0. 3 1.3 1.3 0.7 vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1. 3 3.0 3.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 2. 3 2.0 2.0 1.0 vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 3. 
3 4.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0 Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s # perf stat reports an IPC of 2.00 for this block of code. @@ -15,12 +15,12 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 6000 -# CHECK-NEXT: Total Cycles: 1501 +# CHECK-NEXT: Total Cycles: 1504 # CHECK-NEXT: Total uOps: 6000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 4.00 -# CHECK-NEXT: IPC: 4.00 +# CHECK-NEXT: uOps Per Cycle: 3.99 +# CHECK-NEXT: IPC: 3.99 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: @@ -35,44 +35,56 @@ # CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm0, %xmm0, %xmm1 # CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm1, %xmm1, %xmm2 # CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: 1 2 0.50 vpcmpgtq %xmm3, %xmm3, %xmm0 # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# 
CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - - - - - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - 0.50 0.50 - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - - - - - - vpcmpgtb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: - - - - - - - - vpcmpgtw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - 0.50 0.50 - - - - vpcmpgtq %xmm3, %xmm3, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: Index 0123 +# CHECK-NEXT: Index 0123456 -# CHECK: [0,0] DR . vpcmpgtb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [0,1] DR . vpcmpgtw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [0,2] DR . vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [0,3] DR . vpcmpgtq %xmm3, %xmm3, %xmm0 -# CHECK-NEXT: [1,0] .DR. vpcmpgtb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [1,1] .DR. vpcmpgtw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [1,2] .DR. vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [1,3] .DR. vpcmpgtq %xmm3, %xmm3, %xmm0 -# CHECK-NEXT: [2,0] . 
DR vpcmpgtb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [2,1] . DR vpcmpgtw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [2,2] . DR vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [2,3] . DR vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK: [0,0] DR .. vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] DR .. vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] DR .. vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,3] DeeER.. vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [1,0] .D--R.. vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [1,1] .D--R.. vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] .D--R.. vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,3] .DeeER. vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [2,0] . D--R. vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [2,1] . D--R. vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [2,2] . D--R. vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,3] . DeeER vpcmpgtq %xmm3, %xmm3, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -81,7 +93,7 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 0.0 0.0 0.0 vpcmpgtb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: 1. 3 0.0 0.0 0.0 vpcmpgtw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: 2. 3 0.0 0.0 0.0 vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 3. 3 0.0 0.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: 0. 3 0.0 0.0 1.3 vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1. 3 0.0 0.0 1.3 vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: 2. 3 0.0 0.0 1.3 vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 3. 
3 1.0 1.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0 Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s # perf stat reports an IPC of 1.00 for this code block. @@ -12,13 +12,13 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 3000 -# CHECK-NEXT: Total Cycles: 6003 -# CHECK-NEXT: Total uOps: 6000 +# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 4 # CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 0.50 -# CHECK-NEXT: Block RThroughput: 1.0 +# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -29,38 +29,49 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 2 2 0.67 sbbl %edx, %edx -# CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax +# CHECK-NEXT: 1 1 1.00 sbbl %edx, %edx +# CHECK-NEXT: 1 1 1.00 sbbl %eax, %eax # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - 
PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 1.33 1.33 - 1.33 - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - 2.00 2.00 - - - - - - - - - - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %edx, %edx -# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %eax, %eax +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - - 2.00 - - - - - - - - - - - - - sbbl %edx, %edx +# CHECK-NEXT: - - - - - 2.00 - - - - - - - - - - - - - - sbbl %eax, %eax # CHECK: Timeline view: -# CHECK-NEXT: 01234 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . sbbl %edx, %edx -# CHECK-NEXT: [0,1] D==eeER . . sbbl %eax, %eax -# CHECK-NEXT: [1,0] .D===eeER . . sbbl %edx, %edx -# CHECK-NEXT: [1,1] .D=====eeER . sbbl %eax, %eax -# CHECK-NEXT: [2,0] . D======eeER . sbbl %edx, %edx -# CHECK-NEXT: [2,1] . D========eeER sbbl %eax, %eax +# CHECK: [0,0] DeER . . sbbl %edx, %edx +# CHECK-NEXT: [0,1] D=eER. . sbbl %eax, %eax +# CHECK-NEXT: [1,0] D==eER . sbbl %edx, %edx +# CHECK-NEXT: [1,1] D===eER . sbbl %eax, %eax +# CHECK-NEXT: [2,0] .D===eER. 
sbbl %edx, %edx +# CHECK-NEXT: [2,1] .D====eER sbbl %eax, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -69,5 +80,5 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 4.0 0.3 0.0 sbbl %edx, %edx -# CHECK-NEXT: 1. 3 6.0 0.0 0.0 sbbl %eax, %eax +# CHECK-NEXT: 0. 3 2.7 0.3 0.0 sbbl %edx, %edx +# CHECK-NEXT: 1. 3 3.7 0.0 0.0 sbbl %eax, %eax Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s # perf stat reports a throughput of 1.51 IPC for this block of code. 
@@ -13,13 +13,13 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 -# CHECK-NEXT: Total Cycles: 7503 -# CHECK-NEXT: Total uOps: 6000 +# CHECK-NEXT: Total Cycles: 3006 +# CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.80 -# CHECK-NEXT: IPC: 0.60 -# CHECK-NEXT: Block RThroughput: 1.0 +# CHECK-NEXT: uOps Per Cycle: 1.50 +# CHECK-NEXT: IPC: 1.50 +# CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -30,43 +30,55 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 imull %edx, %eax -# CHECK-NEXT: 1 1 0.33 addl %edx, %edx -# CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax +# CHECK-NEXT: 1 4 1.00 imull %edx, %eax +# CHECK-NEXT: 1 1 0.50 addl %edx, %edx +# CHECK-NEXT: 1 1 1.00 sbbl %eax, %eax # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 1.33 1.33 - 1.33 - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - 2.00 2.00 - - - - - - - - - - - - 1.00 
# CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - imull %edx, %eax -# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edx, %edx -# CHECK-NEXT: - - 1.00 - - 1.00 - - sbbl %eax, %eax +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - 1.00 imull %edx, %eax +# CHECK-NEXT: - - - - - - 1.00 - - - - - - - - - - - - - addl %edx, %edx +# CHECK-NEXT: - - - - - 2.00 - - - - - - - - - - - - - - sbbl %eax, %eax # CHECK: Timeline view: -# CHECK-NEXT: 01234567 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER . . . imull %edx, %eax -# CHECK-NEXT: [0,1] DeE--R . . . addl %edx, %edx -# CHECK-NEXT: [0,2] D===eeER . . . sbbl %eax, %eax -# CHECK-NEXT: [1,0] .D====eeeER . . imull %edx, %eax -# CHECK-NEXT: [1,1] .DeE------R . . addl %edx, %edx -# CHECK-NEXT: [1,2] .D=======eeER . . sbbl %eax, %eax -# CHECK-NEXT: [2,0] . D========eeeER . imull %edx, %eax -# CHECK-NEXT: [2,1] . DeE----------R . addl %edx, %edx -# CHECK-NEXT: [2,2] . D===========eeER sbbl %eax, %eax +# CHECK: [0,0] D=eeeeER .. imull %edx, %eax +# CHECK-NEXT: [0,1] DeE----R .. addl %edx, %edx +# CHECK-NEXT: [0,2] D==eE--R .. sbbl %eax, %eax +# CHECK-NEXT: [1,0] D===eeeeER.. imull %edx, %eax +# CHECK-NEXT: [1,1] .DeE-----R.. addl %edx, %edx +# CHECK-NEXT: [1,2] .D===eE--R.. sbbl %eax, %eax +# CHECK-NEXT: [2,0] .D====eeeeER imull %edx, %eax +# CHECK-NEXT: [2,1] .D=eE------R addl %edx, %edx +# CHECK-NEXT: [2,2] . D====eE--R sbbl %eax, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -75,6 +87,6 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 5.0 0.3 0.0 imull %edx, %eax -# CHECK-NEXT: 1. 3 1.0 0.3 6.0 addl %edx, %edx -# CHECK-NEXT: 2. 
3 8.0 0.0 0.0 sbbl %eax, %eax +# CHECK-NEXT: 0. 3 3.7 0.7 0.0 imull %edx, %eax +# CHECK-NEXT: 1. 3 1.3 0.3 5.0 addl %edx, %edx +# CHECK-NEXT: 2. 3 4.0 2.0 2.0 sbbl %eax, %eax Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=500 -timeline < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=500 -timeline < %s | FileCheck %s vpmuld %xmm0, %xmm0, %xmm1 vpaddd %xmm1, %xmm1, %xmm0 @@ -7,7 +7,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1500 -# CHECK-NEXT: Total Cycles: 3004 +# CHECK-NEXT: Total Cycles: 3005 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 4 @@ -24,64 +24,76 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 5 1.00 vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: 1 1 0.50 vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: 1 1 0.50 vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: 1 4 1.00 vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: 1 2 0.50 vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: 1 2 0.50 vpaddd %xmm0, %xmm0, %xmm3 # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# 
CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - - - - - - - 2.00 1.00 - 1.50 1.50 - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - 1.00 - - - - - vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: - - - - - 1.00 - - vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: - - - 1.00 - - - - vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - - - - - - - - 1.00 - 1.00 - - - - - vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - 0.50 0.50 - - - - vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 - - - 1.00 - - - - vpaddd %xmm0, %xmm0, %xmm3 # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 01234 -# CHECK: [0,0] DeeeeeER . . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [0,1] D=====eER . . . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [0,2] D======eER. . . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [1,0] D======eeeeeER . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [1,1] .D==========eER. . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [1,2] .D===========eER . . . . . . . . . . 
vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [2,0] .D===========eeeeeER. . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [2,1] .D================eER . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [2,2] . D================eER . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [3,0] . D================eeeeeER . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [3,1] . D=====================eER . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [3,2] . D======================eER . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [4,0] . D=====================eeeeeER . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [4,1] . D==========================eER . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [4,2] . D===========================eER . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [5,0] . D===========================eeeeeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [5,1] . D===============================eER . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [5,2] . D================================eER. . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [6,0] . D================================eeeeeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [6,1] . D=====================================eER. . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [6,2] . D=====================================eER . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [7,0] . D=====================================eeeeeER. . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [7,1] . D==========================================eER . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [7,2] . D===========================================eER . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [8,0] . .D==========================================eeeeeER . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [8,1] . .D===============================================eER . . 
vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [8,2] . .D================================================eER . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [9,0] . .D================================================eeeeeER . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [9,1] . . D====================================================eER. vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [9,2] . . D=====================================================eER vpaddd %xmm0, %xmm0, %xmm3 +# CHECK: [0,0] DeeeeER . . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] D====eeER . . . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [0,2] D======eeER . . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [1,0] D======eeeeER . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [1,1] .D=========eeER. . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [1,2] .D===========eeER . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [2,0] .D===========eeeeER . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [2,1] .D===============eeER . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [2,2] . D================eeER . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [3,0] . D================eeeeER. . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [3,1] . D====================eeER . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [3,2] . D======================eeER . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [4,0] . D=====================eeeeER . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [4,1] . D=========================eeER . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [4,2] . D===========================eeER. . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [5,0] . D===========================eeeeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [5,1] . D==============================eeER . . . . . . 
vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [5,2] . D================================eeER . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [6,0] . D================================eeeeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [6,1] . D====================================eeER. . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [6,2] . D=====================================eeER . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [7,0] . D=====================================eeeeER . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [7,1] . D=========================================eeER . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [7,2] . D===========================================eeER . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [8,0] . .D==========================================eeeeER. . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [8,1] . .D==============================================eeER . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [8,2] . .D================================================eeER . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [9,0] . .D================================================eeeeER . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [9,1] . . D===================================================eeER . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [9,2] . . D=====================================================eeER vpaddd %xmm0, %xmm0, %xmm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -91,5 +103,5 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 10 25.0 0.1 0.0 vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: 1. 10 29.7 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: 1. 10 28.7 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0 # CHECK-NEXT: 2. 
10 30.5 0.0 0.0 vpaddd %xmm0, %xmm0, %xmm3 Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dot-product.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dot-product.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/dot-product.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s vmulps %xmm0, %xmm1, %xmm2 vhaddps %xmm2, %xmm2, %xmm3 @@ -7,13 +7,13 @@ # CHECK: Iterations: 300 # CHECK-NEXT: Instructions: 900 -# CHECK-NEXT: Total Cycles: 1211 +# CHECK-NEXT: Total Cycles: 627 # CHECK-NEXT: Total uOps: 2100 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.73 -# CHECK-NEXT: IPC: 0.74 -# CHECK-NEXT: Block RThroughput: 4.0 +# CHECK-NEXT: uOps Per Cycle: 3.35 +# CHECK-NEXT: IPC: 1.44 +# CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -25,42 +25,54 @@ # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: 3 11 1.00 vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 3 11 1.00 vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# 
CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 1.00 2.00 - 4.00 - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - - - - 1.49 1.51 - - - - 2.00 1.00 - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: - - - - - - - - 0.49 0.51 - - - - 1.00 - - - - - vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 0123456789 012 +# CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeeeeeER . . . . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,1] D=====eeeeeER . . . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [0,2] .D==========eeeeeER . . vhaddps %xmm3, %xmm3, %xmm4 -# CHECK-NEXT: [1,0] .DeeeeeE----------R . . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,1] . D=====eeeeeE----R . . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [1,2] . D==========eeeeeER . vhaddps %xmm3, %xmm3, %xmm4 -# CHECK-NEXT: [2,0] . DeeeeeE----------R . 
vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [2,1] . D=====eeeeeE----R . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [2,2] . D==========eeeeeER vhaddps %xmm3, %xmm3, %xmm4 +# CHECK: [0,0] DeeeeeER . . . . . . vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,1] D=====eeeeeeeeeeeER . . . . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,2] .D===============eeeeeeeeeeeER. . vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: [1,0] .DeeeeeE---------------------R. . vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,1] . D====eeeeeeeeeeeE----------R. . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,2] . D==============eeeeeeeeeeeER . vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: [2,0] . DeeeeeE--------------------R . vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [2,1] . D====eeeeeeeeeeeE---------R . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,2] . D==============eeeeeeeeeeeER vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -69,6 +81,6 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 1.0 1.0 6.7 vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1. 3 6.0 0.7 2.7 vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 2. 3 11.0 1.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: 0. 3 1.0 1.0 13.7 vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 3 5.3 0.0 6.3 vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 2. 
3 15.3 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s @@ -1,18 +1,18 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s vshufps $0, %xmm0, %xmm1, %xmm1 vhaddps (%rdi), %xmm1, %xmm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 15 +# CHECK-NEXT: Total Cycles: 20 # CHECK-NEXT: Total uOps: 5 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.33 -# CHECK-NEXT: IPC: 0.13 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: uOps Per Cycle: 0.25 +# CHECK-NEXT: IPC: 0.10 +# CHECK-NEXT: Block RThroughput: 1.3 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -23,15 +23,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: 4 11 2.00 * vhaddps (%rdi), %xmm1, %xmm2 +# CHECK-NEXT: 1 2 0.50 vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: 4 16 1.00 * vhaddps (%rdi), %xmm1, %xmm2 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: [0,1] .DeeeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2 +# CHECK: [0,0] DeeER. . . . 
vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s @@ -1,18 +1,18 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s vshufps $0, %xmm0, %xmm1, %xmm1 vhaddps (%rdi), %ymm1, %ymm2 # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 16 -# CHECK-NEXT: Total uOps: 5 +# CHECK-NEXT: Total Cycles: 20 +# CHECK-NEXT: Total uOps: 11 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.31 -# CHECK-NEXT: IPC: 0.13 -# CHECK-NEXT: Block RThroughput: 3.0 +# CHECK-NEXT: uOps Per Cycle: 0.55 +# CHECK-NEXT: IPC: 0.10 +# CHECK-NEXT: Block RThroughput: 2.8 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -23,15 +23,15 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: 4 12 2.00 * vhaddps (%rdi), %ymm1, %ymm2 +# CHECK-NEXT: 1 2 0.50 vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: 10 16 2.00 * vhaddps (%rdi), %ymm1, %ymm2 # CHECK: Timeline view: -# CHECK-NEXT: 012345 +# CHECK-NEXT: 0123456789 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2 +# CHECK: [0,0] DeeER. . . . 
vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/instruction-info-view.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/instruction-info-view.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/instruction-info-view.s @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=true < %s | FileCheck %s --check-prefix=ENABLED -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=DISABLED -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info < %s | FileCheck %s -check-prefix=ENABLED -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false < %s | FileCheck %s -check-prefix=ENABLED +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -resource-pressure=false -instruction-info=true < %s | FileCheck %s --check-prefix=ENABLED +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=DISABLED +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -resource-pressure=false -instruction-info < %s | FileCheck %s -check-prefix=ENABLED +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -resource-pressure=false < %s | FileCheck %s -check-prefix=ENABLED vmulps %xmm0, %xmm1, %xmm2 vhaddps %xmm2, %xmm2, %xmm3 @@ -13,14 +13,14 @@ # ENABLED: Iterations: 100 # ENABLED-NEXT: Instructions: 300 -# ENABLED-NEXT: Total Cycles: 414 +# ENABLED-NEXT: Total Cycles: 228 # ENABLED-NEXT: Total uOps: 700 # ENABLED: Dispatch Width: 4 
-# ENABLED-NEXT: uOps Per Cycle: 1.69 -# ENABLED-NEXT: IPC: 0.72 -# ENABLED-NEXT: Block RThroughput: 4.0 +# ENABLED-NEXT: uOps Per Cycle: 3.07 +# ENABLED-NEXT: IPC: 1.32 +# ENABLED-NEXT: Block RThroughput: 2.0 # ENABLED: Instruction Info: # ENABLED-NEXT: [1]: #uOps @@ -32,5 +32,5 @@ # ENABLED: [1] [2] [3] [4] [5] [6] Instructions: # ENABLED-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2 -# ENABLED-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3 -# ENABLED-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4 +# ENABLED-NEXT: 3 11 1.00 vhaddps %xmm2, %xmm2, %xmm3 +# ENABLED-NEXT: 3 11 1.00 vhaddps %xmm3, %xmm3, %xmm4 Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s vmovaps (%rsi), %xmm0 vmovaps %xmm0, (%rdi) @@ -12,12 +12,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 2803 +# CHECK-NEXT: Total Cycles: 2403 # CHECK-NEXT: Total uOps: 800 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.29 -# CHECK-NEXT: IPC: 0.29 +# CHECK-NEXT: uOps Per Cycle: 0.33 +# CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -29,52 +29,64 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0 +# CHECK-NEXT: 1 5 0.50 * vmovaps (%rsi), %xmm0 # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi) -# CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0 +# 
CHECK-NEXT: 1 5 0.50 * vmovaps 16(%rsi), %xmm0 # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: 1 5 0.50 * vmovaps 32(%rsi), %xmm0 # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: 1 5 0.50 * vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi) # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - - 4.00 - - 8.00 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - 8.00 - - - - - - - 4.00 - - - 4.00 3.99 4.01 - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - - - - - 1.00 vmovaps (%rsi), %xmm0 -# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi) -# CHECK-NEXT: - - - - - - - 1.00 vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: - - - - - - - 1.00 vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: - - - - 1.00 - - 1.00 
vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - 0.99 0.01 - - - - vmovaps (%rsi), %xmm0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %xmm0, (%rdi) +# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - 1.00 - - - - - vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - 1.00 - - - - - vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: - 1.00 - - - - - - - 1.00 - - - - 1.00 - - - - - vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: - 1.00 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %xmm0, 48(%rdi) # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123456 -# CHECK: [0,0] DeeeeeeER . . . . . vmovaps (%rsi), %xmm0 -# CHECK-NEXT: [0,1] D======eER. . . . . vmovaps %xmm0, (%rdi) -# CHECK-NEXT: [0,2] D=======eeeeeeER . . . vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: [0,3] D=============eER . . . vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: [0,4] .D=============eeeeeeER . . vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: [0,5] .D===================eER . . vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: [0,6] .D====================eeeeeeER. vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: [0,7] .D==========================eER vmovaps %xmm0, 48(%rdi) +# CHECK: [0,0] DeeeeeER . . . .. vmovaps (%rsi), %xmm0 +# CHECK-NEXT: [0,1] D=====eER . . . .. vmovaps %xmm0, (%rdi) +# CHECK-NEXT: [0,2] D======eeeeeER . . .. vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: [0,3] D===========eER. . .. vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: [0,4] .D===========eeeeeER. .. 
vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: [0,5] .D================eER .. vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: [0,6] .D=================eeeeeER. vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: [0,7] .D======================eER vmovaps %xmm0, 48(%rdi) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -84,10 +96,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi) -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: 3. 1 14.0 0.0 0.0 vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: 4. 1 14.0 0.0 0.0 vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: 5. 1 20.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: 6. 1 21.0 0.0 0.0 vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: 7. 1 27.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi) +# CHECK-NEXT: 2. 1 7.0 0.0 0.0 vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: 3. 1 12.0 0.0 0.0 vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: 4. 1 12.0 0.0 0.0 vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: 5. 1 17.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: 6. 1 18.0 0.0 0.0 vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: 7. 
1 23.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s vmovaps (%rsi), %xmm0 vmovaps %xmm0, (%rdi) @@ -12,7 +12,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 409 +# CHECK-NEXT: Total Cycles: 408 # CHECK-NEXT: Total uOps: 800 # CHECK: Dispatch Width: 4 @@ -29,52 +29,64 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0 +# CHECK-NEXT: 1 5 0.50 * vmovaps (%rsi), %xmm0 # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi) -# CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: 1 5 0.50 * vmovaps 16(%rsi), %xmm0 # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: 1 5 0.50 * vmovaps 32(%rsi), %xmm0 # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: 1 5 0.50 * vmovaps 48(%rsi), %xmm0 # CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi) # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# 
CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - - 4.00 - 3.94 4.06 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: 4.05 3.95 - - - - - - 3.95 0.05 - - - 4.00 3.95 4.05 - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - - - - 0.97 0.03 vmovaps (%rsi), %xmm0 -# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi) -# CHECK-NEXT: - - - - - - 0.03 0.97 vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: - - - - 1.00 - 0.97 0.03 vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: - - - - - - 1.00 - vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: - - - - 1.00 - 0.97 0.03 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - 1.00 - - - - - - 0.97 0.03 - - - - 0.97 0.03 - - - - vmovaps (%rsi), %xmm0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %xmm0, (%rdi) +# CHECK-NEXT: 1.00 - - - - - - - 1.00 - - - - - 1.00 - - - - - vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: 0.02 0.98 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: 0.02 0.98 - - - - - - 1.00 - - - - - 0.98 0.02 - - - - vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: 1.00 - - - - - - - - - - - 
- 1.00 - 1.00 - - - - vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: 0.98 0.02 - - - - - - 0.98 0.02 - - - - 1.00 - - - - - vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: 0.03 0.97 - - - - - - - - - - - 1.00 - 1.00 - - - - vmovaps %xmm0, 48(%rdi) # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . vmovaps (%rsi), %xmm0 -# CHECK-NEXT: [0,1] D======eER. . vmovaps %xmm0, (%rdi) -# CHECK-NEXT: [0,2] DeeeeeeE-R. . vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: [0,3] D=======eER . vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: [0,4] .DeeeeeeE-R . vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: [0,5] .D=======eER. vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: [0,6] .DeeeeeeE--R. vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: [0,7] .D========eER vmovaps %xmm0, 48(%rdi) +# CHECK: [0,0] DeeeeeER .. vmovaps (%rsi), %xmm0 +# CHECK-NEXT: [0,1] D=====eER .. vmovaps %xmm0, (%rdi) +# CHECK-NEXT: [0,2] DeeeeeE-R .. vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: [0,3] D======eER.. vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: [0,4] .DeeeeeE-R.. vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: [0,5] .D======eER. vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: [0,6] .DeeeeeE--R. vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: [0,7] .D=======eER vmovaps %xmm0, 48(%rdi) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -84,10 +96,10 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi) +# CHECK-NEXT: 1. 1 6.0 0.0 0.0 vmovaps %xmm0, (%rdi) # CHECK-NEXT: 2. 1 1.0 1.0 1.0 vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: 3. 1 8.0 0.0 0.0 vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: 3. 1 7.0 0.0 0.0 vmovaps %xmm0, 16(%rdi) # CHECK-NEXT: 4. 1 1.0 1.0 1.0 vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: 5. 1 8.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: 5. 1 7.0 0.0 0.0 vmovaps %xmm0, 32(%rdi) # CHECK-NEXT: 6. 1 1.0 1.0 2.0 vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: 7. 1 9.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) +# CHECK-NEXT: 7. 
1 8.0 0.0 0.0 vmovaps %xmm0, 48(%rdi) Index: llvm/trunk/test/tools/llvm-mca/X86/BdVer2/one-idioms.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BdVer2/one-idioms.s +++ llvm/trunk/test/tools/llvm-mca/X86/BdVer2/one-idioms.s @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=1 -register-file-stats < %s | FileCheck %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -timeline -timeline-max-iterations=1 -register-file-stats < %s | FileCheck %s # These are dependency-breaking one-idioms. # Much like zero-idioms, but they produce ones, and do consume resources. @@ -29,13 +29,13 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1500 -# CHECK-NEXT: Total Cycles: 903 +# CHECK-NEXT: Total Cycles: 754 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.66 -# CHECK-NEXT: IPC: 1.66 -# CHECK-NEXT: Block RThroughput: 6.0 +# CHECK-NEXT: uOps Per Cycle: 1.99 +# CHECK-NEXT: IPC: 1.99 +# CHECK-NEXT: Block RThroughput: 7.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -46,77 +46,99 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 pcmpeqb %mm2, %mm2 -# CHECK-NEXT: 1 3 1.00 pcmpeqd %mm2, %mm2 -# CHECK-NEXT: 1 3 1.00 pcmpeqw %mm2, %mm2 -# CHECK-NEXT: 1 1 0.50 pcmpeqb %xmm2, %xmm2 -# CHECK-NEXT: 1 1 0.50 pcmpeqd %xmm2, %xmm2 -# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm2, %xmm2 -# CHECK-NEXT: 1 1 0.50 pcmpeqw %xmm2, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: 1 1 0.50 vpcmpeqq 
%xmm3, %xmm3, %xmm5 -# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 2 0.50 pcmpeqb %mm2, %mm2 +# CHECK-NEXT: 1 2 0.50 pcmpeqd %mm2, %mm2 +# CHECK-NEXT: 1 2 0.50 pcmpeqw %mm2, %mm2 +# CHECK-NEXT: 1 2 0.50 pcmpeqb %xmm2, %xmm2 +# CHECK-NEXT: 1 2 0.50 pcmpeqd %xmm2, %xmm2 +# CHECK-NEXT: 1 2 0.50 pcmpeqq %xmm2, %xmm2 +# CHECK-NEXT: 1 2 0.50 pcmpeqw %xmm2, %xmm2 +# CHECK-NEXT: 1 2 0.50 vpcmpeqb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 2 0.50 vpcmpeqd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 2 0.50 vpcmpeqq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 2 0.50 vpcmpeqw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 2 0.50 vpcmpeqb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 2 0.50 vpcmpeqd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 2 0.50 vpcmpeqq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 2 0.50 vpcmpeqw %xmm3, %xmm3, %xmm5 # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 1500 -# CHECK-NEXT: Max number of mappings used: 168 +# CHECK-NEXT: Max number of mappings used: 72 + +# CHECK: * Register File #1 -- PdFpuPRF: +# CHECK-NEXT: Number of physical registers: 160 +# CHECK-NEXT: Total number of mappings created: 1500 +# CHECK-NEXT: Max number of mappings used: 72 + +# CHECK: * Register File #2 -- PdIntegerPRF: +# CHECK-NEXT: Number of physical registers: 96 +# CHECK-NEXT: Total number of mappings created: 0 +# CHECK-NEXT: Max number of mappings used: 0 # CHECK: Resources: -# CHECK-NEXT: [0] - SBDivider -# CHECK-NEXT: [1] - SBFPDivider -# CHECK-NEXT: [2] - SBPort0 -# CHECK-NEXT: [3] - SBPort1 -# CHECK-NEXT: [4] - SBPort4 -# CHECK-NEXT: [5] - SBPort5 -# CHECK-NEXT: [6.0] - SBPort23 -# CHECK-NEXT: [6.1] - SBPort23 +# CHECK-NEXT: [0.0] - PdAGLU01 +# CHECK-NEXT: [0.1] - PdAGLU01 +# CHECK-NEXT: [1] - PdBranch +# CHECK-NEXT: [2] - PdCount +# CHECK-NEXT: [3] - PdDiv +# CHECK-NEXT: [4] - PdEX0 +# CHECK-NEXT: [5] - PdEX1 +# CHECK-NEXT: [6] - PdFPCVT +# CHECK-NEXT: [7.0] - PdFPFMA +# CHECK-NEXT: [7.1] - PdFPFMA +# CHECK-NEXT: [8.0] - PdFPMAL +# CHECK-NEXT: [8.1] - PdFPMAL +# 
CHECK-NEXT: [9] - PdFPMMA +# CHECK-NEXT: [10] - PdFPSTO +# CHECK-NEXT: [11] - PdFPU0 +# CHECK-NEXT: [12] - PdFPU1 +# CHECK-NEXT: [13] - PdFPU2 +# CHECK-NEXT: [14] - PdFPU3 +# CHECK-NEXT: [15] - PdFPXBR +# CHECK-NEXT: [16] - PdMul # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 7.65 - 7.35 - - +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] +# CHECK-NEXT: - - - - - - - - - - 7.50 7.50 - - 7.50 7.50 - - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: -# CHECK-NEXT: - - - 1.00 - - - - pcmpeqb %mm2, %mm2 -# CHECK-NEXT: - - - 1.00 - - - - pcmpeqd %mm2, %mm2 -# CHECK-NEXT: - - - 1.00 - - - - pcmpeqw %mm2, %mm2 -# CHECK-NEXT: - - - 0.75 - 0.25 - - pcmpeqb %xmm2, %xmm2 -# CHECK-NEXT: - - - 0.49 - 0.51 - - pcmpeqd %xmm2, %xmm2 -# CHECK-NEXT: - - - 0.64 - 0.36 - - pcmpeqq %xmm2, %xmm2 -# CHECK-NEXT: - - - 0.21 - 0.79 - - pcmpeqw %xmm2, %xmm2 -# CHECK-NEXT: - - - 0.44 - 0.56 - - vpcmpeqb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: - - - 0.26 - 0.74 - - vpcmpeqd %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: - - - 0.25 - 0.75 - - vpcmpeqq %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: - - - 0.25 - 0.75 - - vpcmpeqb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: - - - 0.55 - 0.45 - - vpcmpeqd %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: - - - 0.44 - 0.56 - - vpcmpeqq %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: - - - 0.37 - 0.63 - - vpcmpeqw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16] Instructions: +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - pcmpeqb %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - pcmpeqd %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - pcmpeqw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 
0.50 0.50 - - - - pcmpeqb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - pcmpeqd %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - - 1.00 - - - - pcmpeqq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - pcmpeqw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - vpcmpeqb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 1.00 - - - - - vpcmpeqd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - vpcmpeqq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - vpcmpeqw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - vpcmpeqb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - vpcmpeqd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - vpcmpeqq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - 0.50 0.50 - - 0.50 0.50 - - - - vpcmpeqw %xmm3, %xmm3, %xmm5 # CHECK: Timeline view: # CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeER .. pcmpeqb %mm2, %mm2 -# CHECK-NEXT: [0,1] D===eeeER .. pcmpeqd %mm2, %mm2 -# CHECK-NEXT: [0,2] D======eeeER pcmpeqw %mm2, %mm2 -# CHECK-NEXT: [0,3] DeE--------R pcmpeqb %xmm2, %xmm2 -# CHECK-NEXT: [0,4] .DeE-------R pcmpeqd %xmm2, %xmm2 -# CHECK-NEXT: [0,5] .D=eE------R pcmpeqq %xmm2, %xmm2 -# CHECK-NEXT: [0,6] .D==eE-----R pcmpeqw %xmm2, %xmm2 -# CHECK-NEXT: [0,7] .DeE-------R vpcmpeqb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,8] . DeE------R vpcmpeqd %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,9] . D==eE----R vpcmpeqq %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,10] . D===eE---R vpcmpeqw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,11] . D====eE--R vpcmpeqb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,12] . D====eE-R vpcmpeqd %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,13] . D====eE-R vpcmpeqq %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,14] . 
D=====eER vpcmpeqw %xmm3, %xmm3, %xmm5 +# CHECK: [0,0] DeeER. .. pcmpeqb %mm2, %mm2 +# CHECK-NEXT: [0,1] DeeER. .. pcmpeqd %mm2, %mm2 +# CHECK-NEXT: [0,2] D=eeER .. pcmpeqw %mm2, %mm2 +# CHECK-NEXT: [0,3] D==eeER .. pcmpeqb %xmm2, %xmm2 +# CHECK-NEXT: [0,4] .DeeE-R .. pcmpeqd %xmm2, %xmm2 +# CHECK-NEXT: [0,5] .D==eeER .. pcmpeqq %xmm2, %xmm2 +# CHECK-NEXT: [0,6] .D=eeE-R .. pcmpeqw %xmm2, %xmm2 +# CHECK-NEXT: [0,7] .D===eeER .. vpcmpeqb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,8] . D=eeE-R .. vpcmpeqd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,9] . D===eeER.. vpcmpeqq %x