Index: llvm/trunk/lib/Target/X86/X86.td =================================================================== --- llvm/trunk/lib/Target/X86/X86.td +++ llvm/trunk/lib/Target/X86/X86.td @@ -640,8 +640,7 @@ FeatureCLWB ]>; -// FIXME: define SKX model -class SkylakeServerProc : ProcModel : ProcModel; Index: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td +++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td @@ -0,0 +1,6949 @@ +//=- X86SchedSkylakeServer.td - X86 Skylake Server Scheduling -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Skylake Server to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def SkylakeServerModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and Skylake can + // decode 6 instructions per cycle. + let IssueWidth = 6; + let MicroOpBufferSize = 224; // Based on the reorder buffer. + let LoadLatency = 5; + let MispredictPenalty = 14; + + // Based on the LSD (loop-stream detector) queue size and benchmarking data. + let LoopMicroOpBufferSize = 50; + + // This flag is set to allow the scheduler to assign a default model to + // unrecognized opcodes. + let CompleteModel = 0; +} + +let SchedModel = SkylakeServerModel in { + +// Skylake Server can issue micro-ops to 8 different ports in one cycle. + +// Ports 0, 1, 5, and 6 handle all computation. +// Port 4 gets the data half of stores. Store data can be available later than +// the store address, but since we don't model the latency of stores, we can +// ignore that. 
+// Ports 2 and 3 are identical. They handle loads and the address half of +// stores. Port 7 can handle address calculations. +def SKXPort0 : ProcResource<1>; +def SKXPort1 : ProcResource<1>; +def SKXPort2 : ProcResource<1>; +def SKXPort3 : ProcResource<1>; +def SKXPort4 : ProcResource<1>; +def SKXPort5 : ProcResource<1>; +def SKXPort6 : ProcResource<1>; +def SKXPort7 : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports. +def SKXPort01 : ProcResGroup<[SKXPort0, SKXPort1]>; +def SKXPort23 : ProcResGroup<[SKXPort2, SKXPort3]>; +def SKXPort237 : ProcResGroup<[SKXPort2, SKXPort3, SKXPort7]>; +def SKXPort04 : ProcResGroup<[SKXPort0, SKXPort4]>; +def SKXPort05 : ProcResGroup<[SKXPort0, SKXPort5]>; +def SKXPort06 : ProcResGroup<[SKXPort0, SKXPort6]>; +def SKXPort15 : ProcResGroup<[SKXPort1, SKXPort5]>; +def SKXPort16 : ProcResGroup<[SKXPort1, SKXPort6]>; +def SKXPort56 : ProcResGroup<[SKXPort5, SKXPort6]>; +def SKXPort015 : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5]>; +def SKXPort056 : ProcResGroup<[SKXPort0, SKXPort5, SKXPort6]>; +def SKXPort0156: ProcResGroup<[SKXPort0, SKXPort1, SKXPort5, SKXPort6]>; + +// 60 Entry Unified Scheduler +def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4, + SKXPort5, SKXPort6, SKXPort7]> { + let BufferSize=60; +} + +// NOTE(review): ReadAdvance, WriteRes and SKXWriteResPair below carry no +// template arguments in this copy of the patch, although the multiclass body +// uses Lat -- confirm against the original patch before applying. +// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 +// cycles after the memory operand. +def : ReadAdvance; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass SKXWriteResPair { + // Register variant is using a single cycle on ExePort. + def : WriteRes { let Latency = Lat; } + + // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the + // latency. 
+ def : WriteRes { + let Latency = !add(Lat, 5); + } +} + +// A folded store needs a cycle on port 4 for the store data, but it does not +// need an extra port 2/3 cycle to recompute the address. +def : WriteRes; + +// Arithmetic. +defm : SKXWriteResPair; // Simple integer ALU op. +defm : SKXWriteResPair; // Integer multiplication. +def : WriteRes { let Latency = 3; } // Integer multiplication, high part. +def SKXDivider : ProcResource<1>; // Integer division issued on port 0. +def : WriteRes { // Integer division. + let Latency = 25; + let ResourceCycles = [1, 10]; +} +def : WriteRes { + let Latency = 29; + let ResourceCycles = [1, 1, 10]; +} + +def : WriteRes; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm : SKXWriteResPair; + +// Loads, stores, and moves, not folded with other operations. +def : WriteRes { let Latency = 5; } +def : WriteRes; +def : WriteRes; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def : WriteRes; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm : SKXWriteResPair; + +// Floating point. This covers both scalar and vector operations. +defm : SKXWriteResPair; // Floating point add/sub/compare. +defm : SKXWriteResPair; // Floating point multiplication. +defm : SKXWriteResPair; // 10-14 cycles. // Floating point division. +defm : SKXWriteResPair; // Floating point square root. +defm : SKXWriteResPair; // Floating point reciprocal estimate. +defm : SKXWriteResPair; // Floating point reciprocal square root estimate. +// defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm : SKXWriteResPair; // Floating point vector shuffles. +defm : SKXWriteResPair; // Floating point vector blends. +def : WriteRes { // Fp vector variable blends. 
+ let Latency = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +// FMA Scheduling helper class. +// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } + +// Vector integer operations. +defm : SKXWriteResPair; // Vector integer ALU op, no logicals. +defm : SKXWriteResPair; // Vector integer shifts. +defm : SKXWriteResPair; // Vector integer multiply. +defm : SKXWriteResPair; // Vector shuffles. +defm : SKXWriteResPair; // Vector blends. + +def : WriteRes { // Vector variable blends. + let Latency = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +def : WriteRes { // Vector MPSAD. + let Latency = 6; + let ResourceCycles = [1, 2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [1, 1, 2]; +} + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm : SKXWriteResPair; // Vector and/or/xor. + +// Conversion between integer and float. +defm : SKXWriteResPair; // Float -> Integer. +defm : SKXWriteResPair; // Integer -> Float. +defm : SKXWriteResPair; // Float -> Float size conversion. + +// String instructions. +// Packed Compare Implicit Length Strings, Return Mask +// String instructions. 
+def : WriteRes { + let Latency = 10; + let ResourceCycles = [3]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [3, 1]; +} +// Packed Compare Explicit Length Strings, Return Mask +def : WriteRes { + let Latency = 10; + let ResourceCycles = [3, 2, 4]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [6, 2, 1]; +} + // Packed Compare Implicit Length Strings, Return Index +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3]; +} +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3, 1]; +} +// Packed Compare Explicit Length Strings, Return Index +def : WriteRes { + let Latency = 11; + let ResourceCycles = [6, 2]; +} +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3, 2, 2, 1]; +} + +// AES instructions. +def : WriteRes { // Decryption, encryption. + let Latency = 7; + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} +def : WriteRes { // InvMixColumn. + let Latency = 14; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 14; + let ResourceCycles = [2, 1]; +} +def : WriteRes { // Key Generation. + let Latency = 10; + let ResourceCycles = [2, 8]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [2, 7, 1]; +} + +// Carry-less multiplication instructions. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [2, 1]; +} +def : WriteRes { + let Latency = 7; + let ResourceCycles = [2, 1, 1]; +} + +// Catch-all for expensive system instructions. +def : WriteRes { let Latency = 100; } // def WriteSystem : SchedWrite; + +// AVX2. +defm : SKXWriteResPair; // Fp 256-bit width vector shuffles. +defm : SKXWriteResPair; // 256-bit width vector shuffles. +def : WriteRes { // Variable vector shifts. + let Latency = 2; + let ResourceCycles = [2, 1]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1, 1]; +} + +// Old microcoded instructions that nobody uses. 
+def : WriteRes { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
+
+// NOTE(review): the anonymous WriteRes defs in this span carry no template
+// arguments in this copy of the patch -- confirm against the original.
+
+// Fence instructions.
+def : WriteRes;
+
+// Nop, not very useful except it provides a model for nops!
+def : WriteRes;
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes {
+  let Latency = 3;
+}
+
+// x,m / v,v,m.
+def : WriteRes {
+  let Latency = 7;
+  let ResourceCycles = [1, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes;
+
+// v <- v,m.
+def : WriteRes {
+  let Latency = 5;
+  let ResourceCycles = [1, 1];
+}
+
+// Remaining instrs.
+
+// One micro-op on port 0 with single-cycle latency.
+def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup1], (instregex "KANDBrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KANDDrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KANDNBrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KANDNDrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KANDNQrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KANDNWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KANDQrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KANDWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KMOVBkk")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KMOVDkk")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KMOVQkk")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KMOVWkk")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KNOTBrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KNOTDrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KNOTQrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KNOTWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KORBrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KORDrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KORQrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KORWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KXNORBrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KXNORDrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KXNORQrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KXNORWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KXORBrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KXORDrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KXORQrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "KXORWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PADDSBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PADDSWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PADDUSBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PADDUSWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PAVGBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PAVGWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPEQBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPEQDirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPEQWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPGTBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPGTDirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPGTWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PMAXSWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PMAXUBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PMINSWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PMINUBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLDri")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLDrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLQri")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLQrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLWri")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRADri")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRADrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRAWri")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRAWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLDri")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLDrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLQri")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLQrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLWri")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLWrr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSUBSBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSUBSWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSUBUSBirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSUBUSWirr")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVB2MZ128rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVB2MZ256rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVB2MZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVD2MZ128rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVD2MZ256rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVD2MZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVQ2MZ128rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVQ2MZ256rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVQ2MZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVW2MZ128rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVW2MZ256rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVW2MZrr(b?)(k?)(z?)")>;
+
+// One micro-op on port 1 with single-cycle latency.
+def SKXWriteResGroup2 : SchedWriteRes<[SKXPort1]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup2], (instregex "MMX_MASKMOVQ64")>;
+
+// One micro-op on port 5 with single-cycle latency.
+// Patterns ending in `(_REV)?` make the whole suffix optional, so they cover
+// both the plain opcode and its _REV form.
+def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "COM_FST0r")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "INSERTPSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "KMOVBkr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "KMOVDkr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "KMOVQkr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "KMOVWkr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_MOVD64rr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_MOVD64to64rr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PALIGNR64irr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PSHUFBrr64")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PSHUFWri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKHBWirr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKHDQirr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKHWDirr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKLBWirr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKLDQirr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKLWDirr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOV64toPQIrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVDDUPrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVDI2PDIrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVHLPSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVLHPSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVSDrr(_REV)?")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVSHDUPrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVSLDUPrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVUPDrr(_REV)?")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "MOVUPSrr(_REV)?")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PACKSSDWrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PACKSSWBrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PACKUSDWrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PACKUSWBrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PALIGNRrri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PBLENDWrri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXBDrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXBQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXBWrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXDQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXWDrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXWQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXBDrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXBQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXBWrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXDQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXWDrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXWQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PSHUFBrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PSHUFDri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PSHUFHWri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PSHUFLWri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PSLLDQri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PSRLDQri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKHBWrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKHDQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKHQDQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKHWDrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKLBWrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKLDQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKLQDQrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKLWDrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "SHUFPDrri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "SHUFPSrri")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "UCOM_FPr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "UCOM_Fr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "UNPCKHPDrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "UNPCKHPSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "UNPCKLPDrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "UNPCKLPSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VBROADCASTI32X2Z128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VBROADCASTSSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VINSERTPSZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VINSERTPSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOV64toPQIZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOV64toPQIrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPYrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPZ128rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPZ256rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDI2PDIZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDI2PDIrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVHLPSZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVHLPSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVLHPSZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVLHPSrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSDZrr(b?)(k?)(z?)")>;
+// `(_REV)?` makes the whole suffix optional, so the plain opcode matches too.
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSDrr(_REV)?")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPYrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPZ128rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPZ256rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPYrr")>;
+def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSSZrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPDYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPSYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex 
"VPACKUSWBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRYrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPBLENDWYrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPBLENDWrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPBROADCASTDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPBROADCASTQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex 
"VPERMILPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXBDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXBQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXWQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXBDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXBQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXWQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWri")>; +def: InstRW<[SKXWriteResGroup3], (instregex 
"VPSHUFLWYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ512rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ512rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex 
"VPUNPCKHQDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDYrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDZ128rri(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSYrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSrr")>; + +def 
SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> {
+  // One micro-op occupying SKXPort6 for one cycle, latency 1.
+  let ResourceCycles = [1];
+  let NumMicroOps = 1;
+  let Latency = 1;
+}
+def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
+
+// One micro-op, latency 1, on the SKXPort01 group (port 0 or port 1).
+def SKXWriteResGroup5 : SchedWriteRes<[SKXPort01]> {
+  let ResourceCycles = [1];
+  let NumMicroOps = 1;
+  let Latency = 1;
+}
+// Opcodes scheduled as SKXWriteResGroup5, one mnemonic family per row.
+def: InstRW<[SKXWriteResGroup5], (instregex
+    "PABSBrr", "PABSDrr", "PABSWrr",
+    "PADDSBrr", "PADDSWrr", "PADDUSBrr", "PADDUSWrr",
+    "PAVGBrr", "PAVGWrr",
+    "PCMPEQBrr", "PCMPEQDrr", "PCMPEQQrr", "PCMPEQWrr",
+    "PCMPGTBrr", "PCMPGTDrr", "PCMPGTWrr",
+    "PMAXSBrr", "PMAXSDrr", "PMAXSWrr", "PMAXUBrr", "PMAXUDrr", "PMAXUWrr",
+    "PMINSBrr", "PMINSDrr", "PMINSWrr", "PMINUBrr", "PMINUDrr", "PMINUWrr",
+    "PSIGNBrr128", "PSIGNDrr128", "PSIGNWrr128",
+    "PSLLDri", "PSLLQri", "PSLLWri",
+    "PSRADri", "PSRAWri",
+    "PSRLDri", "PSRLQri", "PSRLWri",
+    "PSUBSBrr", "PSUBSWrr", "PSUBUSBrr", "PSUBUSWrr",
+    "VPABSBYrr", "VPABSBZ128rr(b?)(k?)(z?)", "VPABSBZ256rr(b?)(k?)(z?)", "VPABSBZrr(b?)(k?)(z?)", "VPABSBrr",
+    "VPABSDYrr", "VPABSDZ128rr(b?)(k?)(z?)", "VPABSDZ256rr(b?)(k?)(z?)", "VPABSDZrr(b?)(k?)(z?)", "VPABSDrr",
+    "VPABSQZ128rr(b?)(k?)(z?)", "VPABSQZ256rr(b?)(k?)(z?)", "VPABSQZrr(b?)(k?)(z?)",
+    "VPABSWYrr", "VPABSWZ128rr(b?)(k?)(z?)", "VPABSWZ256rr(b?)(k?)(z?)")>;
+// SKXWriteResGroup5 (continued): one multi-pattern InstRW replaces the
+// per-opcode statements; every pattern string is unchanged. One mnemonic
+// family per row.
+def: InstRW<[SKXWriteResGroup5], (instregex
+    "VPABSWZrr(b?)(k?)(z?)", "VPABSWrr",
+    "VPADDSBYrr", "VPADDSBZ128rr(b?)(k?)(z?)", "VPADDSBZ256rr(b?)(k?)(z?)", "VPADDSBZrr(b?)(k?)(z?)", "VPADDSBrr",
+    "VPADDSWYrr", "VPADDSWZ128rr(b?)(k?)(z?)", "VPADDSWZ256rr(b?)(k?)(z?)", "VPADDSWZrr(b?)(k?)(z?)", "VPADDSWrr",
+    "VPADDUSBYrr", "VPADDUSBZ128rr(b?)(k?)(z?)", "VPADDUSBZ256rr(b?)(k?)(z?)", "VPADDUSBZrr(b?)(k?)(z?)", "VPADDUSBrr",
+    "VPADDUSWYrr", "VPADDUSWZ128rr(b?)(k?)(z?)", "VPADDUSWZ256rr(b?)(k?)(z?)", "VPADDUSWZrr(b?)(k?)(z?)", "VPADDUSWrr",
+    "VPAVGBYrr", "VPAVGBZ128rr(b?)(k?)(z?)", "VPAVGBZ256rr(b?)(k?)(z?)", "VPAVGBZrr(b?)(k?)(z?)", "VPAVGBrr",
+    "VPAVGWYrr", "VPAVGWZ128rr(b?)(k?)(z?)", "VPAVGWZ256rr(b?)(k?)(z?)", "VPAVGWZrr(b?)(k?)(z?)", "VPAVGWrr",
+    "VPCMPEQBYrr", "VPCMPEQBrr", "VPCMPEQDYrr", "VPCMPEQDrr",
+    "VPCMPEQQYrr", "VPCMPEQQrr", "VPCMPEQWYrr", "VPCMPEQWrr",
+    "VPCMPGTBYrr", "VPCMPGTBrr", "VPCMPGTDYrr", "VPCMPGTDrr", "VPCMPGTWYrr", "VPCMPGTWrr",
+    "VPMAXSBYrr", "VPMAXSBZ128rr(b?)(k?)(z?)", "VPMAXSBZ256rr(b?)(k?)(z?)", "VPMAXSBZrr(b?)(k?)(z?)", "VPMAXSBrr",
+    "VPMAXSDYrr", "VPMAXSDZ128rr(b?)(k?)(z?)", "VPMAXSDZ256rr(b?)(k?)(z?)", "VPMAXSDZrr(b?)(k?)(z?)", "VPMAXSDrr",
+    "VPMAXSWYrr", "VPMAXSWZ128rr(b?)(k?)(z?)", "VPMAXSWZ256rr(b?)(k?)(z?)", "VPMAXSWZrr(b?)(k?)(z?)", "VPMAXSWrr",
+    "VPMAXUBYrr", "VPMAXUBZ128rr(b?)(k?)(z?)", "VPMAXUBZ256rr(b?)(k?)(z?)", "VPMAXUBZrr(b?)(k?)(z?)", "VPMAXUBrr",
+    "VPMAXUDYrr", "VPMAXUDZ128rr(b?)(k?)(z?)", "VPMAXUDZ256rr(b?)(k?)(z?)", "VPMAXUDZrr(b?)(k?)(z?)", "VPMAXUDrr",
+    "VPMAXUWYrr", "VPMAXUWZ128rr(b?)(k?)(z?)", "VPMAXUWZ256rr(b?)(k?)(z?)", "VPMAXUWZrr(b?)(k?)(z?)", "VPMAXUWrr",
+    "VPMINSBYrr", "VPMINSBZ128rr(b?)(k?)(z?)", "VPMINSBZ256rr(b?)(k?)(z?)", "VPMINSBZrr(b?)(k?)(z?)", "VPMINSBrr",
+    "VPMINSDYrr", "VPMINSDZ128rr(b?)(k?)(z?)", "VPMINSDZ256rr(b?)(k?)(z?)", "VPMINSDZrr(b?)(k?)(z?)", "VPMINSDrr",
+    "VPMINSWYrr", "VPMINSWZ128rr(b?)(k?)(z?)", "VPMINSWZ256rr(b?)(k?)(z?)", "VPMINSWZrr(b?)(k?)(z?)", "VPMINSWrr",
+    "VPMINUBYrr", "VPMINUBZ128rr(b?)(k?)(z?)", "VPMINUBZ256rr(b?)(k?)(z?)", "VPMINUBZrr(b?)(k?)(z?)", "VPMINUBrr",
+    "VPMINUDYrr", "VPMINUDZ128rr(b?)(k?)(z?)", "VPMINUDZ256rr(b?)(k?)(z?)", "VPMINUDZrr(b?)(k?)(z?)", "VPMINUDrr",
+    "VPMINUWYrr", "VPMINUWZ128rr(b?)(k?)(z?)", "VPMINUWZ256rr(b?)(k?)(z?)", "VPMINUWZrr(b?)(k?)(z?)", "VPMINUWrr",
+    "VPROLDZ128r(b?)i(k?)(z?)", "VPROLDZ256r(b?)i(k?)(z?)", "VPROLDZri(b?)(k?)(z?)",
+    "VPROLQZ128r(b?)i(k?)(z?)", "VPROLQZ256r(b?)i(k?)(z?)", "VPROLQZri(b?)(k?)(z?)",
+    "VPROLVDZ128rr(b?)(k?)(z?)", "VPROLVDZ256rr(b?)(k?)(z?)", "VPROLVDZrr(b?)(k?)(z?)",
+    "VPROLVQZ128rr(b?)(k?)(z?)", "VPROLVQZ256rr(b?)(k?)(z?)", "VPROLVQZrr(b?)(k?)(z?)",
+    "VPRORDZ128r(b?)i(k?)(z?)", "VPRORDZ256r(b?)i(k?)(z?)", "VPRORDZri(b?)(k?)(z?)",
+    "VPRORQZ128r(b?)i(k?)(z?)", "VPRORQZ256r(b?)i(k?)(z?)", "VPRORQZri(b?)(k?)(z?)",
+    "VPRORVDZ128rr(b?)(k?)(z?)", "VPRORVDZ256rr(b?)(k?)(z?)", "VPRORVDZrr(b?)(k?)(z?)",
+    "VPRORVQZ128rr(b?)(k?)(z?)", "VPRORVQZ256rr(b?)(k?)(z?)", "VPRORVQZrr(b?)(k?)(z?)",
+    "VPSIGNBYrr256", "VPSIGNBrr128", "VPSIGNDYrr256", "VPSIGNDrr128", "VPSIGNWYrr256", "VPSIGNWrr128",
+    "VPSLLDYri", "VPSLLDZ128r(b?)i(k?)(z?)", "VPSLLDZ256r(b?)i(k?)(z?)", "VPSLLDZri(b?)(k?)(z?)", "VPSLLDri",
+    "VPSLLQYri", "VPSLLQZ128r(b?)i(k?)(z?)", "VPSLLQZ256r(b?)i(k?)(z?)", "VPSLLQZri(b?)(k?)(z?)", "VPSLLQri",
+    "VPSLLVDYrr", "VPSLLVDZ128rr(b?)(k?)(z?)", "VPSLLVDZ256rr(b?)(k?)(z?)", "VPSLLVDZrr(b?)(k?)(z?)", "VPSLLVDrr",
+    "VPSLLVQYrr", "VPSLLVQZ128rr(b?)(k?)(z?)", "VPSLLVQZ256rr(b?)(k?)(z?)", "VPSLLVQZrr(b?)(k?)(z?)", "VPSLLVQrr",
+    "VPSLLVWZ128rr(b?)(k?)(z?)", "VPSLLVWZ256rr(b?)(k?)(z?)", "VPSLLVWZrr(b?)(k?)(z?)",
+    "VPSLLWYri", "VPSLLWZ128ri(b?)(k?)(z?)", "VPSLLWZ256ri(b?)(k?)(z?)", "VPSLLWZri(b?)(k?)(z?)", "VPSLLWri",
+    "VPSRADYri", "VPSRADZ128r(b?)i(k?)(z?)", "VPSRADZ256r(b?)i(k?)(z?)", "VPSRADZri(b?)(k?)(z?)", "VPSRADri",
+    "VPSRAQZ128r(b?)i(k?)(z?)", "VPSRAQZ256r(b?)i(k?)(z?)", "VPSRAQZri(b?)(k?)(z?)")>;
+def: 
InstRW<[SKXWriteResGroup5], (instregex
+    // SKXWriteResGroup5 (continued): vector shift and saturating-subtract
+    // opcodes, one mnemonic family per row; pattern strings are unchanged.
+    "VPSRAVDYrr", "VPSRAVDZ128rr(b?)(k?)(z?)", "VPSRAVDZ256rr(b?)(k?)(z?)", "VPSRAVDZrr(b?)(k?)(z?)", "VPSRAVDrr",
+    "VPSRAVQZ128rr(b?)(k?)(z?)", "VPSRAVQZ256rr(b?)(k?)(z?)", "VPSRAVQZrr(b?)(k?)(z?)",
+    "VPSRAVWZ128rr(b?)(k?)(z?)", "VPSRAVWZ256rr(b?)(k?)(z?)", "VPSRAVWZrr(b?)(k?)(z?)",
+    "VPSRAWYri", "VPSRAWZ128ri(b?)(k?)(z?)", "VPSRAWZ256ri(b?)(k?)(z?)", "VPSRAWZri(b?)(k?)(z?)", "VPSRAWri",
+    "VPSRLDYri", "VPSRLDZ128r(b?)i(k?)(z?)", "VPSRLDZ256r(b?)i(k?)(z?)", "VPSRLDZri(b?)(k?)(z?)", "VPSRLDri",
+    "VPSRLQYri", "VPSRLQZ128r(b?)i(k?)(z?)", "VPSRLQZ256r(b?)i(k?)(z?)", "VPSRLQZri(b?)(k?)(z?)", "VPSRLQri",
+    "VPSRLVDYrr", "VPSRLVDZ128rr(b?)(k?)(z?)", "VPSRLVDZ256rr(b?)(k?)(z?)", "VPSRLVDZrr(b?)(k?)(z?)", "VPSRLVDrr",
+    "VPSRLVQYrr", "VPSRLVQZ128rr(b?)(k?)(z?)", "VPSRLVQZ256rr(b?)(k?)(z?)", "VPSRLVQZrr(b?)(k?)(z?)", "VPSRLVQrr",
+    "VPSRLVWZ128rr(b?)(k?)(z?)", "VPSRLVWZ256rr(b?)(k?)(z?)", "VPSRLVWZrr(b?)(k?)(z?)",
+    "VPSRLWYri", "VPSRLWZ128ri(b?)(k?)(z?)", "VPSRLWZ256ri(b?)(k?)(z?)", "VPSRLWZri(b?)(k?)(z?)", "VPSRLWri",
+    "VPSUBSBYrr", "VPSUBSBZ128rr(b?)(k?)(z?)", "VPSUBSBZ256rr(b?)(k?)(z?)", "VPSUBSBZrr(b?)(k?)(z?)", "VPSUBSBrr",
+    "VPSUBSWYrr", "VPSUBSWZ128rr(b?)(k?)(z?)", "VPSUBSWZ256rr(b?)(k?)(z?)", "VPSUBSWZrr(b?)(k?)(z?)", "VPSUBSWrr",
+    "VPSUBUSBYrr", "VPSUBUSBZ128rr(b?)(k?)(z?)", "VPSUBUSBZ256rr(b?)(k?)(z?)")>;
+def: 
InstRW<[SKXWriteResGroup5], (instregex
+    // Last SKXWriteResGroup5 entries (unsigned saturating subtract).
+    "VPSUBUSBZrr(b?)(k?)(z?)", "VPSUBUSBrr",
+    "VPSUBUSWYrr", "VPSUBUSWZ128rr(b?)(k?)(z?)", "VPSUBUSWZ256rr(b?)(k?)(z?)", "VPSUBUSWZrr(b?)(k?)(z?)", "VPSUBUSWrr")>;
+
+// One micro-op, latency 1, on the SKXPort05 group (port 0 or port 5).
+def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup6], (instregex
+    "FINCSTP", "FNOP",
+    // Was "(_REV?)", which parses as a mandatory "_RE" followed by an
+    // optional "V", so the plain MMX_MOVQ64rr opcode could never match.
+    // "(_REV)?" makes the whole suffix optional.
+    "MMX_MOVQ64rr(_REV)?",
+    "MMX_PABSBrr64", "MMX_PABSDrr64", "MMX_PABSWrr64",
+    "MMX_PADDBirr", "MMX_PADDDirr", "MMX_PADDQirr", "MMX_PADDWirr",
+    "MMX_PANDNirr", "MMX_PANDirr", "MMX_PORirr",
+    "MMX_PSIGNBrr64", "MMX_PSIGNDrr64", "MMX_PSIGNWrr64",
+    "MMX_PSUBBirr", "MMX_PSUBDirr", "MMX_PSUBQirr", "MMX_PSUBWirr",
+    "MMX_PXORirr")>;
+
+def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> {
+  let 
Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+// Opcodes scheduled as SKXWriteResGroup7: one micro-op, latency 1, on the
+// SKXPort06 group (port 0 or port 6). One mnemonic family per row.
+//
+// The ADC/SBB register forms previously used "(_REV?)", which parses as a
+// mandatory "_RE" followed by an optional "V"; the non-_REV opcodes could
+// therefore never match. "(_REV)?" makes the whole suffix optional.
+def: InstRW<[SKXWriteResGroup7], (instregex
+    "ADC(16|32|64)ri8", "ADC(16|32|64)rr(_REV)?", "ADC8rr(_REV)?",
+    "ADCX32rr", "ADCX64rr", "ADOX32rr", "ADOX64rr",
+    "BT(16|32|64)ri8", "BT(16|32|64)rr",
+    "BTC(16|32|64)ri8", "BTC(16|32|64)rr",
+    "BTR(16|32|64)ri8", "BTR(16|32|64)rr",
+    "BTS(16|32|64)ri8", "BTS(16|32|64)rr",
+    "CDQ", "CLAC",
+    "CMOVAE(16|32|64)rr", "CMOVB(16|32|64)rr", "CMOVE(16|32|64)rr",
+    "CMOVG(16|32|64)rr", "CMOVGE(16|32|64)rr",
+    "CMOVL(16|32|64)rr", "CMOVLE(16|32|64)rr",
+    "CMOVNE(16|32|64)rr", "CMOVNO(16|32|64)rr", "CMOVNP(16|32|64)rr", "CMOVNS(16|32|64)rr",
+    "CMOVO(16|32|64)rr", "CMOVP(16|32|64)rr", "CMOVS(16|32|64)rr",
+    "CQO",
+    "JAE_1", "JAE_4", "JA_1", "JA_4",
+    "JBE_1", "JBE_4", "JB_1", "JB_4",
+    "JE_1", "JE_4",
+    "JGE_1", "JGE_4", "JG_1", "JG_4",
+    "JLE_1", "JLE_4", "JL_1", "JL_4",
+    "JMP_1", "JMP_4",
+    "JNE_1", "JNE_4", "JNO_1", "JNO_4", "JNP_1", "JNP_4", "JNS_1", "JNS_4",
+    "JO_1", "JO_4", "JP_1", "JP_4", "JS_1", "JS_4",
+    "RORX32ri", "RORX64ri",
+    "SAR(16|32|64)r1", "SAR(16|32|64)ri", "SAR8r1", "SAR8ri",
+    "SARX32rr", "SARX64rr",
+    "SBB(16|32|64)ri8", "SBB(16|32|64)rr(_REV)?", "SBB8rr(_REV)?",
+    "SETAEr", "SETBr", "SETEr", "SETGEr", "SETGr", "SETLEr", "SETLr",
+    "SETNEr", "SETNOr", "SETNPr", "SETNSr", "SETOr", "SETPr", "SETSr",
+    "SHL(16|32|64)r1", "SHL(16|32|64)ri", "SHL8r1", "SHL8ri",
+    "SHLX32rr", "SHLX64rr",
+    "SHR(16|32|64)r1")>;
+def: 
InstRW<[SKXWriteResGroup7], (instregex
+    // Last SKXWriteResGroup7 entries.
+    "SHR(16|32|64)ri", "SHR8r1", "SHR8ri", "SHRX32rr", "SHRX64rr", "STAC")>;
+
+// One micro-op, latency 1, on the SKXPort15 group (port 1 or port 5).
+def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup8], (instregex
+    "ANDN32rr", "ANDN64rr",
+    "BLSI32rr", "BLSI64rr",
+    "BLSMSK32rr", "BLSMSK64rr",
+    "BLSR32rr", "BLSR64rr",
+    "BZHI32rr", "BZHI64rr",
+    "LEA(16|32|64)r")>;
+
+// One micro-op, latency 1, on the SKXPort015 group (port 0, 1 or 5).
+def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+// The register-move patterns below previously ended in "(_REV?)", which
+// parses as a mandatory "_RE" followed by an optional "V"; the non-_REV
+// opcodes could therefore never match. "(_REV)?" makes the suffix optional.
+def: InstRW<[SKXWriteResGroup9], (instregex
+    "ANDNPDrr", "ANDNPSrr", "ANDPDrr", "ANDPSrr",
+    "BLENDPDrri", "BLENDPSrri",
+    "MMX_MOVD64from64rr",
+    "MOVAPDrr(_REV)?", "MOVAPSrr(_REV)?", "MOVDQArr(_REV)?", "MOVDQUrr(_REV)?",
+    "MOVPQI2QIrr", "MOVSSrr(_REV)?",
+    "ORPDrr", "ORPSrr",
+    "PADDBrr", "PADDDrr", "PADDQrr", "PADDWrr",
+    "PANDNrr", "PANDrr", "PORrr",
+    "PSUBBrr", "PSUBDrr", "PSUBQrr", "PSUBWrr",
+    "PXORrr",
+    "VANDNPDYrr", "VANDNPDZ128rr(b?)(k?)(z?)", "VANDNPDZ256rr(b?)(k?)(z?)", "VANDNPDZrr(b?)(k?)(z?)", "VANDNPDrr",
+    "VANDNPSYrr", "VANDNPSZ128rr(b?)(k?)(z?)", "VANDNPSZ256rr(b?)(k?)(z?)", "VANDNPSZrr(b?)(k?)(z?)", "VANDNPSrr",
+    "VANDPDYrr", "VANDPDZ128rr(b?)(k?)(z?)", "VANDPDZ256rr(b?)(k?)(z?)", "VANDPDZrr(b?)(k?)(z?)", "VANDPDrr",
+    "VANDPSYrr")>;
+def: 
InstRW<[SKXWriteResGroup9], (instregex
+    // Continuation of the SKXWriteResGroup9 register-register list, folded
+    // into one multi-pattern InstRW instead of one anonymous def per opcode.
+    //
+    // The reversed-encoding suffix is spelled "(_REV)?" so that the whole
+    // suffix is optional. The earlier "(_REV?)" spelling only made the final
+    // 'V' optional, i.e. it required "_RE" and never matched the plain opcode.
+    // The VMOVAPSZ* and VMOVDQA64Z256 entries carry the same optional suffix
+    // as their sibling move patterns for consistency.
+    "VANDPSZ128rr(b?)(k?)(z?)", "VANDPSZ256rr(b?)(k?)(z?)",
+    "VANDPSZrr(b?)(k?)(z?)", "VANDPSrr",
+    "VBLENDMPDZ128rr(b?)(k?)(z?)", "VBLENDMPDZ256rr(b?)(k?)(z?)",
+    "VBLENDMPDZrr(b?)(k?)(z?)",
+    "VBLENDMPSZ128rr(b?)(k?)(z?)", "VBLENDMPSZ256rr(b?)(k?)(z?)",
+    "VBLENDMPSZrr(b?)(k?)(z?)",
+    "VBLENDPDYrri", "VBLENDPDrri", "VBLENDPSYrri", "VBLENDPSrri",
+    "VMOVAPDYrr(_REV)?",
+    "VMOVAPDZ128rr(b?)(k?)(z?)(_REV)?", "VMOVAPDZ256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVAPDZrr(b?)(k?)(z?)(_REV)?",
+    "VMOVAPDrr(_REV)?",
+    "VMOVAPSYrr(_REV)?",
+    "VMOVAPSZ128rr(b?)(k?)(z?)(_REV)?", "VMOVAPSZ256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVAPSZrr(b?)(k?)(z?)(_REV)?",
+    "VMOVAPSrr(_REV)?",
+    "VMOVDQA32Z128rr(b?)(k?)(z?)(_REV)?", "VMOVDQA32Z256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQA32Zrr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQA64Z128rr(b?)(k?)(z?)(_REV)?", "VMOVDQA64Z256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQA64Zrr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQAYrr(_REV)?", "VMOVDQArr(_REV)?",
+    "VMOVDQU16Z128rr(b?)(k?)(z?)(_REV)?", "VMOVDQU16Z256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQU16Zrr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQU32Z128rr(b?)(k?)(z?)(_REV)?", "VMOVDQU32Z256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQU32Zrr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQU64Z128rr(b?)(k?)(z?)(_REV)?", "VMOVDQU64Z256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQU64Zrr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQU8Z128rr(b?)(k?)(z?)(_REV)?", "VMOVDQU8Z256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQU8Zrr(b?)(k?)(z?)(_REV)?",
+    "VMOVDQUYrr(_REV)?", "VMOVDQUrr(_REV)?",
+    "VMOVPQI(2Q|Lo2PQ)IZrr(b?)(k?)(z?)",
+    "VMOVPQI2QIrr",
+    "VMOVSSrr(_REV)?",
+    "VMOVUPDZ128rr(b?)(k?)(z?)(_REV)?", "VMOVUPDZ256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVUPDZrr(b?)(k?)(z?)(_REV)?",
+    "VMOVUPSZ128rr(b?)(k?)(z?)(_REV)?", "VMOVUPSZ256rr(b?)(k?)(z?)(_REV)?",
+    "VMOVUPSZrr(b?)(k?)(z?)(_REV)?",
+    "VMOVZPQILo2PQIrr",
+    "VORPDYrr", "VORPDZ128rr(b?)(k?)(z?)", "VORPDZ256rr(b?)(k?)(z?)",
+    "VORPDZrr(b?)(k?)(z?)", "VORPDrr",
+    "VORPSYrr", "VORPSZ128rr(b?)(k?)(z?)", "VORPSZ256rr(b?)(k?)(z?)",
+    "VORPSZrr(b?)(k?)(z?)", "VORPSrr",
+    "VPADDBYrr", "VPADDBZ128rr(b?)(k?)(z?)", "VPADDBZ256rr(b?)(k?)(z?)",
+    "VPADDBZrr(b?)(k?)(z?)", "VPADDBrr",
+    "VPADDDYrr", "VPADDDZ128rr(b?)(k?)(z?)", "VPADDDZ256rr(b?)(k?)(z?)",
+    "VPADDDZrr(b?)(k?)(z?)", "VPADDDrr",
+    "VPADDQYrr", "VPADDQZ128rr(b?)(k?)(z?)", "VPADDQZ256rr(b?)(k?)(z?)",
+    "VPADDQZrr(b?)(k?)(z?)", "VPADDQrr",
+    "VPADDWYrr", "VPADDWZ128rr(b?)(k?)(z?)", "VPADDWZ256rr(b?)(k?)(z?)",
+    "VPADDWZrr(b?)(k?)(z?)", "VPADDWrr",
+    "VPANDDZ128rr(b?)(k?)(z?)", "VPANDDZ256rr(b?)(k?)(z?)",
+    "VPANDDZrr(b?)(k?)(z?)",
+    "VPANDNDZ128rr(b?)(k?)(z?)", "VPANDNDZ256rr(b?)(k?)(z?)",
+    "VPANDNDZrr(b?)(k?)(z?)",
+    "VPANDNQZ128rr(b?)(k?)(z?)", "VPANDNQZ256rr(b?)(k?)(z?)",
+    "VPANDNQZrr(b?)(k?)(z?)",
+    "VPANDNYrr", "VPANDNrr",
+    "VPANDQZ128rr(b?)(k?)(z?)", "VPANDQZ256rr(b?)(k?)(z?)",
+    "VPANDQZrr(b?)(k?)(z?)",
+    "VPANDYrr", "VPANDrr",
+    "VPBLENDDYrri", "VPBLENDDrri",
+    "VPBLENDMBZ128rr(b?)(k?)(z?)", "VPBLENDMBZ256rr(b?)(k?)(z?)",
+    "VPBLENDMBZrr(b?)(k?)(z?)",
+    "VPBLENDMDZ128rr(b?)(k?)(z?)", "VPBLENDMDZ256rr(b?)(k?)(z?)")>;
+// Final part of the SKXWriteResGroup9 register-register list, folded into one
+// multi-pattern InstRW. VPSUBWZ256rr is listed alongside its Z128/Z siblings;
+// it was the only VPSUB{B,D,Q,W} 256-bit EVEX form missing from the group.
+def: InstRW<[SKXWriteResGroup9], (instregex
+    "VPBLENDMDZrr(b?)(k?)(z?)",
+    "VPBLENDMQZ128rr(b?)(k?)(z?)", "VPBLENDMQZ256rr(b?)(k?)(z?)",
+    "VPBLENDMQZrr(b?)(k?)(z?)",
+    "VPBLENDMWZ128rr(b?)(k?)(z?)", "VPBLENDMWZ256rr(b?)(k?)(z?)",
+    "VPBLENDMWZrr(b?)(k?)(z?)",
+    "VPORDZ128rr(b?)(k?)(z?)", "VPORDZ256rr(b?)(k?)(z?)",
+    "VPORDZrr(b?)(k?)(z?)",
+    "VPORQZ128rr(b?)(k?)(z?)", "VPORQZ256rr(b?)(k?)(z?)",
+    "VPORQZrr(b?)(k?)(z?)",
+    "VPORYrr", "VPORrr",
+    "VPSUBBYrr", "VPSUBBZ128rr(b?)(k?)(z?)", "VPSUBBZ256rr(b?)(k?)(z?)",
+    "VPSUBBZrr(b?)(k?)(z?)", "VPSUBBrr",
+    "VPSUBDYrr", "VPSUBDZ128rr(b?)(k?)(z?)", "VPSUBDZ256rr(b?)(k?)(z?)",
+    "VPSUBDZrr(b?)(k?)(z?)", "VPSUBDrr",
+    "VPSUBQYrr", "VPSUBQZ128rr(b?)(k?)(z?)", "VPSUBQZ256rr(b?)(k?)(z?)",
+    "VPSUBQZrr(b?)(k?)(z?)", "VPSUBQrr",
+    "VPSUBWYrr", "VPSUBWZ128rr(b?)(k?)(z?)", "VPSUBWZ256rr(b?)(k?)(z?)",
+    "VPSUBWZrr(b?)(k?)(z?)", "VPSUBWrr",
+    "VPTERNLOGDZ128rri(b?)(k?)(z?)", "VPTERNLOGDZ256rri(b?)(k?)(z?)",
+    "VPTERNLOGDZrri(b?)(k?)(z?)",
+    "VPTERNLOGQZ128rri(b?)(k?)(z?)", "VPTERNLOGQZ256rri(b?)(k?)(z?)",
+    "VPTERNLOGQZrri(b?)(k?)(z?)",
+    "VPXORDZ128rr(b?)(k?)(z?)", "VPXORDZ256rr(b?)(k?)(z?)",
+    "VPXORDZrr(b?)(k?)(z?)",
+    "VPXORQZ128rr(b?)(k?)(z?)", "VPXORQZ256rr(b?)(k?)(z?)",
+    "VPXORQZrr(b?)(k?)(z?)",
+    "VPXORYrr", "VPXORrr",
+    "VXORPDYrr", "VXORPDZ128rr(b?)(k?)(z?)", "VXORPDZ256rr(b?)(k?)(z?)",
+    "VXORPDZrr(b?)(k?)(z?)", "VXORPDrr",
+    "VXORPSYrr", "VXORPSZ128rr(b?)(k?)(z?)", "VXORPSZ256rr(b?)(k?)(z?)",
+    "VXORPSZrr(b?)(k?)(z?)", "VXORPSrr",
+    "XORPDrr", "XORPSrr")>;
+
+// Group 10: a single micro-op with 1-cycle latency that may issue on any of
+// ports 0, 1, 5 or 6 (the SKXPort0156 group).
+def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> {
+  let Latency = 1;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+// First part of the group 10 opcode list. "(_REV)?" makes the reversed-encoding
+// suffix optional as a whole; the previous "(_REV?)" required "_RE" and so
+// skipped the plain rr opcodes.
+def: InstRW<[SKXWriteResGroup10], (instregex
+    "ADD(16|32|64)ri8", "ADD(16|32|64)rr(_REV)?",
+    "ADD8i8", "ADD8ri", "ADD8rr(_REV)?",
+    "AND(16|32|64)ri8", "AND(16|32|64)rr(_REV)?",
+    "AND8i8", "AND8ri", "AND8rr(_REV)?",
+    "CBW", "CLC", "CMC",
+    "CMP(16|32|64)ri8", "CMP(16|32|64)rr(_REV)?",
+    "CMP8i8", "CMP8ri", "CMP8rr(_REV)?",
+    "CWDE",
+    "DEC(16|32|64)r", "DEC8r",
+    "INC(16|32|64)r", "INC8r",
+    "LAHF",
+    "MOV(16|32|64)rr(_REV)?",
+    "MOV8ri")>;
+// Remainder of the SKXWriteResGroup10 opcode list, folded into one
+// multi-pattern InstRW. "(_REV)?" makes the reversed-encoding suffix optional
+// as a whole; the previous "(_REV?)" required "_RE" and so skipped the plain
+// rr opcodes.
+def: InstRW<[SKXWriteResGroup10], (instregex
+    "MOV8ri_alt", "MOV8rr(_REV)?",
+    "MOVSX(16|32|64)rr16", "MOVSX(16|32|64)rr32", "MOVSX(16|32|64)rr8",
+    "MOVZX(16|32|64)rr16", "MOVZX(16|32|64)rr8",
+    "NEG(16|32|64)r", "NEG8r",
+    "NOOP",
+    "NOT(16|32|64)r", "NOT8r",
+    "OR(16|32|64)ri8", "OR(16|32|64)rr(_REV)?",
+    "OR8i8", "OR8ri", "OR8rr(_REV)?",
+    "SAHF",
+    "SGDT64m", "SIDT64m", "SLDT64m", "SMSW16m",
+    "STC", "STRm",
+    "SUB(16|32|64)ri8", "SUB(16|32|64)rr(_REV)?",
+    "SUB8i8", "SUB8ri", "SUB8rr(_REV)?",
+    "SYSCALL",
+    "TEST(16|32|64)rr", "TEST8i8", "TEST8ri", "TEST8rr",
+    "XCHG(16|32|64)rr",
+    "XOR(16|32|64)ri8", "XOR(16|32|64)rr(_REV)?",
+    "XOR8i8", "XOR8ri", "XOR8rr(_REV)?")>;
+
+// Group 11: two micro-ops with 1-cycle latency, one on port 4 (store data)
+// and one on the port 2/3/7 group (store address).
+def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
+  let Latency = 1;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+// First part of the group 11 opcode list.
+def: InstRW<[SKXWriteResGroup11], (instregex
+    "FBSTPm",
+    "KMOVBmk", "KMOVDmk", "KMOVQmk", "KMOVWmk",
+    "MMX_MOVD64from64rm", "MMX_MOVD64mr", "MMX_MOVNTQmr", "MMX_MOVQ64mr",
+    "MOV(16|32|64)mr", "MOV8mi", "MOV8mr",
+    "MOVAPDmr", "MOVAPSmr", "MOVDQAmr", "MOVDQUmr",
+    "MOVHPDmr", "MOVHPSmr", "MOVLPDmr", "MOVLPSmr",
+    "MOVNTDQmr", "MOVNTI_64mr", "MOVNTImr", "MOVNTPDmr", "MOVNTPSmr",
+    "MOVPDI2DImr", "MOVPQI2QImr", "MOVPQIto64mr",
+    "MOVSSmr", "MOVUPDmr", "MOVUPSmr",
+    "ST_FP32m", "ST_FP64m", "ST_FP80m",
+    "VEXTRACTF128mr",
+    "VEXTRACTF32x4Z256mr(b?)(k?)(z?)", "VEXTRACTF32x4Zmr(b?)(k?)(z?)",
+    "VEXTRACTF32x8Zmr(b?)(k?)(z?)",
+    "VEXTRACTF64x2Z256mr(b?)(k?)(z?)", "VEXTRACTF64x2Zmr(b?)(k?)(z?)",
+    "VEXTRACTF64x4Zmr(b?)(k?)(z?)",
+    "VEXTRACTI128mr",
+    "VEXTRACTI32x4Z256mr(b?)(k?)(z?)", "VEXTRACTI32x4Zmr(b?)(k?)(z?)",
+    "VEXTRACTI32x8Zmr(b?)(k?)(z?)",
+    "VEXTRACTI64x2Z256mr(b?)(k?)(z?)", "VEXTRACTI64x2Zmr(b?)(k?)(z?)",
+    "VEXTRACTI64x4Zmr(b?)(k?)(z?)",
+    "VMOVAPDYmr", "VMOVAPDZ128mr(b?)(k?)(z?)", "VMOVAPDZ256mr(b?)(k?)(z?)",
+    "VMOVAPDZmr(b?)(k?)(z?)", "VMOVAPDmr",
+    "VMOVAPSYmr", "VMOVAPSZ128mr(b?)(k?)(z?)", "VMOVAPSZ256mr(b?)(k?)(z?)",
+    "VMOVAPSZmr(b?)(k?)(z?)", "VMOVAPSmr",
+    "VMOVDQA32Z128mr(b?)(k?)(z?)", "VMOVDQA32Z256mr(b?)(k?)(z?)",
+    "VMOVDQA32Zmr(b?)(k?)(z?)",
+    "VMOVDQA64Z128mr(b?)(k?)(z?)", "VMOVDQA64Z256mr(b?)(k?)(z?)",
+    "VMOVDQA64Zmr(b?)(k?)(z?)",
+    "VMOVDQAYmr", "VMOVDQAmr",
+    "VMOVDQU16Z128mr(b?)(k?)(z?)", "VMOVDQU16Z256mr(b?)(k?)(z?)",
+    "VMOVDQU16Zmr(b?)(k?)(z?)",
+    "VMOVDQU32Z128mr(b?)(k?)(z?)", "VMOVDQU32Z256mr(b?)(k?)(z?)",
+    "VMOVDQU32Zmr(b?)(k?)(z?)",
+    "VMOVDQU64Z128mr(b?)(k?)(z?)", "VMOVDQU64Z256mr(b?)(k?)(z?)",
+    "VMOVDQU64Zmr(b?)(k?)(z?)",
+    "VMOVDQU8Z128mr(b?)(k?)(z?)", "VMOVDQU8Z256mr(b?)(k?)(z?)",
+    "VMOVDQUYmr", "VMOVDQUmr",
+    "VMOVHPDZ128mr(b?)(k?)(z?)", "VMOVHPDmr",
+    "VMOVHPSZ128mr(b?)(k?)(z?)", "VMOVHPSmr",
+    "VMOVLPDZ128mr(b?)(k?)(z?)", "VMOVLPDmr",
+    "VMOVLPSZ128mr(b?)(k?)(z?)", "VMOVLPSmr",
+    "VMOVNTDQYmr", "VMOVNTDQZ128mr(b?)(k?)(z?)", "VMOVNTDQZ256mr(b?)(k?)(z?)",
+    "VMOVNTDQZmr(b?)(k?)(z?)", "VMOVNTDQmr",
+    "VMOVNTPDYmr", "VMOVNTPDZ128mr(b?)(k?)(z?)", "VMOVNTPDZ256mr(b?)(k?)(z?)",
+    "VMOVNTPDZmr(b?)(k?)(z?)", "VMOVNTPDmr",
+    "VMOVNTPSYmr", "VMOVNTPSZ128mr(b?)(k?)(z?)", "VMOVNTPSZ256mr(b?)(k?)(z?)",
+    "VMOVNTPSZmr(b?)(k?)(z?)", "VMOVNTPSmr",
+    "VMOVPDI2DIZmr(b?)(k?)(z?)", "VMOVPDI2DImr",
+    "VMOVPQI(2QI|to64)Zmr(b?)(k?)(z?)")>;
+// Remainder of the SKXWriteResGroup11 opcode list, folded into one
+// multi-pattern InstRW.
+def: InstRW<[SKXWriteResGroup11], (instregex
+    "VMOVPQI2QImr", "VMOVPQIto64mr",
+    "VMOVSDZmr(b?)(k?)(z?)", "VMOVSDmr",
+    "VMOVSSZmr(b?)(k?)(z?)", "VMOVSSmr",
+    "VMOVUPDYmr", "VMOVUPDZ128mr(b?)(k?)(z?)", "VMOVUPDZ256mr(b?)(k?)(z?)",
+    "VMOVUPDZmr(b?)(k?)(z?)", "VMOVUPDmr",
+    "VMOVUPSYmr", "VMOVUPSZ128mr(b?)(k?)(z?)", "VMOVUPSZ256mr(b?)(k?)(z?)",
+    "VMOVUPSZmr(b?)(k?)(z?)", "VMOVUPSmr",
+    "VMPTRSTm")>;
+
+// Group 12: one micro-op on port 0, 2-cycle latency.
+def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> {
+  let Latency = 2;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup12], (instregex
+    "COMISDrr", "COMISSrr",
+    "MMX_MOVD64from64rr", "MMX_MOVD64grr", "MMX_PMOVMSKBrr",
+    "MOVMSKPDrr", "MOVMSKPSrr",
+    "MOVPDI2DIrr", "MOVPQIto64rr",
+    "PMOVMSKBrr",
+    "UCOMISDrr", "UCOMISSrr",
+    "VCOMISDZrb", "VCOMISDrr", "VCOMISSZrb", "VCOMISSrr",
+    "VMOVMSKPDYrr", "VMOVMSKPDrr", "VMOVMSKPSYrr", "VMOVMSKPSrr",
+    "VMOVPDI2DIZrr(b?)(k?)(z?)", "VMOVPDI2DIrr",
+    "VMOVPQIto64Zrr(b?)(k?)(z?)", "VMOVPQIto64rr",
+    "VPMOVMSKBYrr", "VPMOVMSKBrr",
+    "VTESTPDYrr", "VTESTPDrr", "VTESTPSYrr", "VTESTPSrr",
+    "VUCOMISDZrb", "VUCOMISDrr", "VUCOMISSZrb", "VUCOMISSrr")>;
+
+// Group 13: two micro-ops, both on port 5 (2 cycles of port 5), 2-cycle latency.
+def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup13], (instregex
+    "MMX_MOVQ2DQrr", "MMX_PINSRWirri",
+    "PINSRBrr", "PINSRDrr", "PINSRQrr", "PINSRWrri",
+    "VPINSRBZrr(b?)(k?)(z?)", "VPINSRBrr",
+    "VPINSRDZrr(b?)(k?)(z?)", "VPINSRDrr",
+    "VPINSRQZrr(b?)(k?)(z?)", "VPINSRQrr",
+    "VPINSRWZrr(b?)(k?)(z?)", "VPINSRWrri")>;
+
+// Group 14: two micro-ops on the port 0/5 group, 2-cycle latency.
+def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup14], (instregex "FDECSTP", "MMX_MOVDQ2Qrr")>;
+
+// Group 15: two micro-ops on the port 0/6 group, 2-cycle latency.
+def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup15], (instregex
+    "CMOVA(16|32|64)rr", "CMOVBE(16|32|64)rr",
+    "ROL(16|32|64)r1", "ROL(16|32|64)ri", "ROL8r1", "ROL8ri",
+    "ROR(16|32|64)r1", "ROR(16|32|64)ri", "ROR8r1", "ROR8ri",
+    "SETAr", "SETBEr")>;
+
+// Group 16: two micro-ops on the port 0/1/5 group, 2-cycle latency.
+def SKXWriteResGroup16 : SchedWriteRes<[SKXPort015]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup16], (instregex
+    "BLENDVPDrr0", "BLENDVPSrr0", "PBLENDVBrr0",
+    "VBLENDVPDYrr", "VBLENDVPDrr", "VBLENDVPSYrr", "VBLENDVPSrr",
+    "VPBLENDVBYrr", "VPBLENDVBrr")>;
+
+// Group 17: two micro-ops on the port 0/1/5/6 group, 2-cycle latency.
+def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup17], (instregex "LFENCE", "WAIT", "XGETBV")>;
+
+// Group 18: one micro-op on port 0 plus one on the port 2/3/7 group,
+// 2-cycle latency.
+def SKXWriteResGroup18 : SchedWriteRes<[SKXPort0,SKXPort237]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup18], (instregex
+    "MMX_MASKMOVQ64", "VMASKMOVDQU",
+    "VMASKMOVPDYmr", "VMASKMOVPDmr", "VMASKMOVPSYmr", "VMASKMOVPSmr",
+    "VPMASKMOVDYmr", "VPMASKMOVDmr", "VPMASKMOVQYmr", "VPMASKMOVQmr")>;
+
+// Group 19: one micro-op on port 5 plus one on the port 0/1 group,
+// 2-cycle latency. VPSRLWZ128rr is listed with the other 128-bit EVEX
+// shift-by-register forms; it was the only one absent from the group.
+def SKXWriteResGroup19 : SchedWriteRes<[SKXPort5,SKXPort01]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup19], (instregex
+    "PSLLDrr", "PSLLQrr", "PSLLWrr",
+    "PSRADrr", "PSRAWrr",
+    "PSRLDrr", "PSRLQrr", "PSRLWrr",
+    "VPSLLDZ128rr(b?)(k?)(z?)", "VPSLLDrr",
+    "VPSLLQZ128rr(b?)(k?)(z?)", "VPSLLQrr",
+    "VPSLLWZ128rr(b?)(k?)(z?)", "VPSLLWrr",
+    "VPSRADZ128rr(b?)(k?)(z?)", "VPSRADrr",
+    "VPSRAQZ128rr(b?)(k?)(z?)",
+    "VPSRAWZ128rr(b?)(k?)(z?)", "VPSRAWrr",
+    "VPSRLDZ128rr(b?)(k?)(z?)", "VPSRLDrr",
+    "VPSRLQZ128rr(b?)(k?)(z?)", "VPSRLQrr",
+    "VPSRLWZ128rr(b?)(k?)(z?)", "VPSRLWrr")>;
+
+// Group 20: one micro-op on port 6 plus one on the port 0/1/5/6 group,
+// 2-cycle latency.
+def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>;
+
+// Group 21: one micro-op on the port 2/3/7 group plus one on the
+// port 0/1/5/6 group, 2-cycle latency.
+def SKXWriteResGroup21 : SchedWriteRes<[SKXPort237,SKXPort0156]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup21], (instregex "SFENCE")>;
+
+// Group 22: one micro-op on the port 0/6 group plus one on the port 1/5
+// group, 2-cycle latency.
+def SKXWriteResGroup22 : SchedWriteRes<[SKXPort06,SKXPort15]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup22], (instregex
+    "BEXTR32rr", "BEXTR64rr", "BSWAP(16|32|64)r")>;
+
+// Group 23: one micro-op on the port 0/6 group plus one on the
+// port 0/1/5/6 group, 2-cycle latency.
+def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup23], (instregex
+    "ADC8i8", "ADC8ri", "CWD", "JRCXZ", "SBB8i8", "SBB8ri")>;
+
+// Group 24: three micro-ops (port 4, port 5, port 2/3/7 group),
+// 2-cycle latency.
+def SKXWriteResGroup24 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
+  let Latency = 2;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKXWriteResGroup24], (instregex
+    "EXTRACTPSmr",
+    "PEXTRBmr", "PEXTRDmr", "PEXTRQmr", "PEXTRWmr",
+    "STMXCSR",
+    "VEXTRACTPSZmr(b?)(k?)(z?)", "VEXTRACTPSmr",
+    "VPEXTRBZmr(b?)(k?)(z?)", "VPEXTRBmr",
+    "VPEXTRDZmr(b?)(k?)(z?)", "VPEXTRDmr",
+    "VPEXTRQZmr(b?)(k?)(z?)", "VPEXTRQmr",
+    "VPEXTRWZmr(b?)(k?)(z?)", "VPEXTRWmr",
+    "VSTMXCSR")>;
+
+// Group 25: three micro-ops (port 4, port 6, port 2/3/7 group),
+// 2-cycle latency.
+def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
+  let Latency = 2;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKXWriteResGroup25], (instregex "FNSTCW16m")>;
+
+// Group 26: three micro-ops (port 4, port 2/3/7 group, port 0/6 group),
+// 2-cycle latency.
+def SKXWriteResGroup26 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
+  let Latency = 2;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+// Instructions scheduled with SKXWriteResGroup26.
+def : InstRW<[SKXWriteResGroup26], (instregex
+    "SETAEm",
+    "SETBm",
+    "SETEm",
+    "SETGEm",
+    "SETGm",
+    "SETLEm",
+    "SETLm",
+    "SETNEm",
+    "SETNOm",
+    "SETNPm",
+    "SETNSm",
+    "SETOm",
+    "SETPm",
+    "SETSm")>;
+
+// Latency 2, 3 uops: one cycle each on port 4, ports 2/3/7 and ports 1/5.
+def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort15]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+  let Latency = 2;
+}
+def : InstRW<[SKXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
+
+// Latency 2, 3 uops: one cycle each on port 4, ports 2/3/7 and ports 0/1/5/6.
+def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort0156]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+  let Latency = 2;
+}
+def : InstRW<[SKXWriteResGroup28], (instregex
+    "PUSH(16|32|64)r",
+    "PUSH(16|32|64)rmr",
+    "PUSH64i8",
+    "STOSB",
+    "STOSL",
+    "STOSQ",
+    "STOSW")>;
+
+// Latency 2, 5 uops: two cycles on port 4, two on ports 2/3/7, one on
+// ports 1/5.
+def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort15]> {
+  let NumMicroOps = 5;
+  let ResourceCycles = [2, 2, 1];
+  let Latency = 2;
+}
+def : InstRW<[SKXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)(k?)(z?)")>;
+
+// Latency 3, 1 uop on port 0.
+def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> {
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup30], (instregex
+    "KADDBrr",
+    "KADDDrr",
+    "KADDQrr",
+    "KADDWrr",
+    "KMOVBrk",
+    "KMOVDrk",
+    "KMOVQrk",
+    "KMOVWrk",
+    "KORTESTBrr",
+    "KORTESTDrr",
+    "KORTESTQrr",
+    "KORTESTWrr",
+    "KTESTBrr",
+    "KTESTDrr",
+    "KTESTQrr",
+    "KTESTWrr")>;
+
+// Latency 3, 1 uop on port 1.
+def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> {
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup31], (instregex
+    "BSF(16|32|64)rr",
+    "BSR(16|32|64)rr",
+    "IMUL64rr(i8?)",
+    "IMUL8r",
+    "LZCNT(16|32|64)rr",
+    "MUL8r",
+    "PDEP32rr",
+    "PDEP64rr",
+    "PEXT32rr",
+    "PEXT64rr",
+    "POPCNT(16|32|64)rr",
+    "SHLD(16|32|64)rri8",
+    "SHRD(16|32|64)rri8",
+    "TZCNT(16|32|64)rr")>;
+
+// Latency 3, 2 uops: one cycle on port 1 and one on ports 0/1/5/6.
+def SKXWriteResGroup31_16 : SchedWriteRes<[SKXPort1, SKXPort0156]> {
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup31_16], (instregex "IMUL16rr(i8?)")>;
+
+// Latency 3, 1 uop on port 1 (ResourceCycles left at its default).
+def SKXWriteResGroup31_32 : SchedWriteRes<[SKXPort1]> {
+  let NumMicroOps = 1;
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup31_32], (instregex "IMUL32rr(i8?)")>;
+
+// Latency 3, 1 uop on port 5.
+def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> {
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup32], (instregex
+    "ADD_FPrST0",
+    "ADD_FST0r",
+    "ADD_FrST0",
+    "KSHIFTLBri",
+    "KSHIFTLDri",
+    "KSHIFTLQri",
+    "KSHIFTLWri",
+    "KSHIFTRBri",
+    "KSHIFTRDri",
+    "KSHIFTRQri",
+    "KSHIFTRWri",
+    "KUNPCKBWrr",
+    "KUNPCKDQrr",
+    "KUNPCKWDrr",
+    "MMX_PSADBWirr",
+    "PCMPGTQrr",
+    "PSADBWrr",
+    "SUBR_FPrST0",
+    "SUBR_FST0r",
+    "SUBR_FrST0",
+    "SUB_FPrST0",
+    "SUB_FST0r",
+    "SUB_FrST0",
+    "VALIGNDZ128rri(b?)(k?)(z?)",
+    "VALIGNDZ256rri(b?)(k?)(z?)",
+    "VALIGNDZrri(b?)(k?)(z?)",
+    "VALIGNQZ128rri(b?)(k?)(z?)",
+    "VALIGNQZ256rri(b?)(k?)(z?)",
+    "VALIGNQZrri(b?)(k?)(z?)",
+    "VBROADCASTF32X2Z256r(b?)(k?)(z?)",
+    "VBROADCASTF32X2Zr(b?)(k?)(z?)",
+    "VBROADCASTI32X2Z256r(b?)(k?)(z?)",
+    "VBROADCASTI32X2Zr(b?)(k?)(z?)",
+    "VBROADCASTSDYrr",
+    "VBROADCASTSDZ256r(b?)(k?)(z?)",
+    "VBROADCASTSDZr(b?)(k?)(z?)",
+    "VBROADCASTSSYrr",
+    "VBROADCASTSSZ128r(b?)(k?)(z?)",
+    "VBROADCASTSSZ256r(b?)(k?)(z?)",
+    "VBROADCASTSSZr(b?)(k?)(z?)",
+    "VCMPPDZ128rri(b?)(k?)(z?)",
+    "VCMPPDZ256rri(b?)(k?)(z?)",
+    "VCMPPDZrri(b?)(k?)(z?)",
+    "VCMPPSZ128rri(b?)(k?)(z?)",
+    "VCMPPSZ256rri(b?)(k?)(z?)",
+    "VCMPPSZrri(b?)(k?)(z?)",
+    "VCMPSDZrr_Int(b?)(k?)(z?)",
+    "VCMPSSZrr_Int(b?)(k?)(z?)",
+    "VDBPSADBWZ128rri(b?)(k?)(z?)",
+    "VDBPSADBWZ256rri(b?)(k?)(z?)",
+    "VDBPSADBWZrri(b?)(k?)(z?)",
+    "VEXTRACTF128rr",
+    "VEXTRACTF32x4Z256rr(b?)(k?)(z?)",
+    "VEXTRACTF32x4Zrr(b?)(k?)(z?)",
+    "VEXTRACTF32x8Zrr(b?)(k?)(z?)",
+    "VEXTRACTF64x2Z256rr(b?)(k?)(z?)",
+    "VEXTRACTF64x2Zrr(b?)(k?)(z?)",
+    "VEXTRACTF64x4Zrr(b?)(k?)(z?)",
+    "VEXTRACTI128rr",
+    "VEXTRACTI32x4Z256rr(b?)(k?)(z?)",
+    "VEXTRACTI32x4Zrr(b?)(k?)(z?)",
+    "VEXTRACTI32x8Zrr(b?)(k?)(z?)",
+    "VEXTRACTI64x2Z256rr(b?)(k?)(z?)",
+    "VEXTRACTI64x2Zrr(b?)(k?)(z?)",
+    "VEXTRACTI64x4Zrr(b?)(k?)(z?)",
+    "VFPCLASSPDZ128rr(b?)(k?)(z?)",
+    "VFPCLASSPDZ256rr(b?)(k?)(z?)",
+    "VFPCLASSPDZrr(b?)(k?)(z?)",
+    "VFPCLASSPSZ128rr(b?)(k?)(z?)",
+    "VFPCLASSPSZ256rr(b?)(k?)(z?)",
+    "VFPCLASSPSZrr(b?)(k?)(z?)",
+    "VFPCLASSSDrr(b?)(k?)(z?)",
+    "VFPCLASSSSrr(b?)(k?)(z?)",
+    "VINSERTF128rr",
+    "VINSERTF32x4Z256rr(b?)(k?)(z?)",
+    "VINSERTF32x4Zrr(b?)(k?)(z?)",
+    "VINSERTF32x8Zrr(b?)(k?)(z?)",
+    "VINSERTF64x2Z256rr(b?)(k?)(z?)",
+    "VINSERTF64x2Zrr(b?)(k?)(z?)",
+    "VINSERTF64x4Zrr(b?)(k?)(z?)",
+    "VINSERTI128rr",
+    "VINSERTI32x4Z256rr(b?)(k?)(z?)",
+    "VINSERTI32x4Zrr(b?)(k?)(z?)",
+    "VINSERTI32x8Zrr(b?)(k?)(z?)",
+    "VINSERTI64x2Z256rr(b?)(k?)(z?)",
+    "VINSERTI64x2Zrr(b?)(k?)(z?)",
+    "VINSERTI64x4Zrr(b?)(k?)(z?)",
+    "VPBROADCASTBYrr",
+    "VPBROADCASTBZ128r(b?)(k?)(z?)",
+    "VPBROADCASTBZ256r(b?)(k?)(z?)",
+    "VPBROADCASTBZr(b?)(k?)(z?)",
+    "VPBROADCASTBrr",
+    "VPBROADCASTDYrr",
+    "VPBROADCASTDZ128r(b?)(k?)(z?)",
+    "VPBROADCASTDZ256r(b?)(k?)(z?)",
+    "VPBROADCASTDZr(b?)(k?)(z?)",
+    "VPBROADCASTDrZ128r(b?)(k?)(z?)",
+    "VPBROADCASTDrZ256r(b?)(k?)(z?)",
+    "VPBROADCASTDrZr(b?)(k?)(z?)",
+    "VPBROADCASTQYrr",
+    "VPBROADCASTQZ128r(b?)(k?)(z?)",
+    "VPBROADCASTQZ256r(b?)(k?)(z?)",
+    "VPBROADCASTQZr(b?)(k?)(z?)",
+    "VPBROADCASTQrZ128r(b?)(k?)(z?)",
+    "VPBROADCASTQrZ256r(b?)(k?)(z?)",
+    "VPBROADCASTQrZr(b?)(k?)(z?)",
+    "VPBROADCASTWYrr",
+    "VPBROADCASTWZ128r(b?)(k?)(z?)",
+    "VPBROADCASTWZ256r(b?)(k?)(z?)",
+    "VPBROADCASTWZr(b?)(k?)(z?)",
+    "VPBROADCASTWrr",
+    "VPCMPBZ128rri(b?)(k?)(z?)",
+    "VPCMPBZ256rri(b?)(k?)(z?)",
+    "VPCMPBZrri(b?)(k?)(z?)",
+    "VPCMPDZ128rri(b?)(k?)(z?)",
+    "VPCMPDZ256rri(b?)(k?)(z?)",
+    "VPCMPDZrri(b?)(k?)(z?)",
+    "VPCMPEQBZ128rr(b?)(k?)(z?)",
+    "VPCMPEQBZ256rr(b?)(k?)(z?)",
+    "VPCMPEQBZrr(b?)(k?)(z?)",
+    "VPCMPEQDZ128rr(b?)(k?)(z?)",
+    "VPCMPEQDZ256rr(b?)(k?)(z?)",
+    "VPCMPEQDZrr(b?)(k?)(z?)",
+    "VPCMPEQQZ128rr(b?)(k?)(z?)",
+    "VPCMPEQQZ256rr(b?)(k?)(z?)",
+    "VPCMPEQQZrr(b?)(k?)(z?)")>;
+// Instructions scheduled with SKXWriteResGroup32. AVX-512 families are listed
+// as Z128 / Z256 / Z (512-bit) variants; VPSADBW names all three widths.
+def : InstRW<[SKXWriteResGroup32], (instregex
+    "VPCMPEQWZ128rr(b?)(k?)(z?)",
+    "VPCMPEQWZ256rr(b?)(k?)(z?)",
+    "VPCMPEQWZrr(b?)(k?)(z?)",
+    "VPCMPGTBZ128rr(b?)(k?)(z?)",
+    "VPCMPGTBZ256rr(b?)(k?)(z?)",
+    "VPCMPGTBZrr(b?)(k?)(z?)",
+    "VPCMPGTDZ128rr(b?)(k?)(z?)",
+    "VPCMPGTDZ256rr(b?)(k?)(z?)",
+    "VPCMPGTDZrr(b?)(k?)(z?)",
+    "VPCMPGTQYrr",
+    "VPCMPGTQZ128rr(b?)(k?)(z?)",
+    "VPCMPGTQZ256rr(b?)(k?)(z?)",
+    "VPCMPGTQZrr(b?)(k?)(z?)",
+    "VPCMPGTQrr",
+    "VPCMPGTWZ128rr(b?)(k?)(z?)",
+    "VPCMPGTWZ256rr(b?)(k?)(z?)",
+    "VPCMPGTWZrr(b?)(k?)(z?)",
+    "VPCMPQZ128rri(b?)(k?)(z?)",
+    "VPCMPQZ256rri(b?)(k?)(z?)",
+    "VPCMPQZrri(b?)(k?)(z?)",
+    "VPCMPUBZ128rri(b?)(k?)(z?)",
+    "VPCMPUBZ256rri(b?)(k?)(z?)",
+    "VPCMPUBZrri(b?)(k?)(z?)",
+    "VPCMPUDZ128rri(b?)(k?)(z?)",
+    "VPCMPUDZ256rri(b?)(k?)(z?)",
+    "VPCMPUDZrri(b?)(k?)(z?)",
+    "VPCMPUQZ128rri(b?)(k?)(z?)",
+    "VPCMPUQZ256rri(b?)(k?)(z?)",
+    "VPCMPUQZrri(b?)(k?)(z?)",
+    "VPCMPUWZ128rri(b?)(k?)(z?)",
+    "VPCMPUWZ256rri(b?)(k?)(z?)",
+    "VPCMPUWZrri(b?)(k?)(z?)",
+    "VPCMPWZ128rri(b?)(k?)(z?)",
+    "VPCMPWZ256rri(b?)(k?)(z?)",
+    "VPCMPWZrri(b?)(k?)(z?)",
+    "VPERM2F128rr",
+    "VPERM2I128rr",
+    "VPERMDYrr",
+    "VPERMDZ256rr(b?)(k?)(z?)",
+    "VPERMDZrr(b?)(k?)(z?)",
+    "VPERMI2D128rr(b?)(k?)(z?)",
+    "VPERMI2D256rr(b?)(k?)(z?)",
+    "VPERMI2Drr(b?)(k?)(z?)",
+    "VPERMI2PD128rr(b?)(k?)(z?)",
+    "VPERMI2PD256rr(b?)(k?)(z?)",
+    "VPERMI2PDrr(b?)(k?)(z?)",
+    "VPERMI2PS128rr(b?)(k?)(z?)",
+    "VPERMI2PS256rr(b?)(k?)(z?)",
+    "VPERMI2PSrr(b?)(k?)(z?)",
+    "VPERMI2Q128rr(b?)(k?)(z?)",
+    "VPERMI2Q256rr(b?)(k?)(z?)",
+    "VPERMI2Qrr(b?)(k?)(z?)",
+    "VPERMPDYri",
+    "VPERMPDZ256r(b?)i(k?)(z?)",
+    "VPERMPDZ256rr(b?)(k?)(z?)",
+    "VPERMPDZri(b?)(k?)(z?)",
+    "VPERMPDZrr(b?)(k?)(z?)",
+    "VPERMPSYrr",
+    "VPERMPSZ256rr(b?)(k?)(z?)",
+    "VPERMPSZrr(b?)(k?)(z?)",
+    "VPERMQYri",
+    "VPERMQZ256r(b?)i(k?)(z?)",
+    "VPERMQZ256rr(b?)(k?)(z?)",
+    "VPERMQZri(b?)(k?)(z?)",
+    "VPERMQZrr(b?)(k?)(z?)",
+    "VPERMT2D128rr(b?)(k?)(z?)",
+    "VPERMT2D256rr(b?)(k?)(z?)",
+    "VPERMT2Drr(b?)(k?)(z?)",
+    "VPERMT2PD128rr(b?)(k?)(z?)",
+    "VPERMT2PD256rr(b?)(k?)(z?)",
+    "VPERMT2PDrr(b?)(k?)(z?)",
+    "VPERMT2PS128rr(b?)(k?)(z?)",
+    "VPERMT2PS256rr(b?)(k?)(z?)",
+    "VPERMT2PSrr(b?)(k?)(z?)",
+    "VPERMT2Q128rr(b?)(k?)(z?)",
+    "VPERMT2Q256rr(b?)(k?)(z?)",
+    "VPERMT2Qrr(b?)(k?)(z?)",
+    "VPMAXSQZ128rr(b?)(k?)(z?)",
+    "VPMAXSQZ256rr(b?)(k?)(z?)",
+    "VPMAXSQZrr(b?)(k?)(z?)",
+    "VPMAXUQZ128rr(b?)(k?)(z?)",
+    "VPMAXUQZ256rr(b?)(k?)(z?)",
+    "VPMAXUQZrr(b?)(k?)(z?)",
+    "VPMINSQZ128rr(b?)(k?)(z?)",
+    "VPMINSQZ256rr(b?)(k?)(z?)",
+    "VPMINSQZrr(b?)(k?)(z?)",
+    "VPMINUQZ128rr(b?)(k?)(z?)",
+    "VPMINUQZ256rr(b?)(k?)(z?)",
+    "VPMINUQZrr(b?)(k?)(z?)",
+    "VPMOVQDZ128rr(b?)(k?)(z?)",
+    "VPMOVQDZ256rr(b?)(k?)(z?)",
+    "VPMOVQDZrr(b?)(k?)(z?)",
+    "VPMOVSXBDYrr",
+    "VPMOVSXBDZ128rr(b?)(k?)(z?)",
+    "VPMOVSXBDZ256rr(b?)(k?)(z?)",
+    "VPMOVSXBDZrr(b?)(k?)(z?)",
+    "VPMOVSXBQYrr",
+    "VPMOVSXBQZ128rr(b?)(k?)(z?)",
+    "VPMOVSXBQZ256rr(b?)(k?)(z?)",
+    "VPMOVSXBQZrr(b?)(k?)(z?)",
+    "VPMOVSXBWYrr",
+    "VPMOVSXBWZ128rr(b?)(k?)(z?)",
+    "VPMOVSXBWZ256rr(b?)(k?)(z?)",
+    "VPMOVSXBWZrr(b?)(k?)(z?)",
+    "VPMOVSXDQYrr",
+    "VPMOVSXDQZ128rr(b?)(k?)(z?)",
+    "VPMOVSXDQZ256rr(b?)(k?)(z?)",
+    "VPMOVSXDQZrr(b?)(k?)(z?)",
+    "VPMOVSXWDYrr",
+    "VPMOVSXWDZ128rr(b?)(k?)(z?)",
+    "VPMOVSXWDZ256rr(b?)(k?)(z?)",
+    "VPMOVSXWDZrr(b?)(k?)(z?)",
+    "VPMOVSXWQYrr",
+    "VPMOVSXWQZ128rr(b?)(k?)(z?)",
+    "VPMOVSXWQZ256rr(b?)(k?)(z?)",
+    "VPMOVSXWQZrr(b?)(k?)(z?)",
+    "VPMOVZXBDYrr",
+    "VPMOVZXBDZ128rr(b?)(k?)(z?)",
+    "VPMOVZXBDZ256rr(b?)(k?)(z?)",
+    "VPMOVZXBDZrr(b?)(k?)(z?)",
+    "VPMOVZXBQYrr",
+    "VPMOVZXBQZ128rr(b?)(k?)(z?)",
+    "VPMOVZXBQZ256rr(b?)(k?)(z?)",
+    "VPMOVZXBQZrr(b?)(k?)(z?)",
+    "VPMOVZXBWYrr",
+    "VPMOVZXBWZ128rr(b?)(k?)(z?)",
+    "VPMOVZXBWZ256rr(b?)(k?)(z?)",
+    "VPMOVZXBWZrr(b?)(k?)(z?)",
+    "VPMOVZXDQYrr",
+    "VPMOVZXDQZ128rr(b?)(k?)(z?)",
+    "VPMOVZXDQZ256rr(b?)(k?)(z?)",
+    "VPMOVZXDQZrr(b?)(k?)(z?)",
+    "VPMOVZXWDYrr",
+    "VPMOVZXWDZ128rr(b?)(k?)(z?)",
+    "VPMOVZXWDZ256rr(b?)(k?)(z?)",
+    "VPMOVZXWDZrr(b?)(k?)(z?)",
+    "VPMOVZXWQYrr",
+    "VPMOVZXWQZ128rr(b?)(k?)(z?)",
+    "VPMOVZXWQZ256rr(b?)(k?)(z?)",
+    "VPMOVZXWQZrr(b?)(k?)(z?)",
+    "VPSADBWYrr",
+    "VPSADBWZ128rr(b?)(k?)(z?)",
+    "VPSADBWZ256rr(b?)(k?)(z?)",
+    "VPSADBWZrr(b?)(k?)(z?)",
+    "VPSADBWrr",
+    "VPTESTMBZ128rr(b?)(k?)(z?)",
+    "VPTESTMBZ256rr(b?)(k?)(z?)",
+    "VPTESTMBZrr(b?)(k?)(z?)",
+    "VPTESTMDZ128rr(b?)(k?)(z?)",
+    "VPTESTMDZ256rr(b?)(k?)(z?)",
+    "VPTESTMDZrr(b?)(k?)(z?)",
+    "VPTESTMQZ128rr(b?)(k?)(z?)",
+    "VPTESTMQZ256rr(b?)(k?)(z?)",
+    "VPTESTMQZrr(b?)(k?)(z?)",
+    "VPTESTMWZ128rr(b?)(k?)(z?)",
+    "VPTESTMWZ256rr(b?)(k?)(z?)",
+    "VPTESTMWZrr(b?)(k?)(z?)",
+    "VPTESTNMBZ128rr(b?)(k?)(z?)",
+    "VPTESTNMBZ256rr(b?)(k?)(z?)",
+    "VPTESTNMBZrr(b?)(k?)(z?)",
+    "VPTESTNMDZ128rr(b?)(k?)(z?)",
+    "VPTESTNMDZ256rr(b?)(k?)(z?)",
+    "VPTESTNMDZrr(b?)(k?)(z?)",
+    "VPTESTNMQZ128rr(b?)(k?)(z?)",
+    "VPTESTNMQZ256rr(b?)(k?)(z?)",
+    "VPTESTNMQZrr(b?)(k?)(z?)",
+    "VPTESTNMWZ128rr(b?)(k?)(z?)",
+    "VPTESTNMWZ256rr(b?)(k?)(z?)",
+    "VPTESTNMWZrr(b?)(k?)(z?)",
+    "VSHUFF32X4Z256rri(b?)(k?)(z?)",
+    "VSHUFF32X4Zrri(b?)(k?)(z?)",
+    "VSHUFF64X2Z256rri(b?)(k?)(z?)",
+    "VSHUFF64X2Zrri(b?)(k?)(z?)",
+    "VSHUFI32X4Z256rri(b?)(k?)(z?)",
+    "VSHUFI32X4Zrri(b?)(k?)(z?)",
+    "VSHUFI64X2Z256rri(b?)(k?)(z?)",
+    "VSHUFI64X2Zrri(b?)(k?)(z?)")>;
+
+// Latency 3, 2 uops: one cycle on port 0 and one on port 5.
+def SKXWriteResGroup33 : SchedWriteRes<[SKXPort0, SKXPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+// "(_REV)?" makes the whole suffix optional, so the pattern names both
+// VPEXTRWZrr and VPEXTRWZrr_REV.
+def : InstRW<[SKXWriteResGroup33], (instregex
+    "EXTRACTPSrr",
+    "MMX_PEXTRWirri",
+    "PEXTRBrr",
+    "PEXTRDrr",
+    "PEXTRQrr",
+    "PEXTRWri",
+    "PEXTRWrr_REV",
+    "PTESTrr",
+    "VEXTRACTPSZrr(b?)(k?)(z?)",
+    "VEXTRACTPSrr",
+    "VPEXTRBZrr(b?)(k?)(z?)",
+    "VPEXTRBrr",
+    "VPEXTRDZrr(b?)(k?)(z?)",
+    "VPEXTRDrr",
+    "VPEXTRQZrr(b?)(k?)(z?)",
+    "VPEXTRQrr",
+    "VPEXTRWZrr(_REV)?",
+    "VPEXTRWri",
+    "VPEXTRWrr_REV",
+    "VPTESTYrr",
+    "VPTESTrr")>;
+
+// Latency 3, 2 uops: one cycle on port 0 and one on ports 0/1/5/6.
+def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0, SKXPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+def : InstRW<[SKXWriteResGroup34], (instregex "FNSTSW16r")>;
+
+// Latency 3, 3 uops: three cycles on ports 0/6.
+def SKXWriteResGroup35 : SchedWriteRes<[SKXPort06]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def : InstRW<[SKXWriteResGroup35], (instregex
+    "ROL(16|32|64)rCL",
+    "ROL8rCL",
+    "ROR(16|32|64)rCL",
+    "ROR8rCL",
+    "SAR(16|32|64)rCL",
+    "SAR8rCL",
+    "SHL(16|32|64)rCL",
+    "SHL8rCL",
+    "SHR(16|32|64)rCL",
+    "SHR8rCL")>;
+
+// Latency 3, 3 uops: three cycles on ports 0/1/5/6.
+def SKXWriteResGroup36 : SchedWriteRes<[SKXPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def : InstRW<[SKXWriteResGroup36], (instregex "XADD(16|32|64)rr")>;
+// Instructions scheduled with SKXWriteResGroup36.
+def : InstRW<[SKXWriteResGroup36], (instregex
+    "XADD8rr",
+    "XCHG8rr")>;
+
+// Latency 3, 3 uops: one cycle on port 0 and two on port 5.
+def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0, SKXPort5]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup37], (instregex
+    "MMX_PHADDSWrr64",
+    "MMX_PHSUBSWrr64")>;
+
+// Latency 3, 3 uops: two cycles on port 5 and one on ports 0/1.
+def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5, SKXPort01]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [2, 1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup38], (instregex
+    "PHADDSWrr128",
+    "PHSUBSWrr128",
+    "VPHADDSWrr128",
+    "VPHADDSWrr256",
+    "VPHSUBSWrr128",
+    "VPHSUBSWrr256")>;
+
+// Latency 3, 3 uops: two cycles on port 5 and one on ports 0/5.
+def SKXWriteResGroup39 : SchedWriteRes<[SKXPort5, SKXPort05]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [2, 1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup39], (instregex
+    "MMX_PHADDWrr64",
+    "MMX_PHADDrr64",
+    "MMX_PHSUBDrr64",
+    "MMX_PHSUBWrr64")>;
+
+// Latency 3, 3 uops: two cycles on port 5 and one on ports 0/1/5.
+def SKXWriteResGroup40 : SchedWriteRes<[SKXPort5, SKXPort015]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [2, 1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup40], (instregex
+    "PHADDDrr",
+    "PHADDWrr",
+    "PHSUBDrr",
+    "PHSUBWrr",
+    "VPHADDDYrr",
+    "VPHADDDrr",
+    "VPHADDWYrr",
+    "VPHADDWrr",
+    "VPHSUBDYrr",
+    "VPHSUBDrr",
+    "VPHSUBWYrr",
+    "VPHSUBWrr")>;
+
+// Latency 3, 3 uops: two cycles on port 5 and one on ports 0/1/5/6.
+def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5, SKXPort0156]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [2, 1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup41], (instregex
+    "MMX_PACKSSDWirr",
+    "MMX_PACKSSWBirr",
+    "MMX_PACKUSWBirr")>;
+
+// Latency 3, 3 uops: one cycle on port 6 and two on ports 0/1/5/6.
+def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6, SKXPort0156]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup42], (instregex "CLD")>;
+
+// Latency 3, 3 uops: one cycle on ports 2/3/7 and two on ports 0/1/5/6.
+def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237, SKXPort0156]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup43], (instregex "MFENCE")>;
+
+// Latency 3, 3 uops: one cycle on ports 0/6 and two on ports 0/1/5/6.
+def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06, SKXPort0156]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup44], (instregex
+    "RCL(16|32|64)r1",
+    "RCL(16|32|64)ri",
+    "RCL8r1",
+    "RCL8ri",
+    "RCR(16|32|64)r1",
+    "RCR(16|32|64)ri",
+    "RCR8r1",
+    "RCR8ri")>;
+
+// Latency 3, 3 uops: one cycle each on port 0, port 4 and ports 2/3/7.
+def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort237]> {
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup45], (instregex "FNSTSWm")>;
+
+// Latency 3, 4 uops: one cycle on port 4, one on ports 2/3/7, two on
+// ports 0/6.
+def SKXWriteResGroup46 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort06]> {
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 1, 2];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup46], (instregex
+    "SETAm",
+    "SETBEm")>;
+
+// Latency 3, 4 uops: one cycle each on port 4, port 6, ports 2/3/7 and
+// ports 0/1/5/6.
+def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4, SKXPort6, SKXPort237, SKXPort0156]> {
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 1, 1, 1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
+
+// Latency 3, 4 uops: one cycle each on port 4, ports 2/3/7, ports 0/6 and
+// ports 0/1/5/6.
+def SKXWriteResGroup48 : SchedWriteRes<[SKXPort4, SKXPort237, SKXPort06, SKXPort0156]> {
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 1, 1, 1];
+  let Latency = 3;
+}
+def : InstRW<[SKXWriteResGroup48], (instregex "CALL64pcrel32")>;
+
+// Latency 4, 1 uop on port 0.
+def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> {
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+  let Latency = 4;
+}
+def : InstRW<[SKXWriteResGroup49], (instregex
+    "AESDECLASTrr",
+    "AESDECrr",
+    "AESENCLASTrr",
+    "AESENCrr",
+    "MMX_PMADDUBSWrr64",
+    "MMX_PMADDWDirr",
+    "MMX_PMULHRSWrr64",
+    "MMX_PMULHUWirr",
+    "MMX_PMULHWirr",
+    "MMX_PMULLWirr",
+    "MMX_PMULUDQirr",
+    "MUL_FPrST0",
+    "MUL_FST0r",
+    "MUL_FrST0",
+    "RCPPSr",
+    "RCPSSr",
+    "RSQRTPSr",
+    "RSQRTSSr",
+    "VAESDECLASTrr",
+    "VAESDECrr",
+    "VAESENCLASTrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VAESENCrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14PDZ128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14PDZ256r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14PSZ128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14PSZ256r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14SDrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14SSrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCPPSYr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCPPSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCPSSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14PDZ128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14PDZ256r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14PSZ128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14PSZ256r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14SDrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14SSrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRTPSYr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRTPSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRTSSr")>;
+
+// 1 micro-op, latency 4: 1 cycle on the SKXPort015 group (ports 0, 1 or 5).
+def SKXWriteResGroup50 : SchedWriteRes<[SKXPort015]> {
+  let Latency = 4;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "ADDPSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "ADDSDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "ADDSSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "ADDSUBPDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "ADDSUBPSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "CMPPDrri")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "CMPPSrri")>;
+// CMPSDrr added so the SSE scalar compares mirror the VCMPSDrr/VCMPSSrr pair
+// listed later in this group; previously only CMPSSrr was mapped.
+// NOTE(review): confirm no other InstRW in this model already covers CMPSDrr.
+def: InstRW<[SKXWriteResGroup50], (instregex "CMPSDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "CMPSSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "CVTDQ2PSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "CVTPS2DQrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "CVTTPS2DQrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MAXPDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MAXPSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MAXSDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MAXSSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MINPDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MINPSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MINSDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MINSSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MULPDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MULPSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MULSDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "MULSSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PHMINPOSUWrr128")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PMADDUBSWrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PMADDWDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PMULDQrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PMULHRSWrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PMULHUWrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PMULHWrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PMULLWrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "PMULUDQrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "SUBPDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "SUBPSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "SUBSDrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "SUBSSrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDYrr")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDZ128rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDZ256rr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDZrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPPDYrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPPDrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPPSYrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPPSrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2UQQZ128rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2UQQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2UQQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2UDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2UDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2UDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTQQ2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTQQ2PDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTQQ2PDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2QQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2QQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2QQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2UQQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2UQQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2UQQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2UDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2UDQZ256rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2UDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUDQ2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUDQ2PSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUDQ2PSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUQQ2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUQQ2PDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUQQ2PDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMSDrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMSSrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex 
"VFMADD132SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSr")>; 
+def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], 
(instregex "VFMADDSUB231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDr")>; +def: InstRW<[SKXWriteResGroup50], 
(instregex "VFMSUB213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], 
(instregex "VFMSUBADD132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex 
"VFNMADD132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], 
(instregex "VFNMADD213SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SDr")>; +def: InstRW<[SKXWriteResGroup50], 
(instregex "VFNMSUB132SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SDZr_Int(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPSr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPSDr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPSSr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTSDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTSSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex 
"VMAXPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], 
(instregex "VMULPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPHMINPOSUWrr128")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQrr")>; +def: 
InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWZ128rr(b?)(k?)(z?)")>; +/* VPMULLW gets the same Z128 / Z256 / Z trio as the other multiplies in this list. */ +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex
"VRANGEPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGESDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGESSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCESDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCESSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFSDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFSSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex 
"VSUBPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSSrr")>; + +def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup51], (instregex "MPSADBWrri")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VMPSADBWYrri")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VMPSADBWrri")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDBZ256rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup51], (instregex "VPMOVDBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSWBZ128rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup51], (instregex "VPMOVSWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSWBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSWBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVWBZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup52], (instregex "IMUL(32|64)r")>; +def: 
InstRW<[SKXWriteResGroup52], (instregex "MUL(32|64)r")>; +def: InstRW<[SKXWriteResGroup52], (instregex "MULX64rr")>; + +/* 16-bit IMUL/MUL: 4 micro-ops. ResourceCycles is spelled out so the per-resource cycles add up to NumMicroOps (1 on SKXPort1, 1 on SKXPort06, 2 on SKXPort0156); when omitted, each listed resource is charged a single cycle. */ +def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SKXWriteResGroup52_16], (instregex "IMUL16r")>; +def: InstRW<[SKXWriteResGroup52_16], (instregex "MUL16r")>; + +def SKXWriteResGroup53 : SchedWriteRes<[SKXPort5,SKXPort01]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLDYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLQYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLWYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRADYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRADZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRADZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAWYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLDYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLQYrr")>; +def:
InstRW<[SKXWriteResGroup53], (instregex "VPSRLQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLWYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLWZ256rr(b?)(k?)(z?)")>; +/* VPSRLW gets its Zrr form listed like every other shift in this group. */ +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLWZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup54], (instregex "ISTT_FP16m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "ISTT_FP32m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "ISTT_FP64m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_F16m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_F32m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_FP16m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_FP32m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_FP64m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "VPMOVQDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup54], (instregex "VPMOVQDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup54], (instregex "VPMOVQDZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [4]; +} +def: InstRW<[SKXWriteResGroup55], (instregex "FNCLEX")>; + +def SKXWriteResGroup56 : SchedWriteRes<[SKXPort015,SKXPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup56], (instregex "VZEROUPPER")>; + +def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>; + +def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup58], (instregex "MMX_MOVD64from64rm")>; +def:
InstRW<[SKXWriteResGroup58], (instregex "MMX_MOVD64rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MMX_MOVD64to64rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MMX_MOVQ64rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOV(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOV64toPQIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOV8rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVDDUPrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVDI2PDIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSSrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm32")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm8")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVZX(16|32|64)rm16")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVZX(16|32|64)rm8")>; +def: InstRW<[SKXWriteResGroup58], (instregex "PREFETCHNTA")>; +def: InstRW<[SKXWriteResGroup58], (instregex "PREFETCHT0")>; +def: InstRW<[SKXWriteResGroup58], (instregex "PREFETCHT1")>; +def: InstRW<[SKXWriteResGroup58], (instregex "PREFETCHT2")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOV64toPQIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVDDUPrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVDI2PDIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVQI2PQIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVSDrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVSSrm")>; + +def SKXWriteResGroup59 : SchedWriteRes<[SKXPort015]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup59], (instregex "VCVTSD2SSZrr_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup60 : SchedWriteRes<[SKXPort0,SKXPort5]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup60], (instregex "CVTDQ2PDrr")>; +def: InstRW<[SKXWriteResGroup60], (instregex 
"MMX_CVTPI2PDirr")>; +def: InstRW<[SKXWriteResGroup60], (instregex "VCVTDQ2PDrr")>; + +def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup61], (instregex "CVTPD2DQrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTPD2PSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTPS2PDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSD2SSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SD64rr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSS2SDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTTPD2DQrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVTPD2PIirr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVTPS2PIirr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVTTPD2PIirr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVTTPS2PIirr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTDQ2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2DQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2DQrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2PSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2UDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPH2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPH2PSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2PDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2PHZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2PHrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex 
"VCVTPS2QQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2UQQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTQQ2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSD2SSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI642SDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSS2SDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSS2SDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPD2DQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPD2DQrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPD2UDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPS2QQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPS2UQQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUDQ2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUQQ2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUSI2SDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUSI2SSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUSI642SDZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup62 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup62], (instregex "VPCONFLICTQZ128rr(b?)(k?)(z?)")>; + +def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: 
InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>; + +def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup64], (instregex "MULX32rr")>; + +def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup66 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], 
(instregex "VPMOVSDWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSWBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSWBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSWBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQDZ256mr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSWBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSWBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSWBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVWBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVWBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVWBZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [1,4]; +} +def: InstRW<[SKXWriteResGroup67], (instregex "XSETBV")>; + +def SKXWriteResGroup68 : SchedWriteRes<[SKXPort06,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [2,3]; +} +def: InstRW<[SKXWriteResGroup68], (instregex "CMPXCHG(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup68], (instregex "CMPXCHG8rr")>; + +def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 6; + let ResourceCycles = [1,1,4]; +} +def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF16")>; +def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF64")>; + +def SKXWriteResGroup70 : SchedWriteRes<[SKXPort5]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup70], (instregex "PCLMULQDQrr")>; +def: InstRW<[SKXWriteResGroup70], (instregex "VPCLMULQDQrr")>; + +def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup71], (instregex "LDDQUrm")>; +def: InstRW<[SKXWriteResGroup71], 
(instregex "MOVAPDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVAPSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVDQArm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVDQUrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVNTDQArm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVSHDUPrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVSLDUPrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVUPDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVUPSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VBROADCASTSSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VLDDQUrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVAPDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVAPSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVDQArm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVDQUrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVNTDQArm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVSHDUPrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVSLDUPrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVUPDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVUPSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VPBROADCASTDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VPBROADCASTQrm")>; + +def SKXWriteResGroup72 : SchedWriteRes<[SKXPort0]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup72], (instregex "MMX_CVTPI2PSirr")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPSZrr(b?)(k?)(z?)")>; 
+def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPERMWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPERMWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPERMWZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDSBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDUSBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDUSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PAVGBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PAVGWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPEQBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPEQDirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPEQWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPGTBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPGTDirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPGTWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PMAXSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PMAXUBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PMINSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PMINUBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSLLDrm")>; +def: InstRW<[SKXWriteResGroup73], 
(instregex "MMX_PSLLQrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSLLWrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRADrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRAWrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRLDrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRLQrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRLWrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSUBSBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSUBSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSUBUSBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSUBUSWirm")>; + +def SKXWriteResGroup74 : SchedWriteRes<[SKXPort0,SKXPort015]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup74], (instregex "CVTSD2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTSD2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTSS2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTSS2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTTSD2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTTSD2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2SI64Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2SIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2USI64Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2USIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SI64Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2USIZrr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SI64Zrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SIZrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2USI64Zrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2USIZrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSS2USIZrb")>; + +def SKXWriteResGroup75 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PINSRWirmi")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PSHUFBrm64")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PSHUFWmi")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKHBWirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKHDQirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKHWDirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKLBWirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKLDQirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKLWDirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MOVHPDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MOVHPSrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MOVLPDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MOVLPSrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PINSRBrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PINSRDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PINSRQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PINSRWrmi")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXBDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXBQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXBWrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXDQrm")>; 
+// Further SKXWriteResGroup75 opcodes, listed in a single instregex.
+def: InstRW<[SKXWriteResGroup75],
+      (instregex "PMOVSXWDrm", "PMOVSXWQrm",
+                 "PMOVZXBDrm", "PMOVZXBQrm", "PMOVZXBWrm", "PMOVZXDQrm",
+                 "PMOVZXWDrm", "PMOVZXWQrm",
+                 "VMOVHPDZ128rm(b?)(k?)(z?)", "VMOVHPDrm",
+                 "VMOVHPSZ128rm(b?)(k?)(z?)", "VMOVHPSrm",
+                 "VMOVLPDZ128rm(b?)(k?)(z?)", "VMOVLPDrm",
+                 "VMOVLPSZ128rm(b?)(k?)(z?)", "VMOVLPSrm",
+                 "VPINSRBZrm(b?)(k?)(z?)", "VPINSRBrm",
+                 "VPINSRDZrm(b?)(k?)(z?)", "VPINSRDrm",
+                 "VPINSRQZrm(b?)(k?)(z?)", "VPINSRQrm",
+                 "VPINSRWZrm(b?)(k?)(z?)", "VPINSRWrmi",
+                 "VPMOVSXBDrm", "VPMOVSXBQrm", "VPMOVSXBWrm", "VPMOVSXDQrm",
+                 "VPMOVSXWDrm", "VPMOVSXWQrm",
+                 "VPMOVZXBDrm", "VPMOVZXBQrm", "VPMOVZXBWrm", "VPMOVZXDQrm",
+                 "VPMOVZXWDrm", "VPMOVZXWQrm")>;
+
+// Latency 6, 2 micro-ops: one cycle on SKXPort6 and one cycle on the
+// SKXPort23 group.
+def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup76], (instregex "FARJMP64", "JMP(16|32|64)m")>;
+
+// Latency 6, 2 micro-ops: one cycle on the SKXPort23 group and one cycle on
+// the SKXPort05 group.
+def SKXWriteResGroup77 : SchedWriteRes<[SKXPort23,SKXPort05]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup77],
+      (instregex "MMX_PABSBrm64", "MMX_PABSDrm64", "MMX_PABSWrm64",
+                 "MMX_PADDBirm", "MMX_PADDDirm", "MMX_PADDQirm",
+                 "MMX_PADDWirm",
+                 "MMX_PANDNirm", "MMX_PANDirm", "MMX_PORirm",
+                 "MMX_PSIGNBrm64", "MMX_PSIGNDrm64", "MMX_PSIGNWrm64",
+                 "MMX_PSUBBirm", "MMX_PSUBDirm", "MMX_PSUBQirm",
+                 "MMX_PSUBWirm",
+                 "MMX_PXORirm")>;
+
+// Latency 6, 2 micro-ops: one cycle on the SKXPort23 group and one cycle on
+// the SKXPort06 group.
+def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup78],
+      (instregex "ADC(16|32|64)rm", "ADC8rm",
+                 "ADCX32rm", "ADCX64rm", "ADOX32rm", "ADOX64rm",
+                 "BT(16|32|64)mi8",
+                 "CMOVAE(16|32|64)rm", "CMOVB(16|32|64)rm",
+                 "CMOVE(16|32|64)rm", "CMOVG(16|32|64)rm",
+                 "CMOVGE(16|32|64)rm", "CMOVL(16|32|64)rm",
+                 "CMOVLE(16|32|64)rm", "CMOVNE(16|32|64)rm",
+                 "CMOVNO(16|32|64)rm", "CMOVNP(16|32|64)rm",
+                 "CMOVNS(16|32|64)rm", "CMOVO(16|32|64)rm",
+                 "CMOVP(16|32|64)rm", "CMOVS(16|32|64)rm",
+                 "RORX32mi", "RORX64mi", "SARX32rm", "SARX64rm",
+                 "SBB(16|32|64)rm", "SBB8rm",
+                 "SHLX32rm", "SHLX64rm", "SHRX32rm", "SHRX64rm")>;
+
+// Latency 6, 2 micro-ops: one cycle on the SKXPort23 group and one cycle on
+// the SKXPort15 group.
+def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup79],
+      (instregex "ANDN32rm", "ANDN64rm", "BLSI32rm", "BLSI64rm",
+                 "BLSMSK32rm", "BLSMSK64rm", "BLSR32rm", "BLSR64rm",
+                 "BZHI32rm", "BZHI64rm", "MOVBE(16|32|64)rm")>;
+
+// Latency 6, 2 micro-ops: one cycle on the SKXPort23 group and one cycle on
+// the SKXPort015 group.
+def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup80],
+      (instregex "VMOV(64to|QI2)PQIZrm(b?)(k?)(z?)",
+                 "VMOVDI2PDIZrm(b?)(k?)(z?)")>;
+
+// Latency 6, 2 micro-ops: one cycle on the SKXPort23 group and one cycle on
+// the SKXPort0156 group.
+def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup81],
+      (instregex "ADD(16|32|64)rm", "ADD8rm", "AND(16|32|64)rm", "AND8rm",
+                 "CMP(16|32|64)mi8", "CMP(16|32|64)mr", "CMP(16|32|64)rm",
+                 "CMP8mi", "CMP8mr")>;
+def: InstRW<[SKXWriteResGroup81],
(instregex "CMP8rm")>;
+// Remaining SKXWriteResGroup81 opcodes.
+// "POP(16|32|64)r" is end-anchored with '$': instregex performs a prefix
+// match, so the bare pattern would also capture POP(16|32|64)rmm, which is
+// assigned to SKXWriteResGroup87 (4 micro-ops) and must not be claimed here.
+def: InstRW<[SKXWriteResGroup81],
+      (instregex "OR(16|32|64)rm", "OR8rm",
+                 "POP(16|32|64)r$", "POP(16|32|64)rmr",
+                 "SUB(16|32|64)rm", "SUB8rm",
+                 "TEST(16|32|64)mr", "TEST8mi", "TEST8mr",
+                 "XOR(16|32|64)rm", "XOR8rm")>;
+
+// Latency 6, 3 micro-ops: two cycles on SKXPort5 and one cycle on the
+// SKXPort015 group.
+def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort015]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SKXWriteResGroup82],
+      (instregex "CVTSI2SS64rr",
+                 "HADDPDrr", "HADDPSrr", "HSUBPDrr", "HSUBPSrr",
+                 "VCVTSI2SS64rr",
+                 "VCVTSI642SSZrr(b?)(k?)(z?)", "VCVTUSI642SSZrr(b?)(k?)(z?)",
+                 "VHADDPDYrr", "VHADDPDrr", "VHADDPSYrr", "VHADDPSrr",
+                 "VHSUBPDYrr", "VHSUBPDrr", "VHSUBPSYrr", "VHSUBPSrr")>;
+
+// Latency 6, 4 micro-ops: one cycle on SKXPort1, two on the SKXPort06 group
+// and one on the SKXPort0156 group.
+def SKXWriteResGroup83 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SKXWriteResGroup83],
+      (instregex "SHLD(16|32|64)rrCL", "SHRD(16|32|64)rrCL")>;
+
+// Latency 6, 4 micro-ops: one cycle each on SKXPort1, SKXPort6, the SKXPort06
+// group and the SKXPort0156 group.
+def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
+
+// Latency 6, 4 micro-ops: one cycle each on SKXPort4, SKXPort5, the
+// SKXPort237 group and the SKXPort015 group.
+def SKXWriteResGroup85 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKXWriteResGroup85], (instregex "VCVTPS2PHmr")>;
+
+// Latency 6, 4 micro-ops: one cycle each on SKXPort4, the SKXPort23 group,
+// the SKXPort237 group and the SKXPort06 group.
+def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKXWriteResGroup86],
+      (instregex "BTC(16|32|64)mi8", "BTR(16|32|64)mi8", "BTS(16|32|64)mi8",
+                 "SAR(16|32|64)m1", "SAR(16|32|64)mi", "SAR8m1", "SAR8mi",
+                 "SHL(16|32|64)m1", "SHL(16|32|64)mi", "SHL8m1", "SHL8mi",
+                 "SHR(16|32|64)m1", "SHR(16|32|64)mi", "SHR8m1", "SHR8mi")>;
+
+// Latency 6, 4 micro-ops: one cycle each on SKXPort4, the SKXPort23 group,
+// the SKXPort237 group and the SKXPort0156 group.
+def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKXWriteResGroup87], (instregex "ADD(16|32|64)mi8")>;
+def:
InstRW<[SKXWriteResGroup87], (instregex "ADD(16|32|64)mr")>;
+// Remaining SKXWriteResGroup87 opcodes, listed in a single instregex.
+def: InstRW<[SKXWriteResGroup87],
+      (instregex "ADD8mi", "ADD8mr",
+                 "AND(16|32|64)mi8", "AND(16|32|64)mr", "AND8mi", "AND8mr",
+                 "DEC(16|32|64)m", "DEC8m", "INC(16|32|64)m", "INC8m",
+                 "NEG(16|32|64)m", "NEG8m", "NOT(16|32|64)m", "NOT8m",
+                 "OR(16|32|64)mi8", "OR(16|32|64)mr", "OR8mi", "OR8mr",
+                 "POP(16|32|64)rmm", "PUSH(16|32|64)rmm",
+                 "SUB(16|32|64)mi8", "SUB(16|32|64)mr", "SUB8mi", "SUB8mr",
+                 "XOR(16|32|64)mi8", "XOR(16|32|64)mr", "XOR8mi", "XOR8mr")>;
+
+// Latency 6, 6 micro-ops: one cycle on SKXPort6 and five cycles on the
+// SKXPort0156 group.
+def SKXWriteResGroup88 : SchedWriteRes<[SKXPort6,SKXPort0156]> {
+  let Latency = 6;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,5];
+}
+def: InstRW<[SKXWriteResGroup88], (instregex "STD")>;
+
+// Latency 7, 1 micro-op: one cycle on the SKXPort23 group.
+def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup89],
+      (instregex "LD_F32m", "LD_F64m", "LD_F80m",
+                 "VBROADCASTF128", "VBROADCASTI128",
+                 "VBROADCASTSDYrm", "VBROADCASTSSYrm",
+                 "VLDDQUYrm", "VMOVAPDYrm", "VMOVAPSYrm", "VMOVDDUPYrm",
+                 "VMOVDQAYrm", "VMOVDQUYrm",
+                 "VMOVNTDQAYrm", "VMOVNTDQAZrm(b?)(k?)(z?)",
+                 "VMOVSHDUPYrm", "VMOVSLDUPYrm", "VMOVUPDYrm", "VMOVUPSYrm",
+                 "VPBROADCASTDYrm", "VPBROADCASTQYrm")>;
+
+// Latency 7, 2 micro-ops: one cycle on SKXPort0 and one cycle on SKXPort5.
+def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>;
+
+// Latency 7, 2 micro-ops: one cycle on SKXPort0 and one cycle on the
+// SKXPort23 group.
+def SKXWriteResGroup91 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup91],
+      (instregex "COMISDrm", "COMISSrm", "UCOMISDrm", "UCOMISSrm",
+                 "VCOMISDZrm(b?)(k?)(z?)", "VCOMISDrm",
+                 "VCOMISSZrm(b?)(k?)(z?)", "VCOMISSrm",
+                 "VUCOMISDZrm(b?)(k?)(z?)", "VUCOMISDrm",
+                 "VUCOMISSZrm(b?)(k?)(z?)", "VUCOMISSrm")>;
+
+// Latency 7, 2 micro-ops: one cycle on SKXPort5 and one cycle on the
+// SKXPort23 group.
+def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup92],
+      (instregex "INSERTPSrm",
+                 "PACKSSDWrm", "PACKSSWBrm", "PACKUSDWrm", "PACKUSWBrm",
+                 "PALIGNRrmi", "PBLENDWrmi",
+                 "PSHUFBrm", "PSHUFDmi", "PSHUFHWmi", "PSHUFLWmi",
+                 "PUNPCKHBWrm", "PUNPCKHDQrm", "PUNPCKHQDQrm", "PUNPCKHWDrm",
+                 "PUNPCKLBWrm", "PUNPCKLDQrm", "PUNPCKLQDQrm", "PUNPCKLWDrm",
+                 "SHUFPDrmi", "SHUFPSrmi",
+                 "UNPCKHPDrm", "UNPCKHPSrm", "UNPCKLPDrm", "UNPCKLPSrm",
+                 "VINSERTPSZrm(b?)(k?)(z?)", "VINSERTPSrm",
+                 "VMOVSDZrm(b?)(k?)(z?)", "VMOVSSZrm(b?)(k?)(z?)",
+                 "VPACKSSDWZ128rm(b?)(k?)(z?)", "VPACKSSDWrm",
+                 "VPACKSSWBZ128rm(b?)(k?)(z?)", "VPACKSSWBrm",
+                 "VPACKUSDWZ128rm(b?)(k?)(z?)", "VPACKUSDWrm",
+                 "VPACKUSWBZ128rm(b?)(k?)(z?)", "VPACKUSWBrm",
+                 "VPALIGNRZ128rmi(b?)(k?)(z?)", "VPALIGNRrmi",
+                 "VPBLENDWrmi",
+                 "VPBROADCASTBZ128m(b?)(k?)(z?)", "VPBROADCASTBrm",
+                 "VPBROADCASTWZ128m(b?)(k?)(z?)", "VPBROADCASTWrm",
+                 "VPERMILPDZ128m(b?)i(k?)(z?)", "VPERMILPDZ128rm(b?)(k?)(z?)",
+                 "VPERMILPDmi", "VPERMILPDrm",
+                 "VPERMILPSZ128m(b?)i(k?)(z?)", "VPERMILPSZ128rm(b?)(k?)(z?)",
+                 "VPERMILPSmi", "VPERMILPSrm",
+                 "VPSHUFBZ128rm(b?)(k?)(z?)", "VPSHUFBrm",
+                 "VPSHUFDZ128m(b?)i(k?)(z?)", "VPSHUFDmi",
+                 "VPSHUFHWZ128mi(b?)(k?)(z?)", "VPSHUFHWmi",
+                 "VPSHUFLWZ128mi(b?)(k?)(z?)", "VPSHUFLWmi",
+                 "VPSLLDQZ128rm(b?)(k?)(z?)", "VPSRLDQZ128rm(b?)(k?)(z?)",
+                 "VPUNPCKHBWZ128rm(b?)(k?)(z?)", "VPUNPCKHBWrm",
+                 "VPUNPCKHDQZ128rm(b?)(k?)(z?)", "VPUNPCKHDQrm",
+                 "VPUNPCKHQDQZ128rm(b?)(k?)(z?)", "VPUNPCKHQDQrm",
+                 "VPUNPCKHWDZ128rm(b?)(k?)(z?)", "VPUNPCKHWDrm",
+                 "VPUNPCKLBWZ128rm(b?)(k?)(z?)", "VPUNPCKLBWrm",
+                 "VPUNPCKLDQZ128rm(b?)(k?)(z?)", "VPUNPCKLDQrm",
+                 "VPUNPCKLQDQZ128rm(b?)(k?)(z?)", "VPUNPCKLQDQrm")>;
+def:
InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLWDZ128rm(b?)(k?)(z?)")>;
+// Remaining SKXWriteResGroup92 opcodes, listed in a single instregex.
+def: InstRW<[SKXWriteResGroup92],
+      (instregex "VPUNPCKLWDrm",
+                 "VSHUFPDZ128rm(b?)i(k?)(z?)", "VSHUFPDrmi",
+                 "VSHUFPSZ128rm(b?)i(k?)(z?)", "VSHUFPSrmi",
+                 "VUNPCKHPDZ128rm(b?)(k?)(z?)", "VUNPCKHPDrm",
+                 "VUNPCKHPSZ128rm(b?)(k?)(z?)", "VUNPCKHPSrm",
+                 "VUNPCKLPDZ128rm(b?)(k?)(z?)", "VUNPCKLPDrm",
+                 "VUNPCKLPSZ128rm(b?)(k?)(z?)", "VUNPCKLPSrm")>;
+
+// Latency 7, 2 micro-ops: one cycle on SKXPort5 and one cycle on the
+// SKXPort015 group.
+def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup93],
+      (instregex "VCVTDQ2PDZ256rr(b?)(k?)(z?)", "VCVTDQ2PDZrr(b?)(k?)(z?)",
+                 "VCVTPD2DQYrr",
+                 "VCVTPD2DQZ256rr(b?)(k?)(z?)", "VCVTPD2DQZrr(b?)(k?)(z?)",
+                 "VCVTPD2PSYrr",
+                 "VCVTPD2PSZ256rr(b?)(k?)(z?)", "VCVTPD2PSZrr(b?)(k?)(z?)",
+                 "VCVTPD2UDQZ256rr(b?)(k?)(z?)", "VCVTPD2UDQZrr(b?)(k?)(z?)",
+                 "VCVTPH2PSYrr",
+                 "VCVTPH2PSZ256rr(b?)(k?)(z?)", "VCVTPH2PSZrr(b?)(k?)(z?)",
+                 "VCVTPS2PDYrr",
+                 "VCVTPS2PDZ256rr(b?)(k?)(z?)", "VCVTPS2PDZrr(b?)(k?)(z?)",
+                 "VCVTPS2PHYrr",
+                 "VCVTPS2PHZ256rr(b?)(k?)(z?)", "VCVTPS2PHZrr(b?)(k?)(z?)",
+                 "VCVTPS2QQZ256rr(b?)(k?)(z?)", "VCVTPS2QQZrr(b?)(k?)(z?)",
+                 "VCVTPS2UQQZ256rr(b?)(k?)(z?)", "VCVTPS2UQQZrr(b?)(k?)(z?)",
+                 "VCVTQQ2PSZ256rr(b?)(k?)(z?)", "VCVTQQ2PSZrr(b?)(k?)(z?)",
+                 "VCVTTPD2DQYrr",
+                 "VCVTTPD2DQZ256rr(b?)(k?)(z?)", "VCVTTPD2DQZrr(b?)(k?)(z?)",
+                 "VCVTTPD2UDQZ256rr(b?)(k?)(z?)", "VCVTTPD2UDQZrr(b?)(k?)(z?)",
+                 "VCVTTPS2QQZ256rr(b?)(k?)(z?)", "VCVTTPS2QQZrr(b?)(k?)(z?)",
+                 "VCVTTPS2UQQZ256rr(b?)(k?)(z?)", "VCVTTPS2UQQZrr(b?)(k?)(z?)",
+                 "VCVTUDQ2PDZ256rr(b?)(k?)(z?)", "VCVTUDQ2PDZrr(b?)(k?)(z?)",
+                 "VCVTUQQ2PSZ256rr(b?)(k?)(z?)", "VCVTUQQ2PSZrr(b?)(k?)(z?)")>;
+
+// Latency 7, 2 micro-ops: one cycle on the SKXPort01 group and one cycle on
+// the SKXPort23 group.
+def SKXWriteResGroup94 : SchedWriteRes<[SKXPort01,SKXPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup94],
+      (instregex "PABSBrm", "PABSDrm", "PABSWrm",
+                 "PADDSBrm", "PADDSWrm", "PADDUSBrm", "PADDUSWrm",
+                 "PAVGBrm", "PAVGWrm",
+                 "PCMPEQBrm", "PCMPEQDrm", "PCMPEQQrm", "PCMPEQWrm",
+                 "PCMPGTBrm", "PCMPGTDrm", "PCMPGTWrm",
+                 "PMAXSBrm", "PMAXSDrm", "PMAXSWrm",
+                 "PMAXUBrm", "PMAXUDrm", "PMAXUWrm",
+                 "PMINSBrm", "PMINSDrm", "PMINSWrm",
+                 "PMINUBrm", "PMINUDrm", "PMINUWrm",
+                 "PSIGNBrm128", "PSIGNDrm128")>;
+def:
InstRW<[SKXWriteResGroup94], (instregex "PSIGNWrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSLLDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSLLQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSLLWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRADrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRAWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRLDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRLQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRLWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSUBSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSUBSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSUBUSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSUBUSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDUSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDUSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDUSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDUSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPAVGBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPAVGBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex 
"VPAVGWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPAVGWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPEQBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPEQDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPEQQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPEQWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPGTBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPGTDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPGTWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUDZ128rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup94], (instregex "VPMINUDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPROLDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPROLQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPROLVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPROLVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPRORDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPRORQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPRORVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPRORVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSIGNBrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSIGNDrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSIGNWrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLWrm")>; +def: 
InstRW<[SKXWriteResGroup94], (instregex "VPSRADZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRADZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRADrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAVDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAVWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBSBrm")>; +def: 
InstRW<[SKXWriteResGroup94], (instregex "VPSUBSWZ128rm(b?)(k?)(z?)")>;
+// Last SKXWriteResGroup94 bindings, listed as one pattern set.
+def: InstRW<[SKXWriteResGroup94], (instregex
+    "VPSUBSWrm",
+    "VPSUBUSBZ128rm(b?)(k?)(z?)", "VPSUBUSBrm",
+    "VPSUBUSWZ128rm(b?)(k?)(z?)", "VPSUBUSWrm")>;
+
+// Latency 7, 2 micro-ops: one cycle on the load ports (2/3) and one cycle on
+// any of ports 0/1/5.
+def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23, SKXPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+def: InstRW<[SKXWriteResGroup95], (instregex
+    "ANDNPDrm", "ANDNPSrm", "ANDPDrm", "ANDPSrm",
+    "BLENDPDrmi", "BLENDPSrmi",
+    "ORPDrm", "ORPSrm",
+    "PADDBrm", "PADDDrm", "PADDQrm", "PADDWrm",
+    "PANDNrm", "PANDrm", "PORrm",
+    "PSUBBrm", "PSUBDrm", "PSUBQrm", "PSUBWrm",
+    "PXORrm",
+    "VANDNPDZ128rm(b?)(k?)(z?)", "VANDNPDrm",
+    "VANDNPSZ128rm(b?)(k?)(z?)", "VANDNPSrm",
+    "VANDPDZ128rm(b?)(k?)(z?)", "VANDPDrm",
+    "VANDPSZ128rm(b?)(k?)(z?)", "VANDPSrm",
+    "VBLENDMPDZ128rm(b?)(k?)(z?)", "VBLENDMPSZ128rm(b?)(k?)(z?)",
+    "VBLENDPDrmi", "VBLENDPSrmi",
+    "VBROADCASTI32X2Z128m(b?)(k?)(z?)", "VBROADCASTSSZ128m(b?)(k?)(z?)",
+    "VINSERTF128rm", "VINSERTI128rm",
+    "VMASKMOVPDrm", "VMASKMOVPSrm",
+    "VMOVAPDZ128rm(b?)(k?)(z?)", "VMOVAPSZ128rm(b?)(k?)(z?)",
+    "VMOVDDUPZ128rm(b?)(k?)(z?)",
+    "VMOVDQA32Z128rm(b?)(k?)(z?)", "VMOVDQA64Z128rm(b?)(k?)(z?)",
+    "VMOVDQU16Z128rm(b?)(k?)(z?)", "VMOVDQU32Z128rm(b?)(k?)(z?)",
+    "VMOVDQU64Z128rm(b?)(k?)(z?)", "VMOVDQU8Z128rm(b?)(k?)(z?)",
+    "VMOVNTDQAZ128rm(b?)(k?)(z?)",
+    "VMOVSHDUPZ128rm(b?)(k?)(z?)", "VMOVSLDUPZ128rm(b?)(k?)(z?)",
+    "VMOVUPDZ128rm(b?)(k?)(z?)", "VMOVUPSZ128rm(b?)(k?)(z?)",
+    "VORPDZ128rm(b?)(k?)(z?)", "VORPDrm",
+    "VORPSZ128rm(b?)(k?)(z?)", "VORPSrm",
+    "VPADDBZ128rm(b?)(k?)(z?)", "VPADDBrm",
+    "VPADDDZ128rm(b?)(k?)(z?)", "VPADDDrm",
+    "VPADDQZ128rm(b?)(k?)(z?)", "VPADDQrm",
+    "VPADDWZ128rm(b?)(k?)(z?)", "VPADDWrm",
+    "VPANDDZ128rm(b?)(k?)(z?)",
+    "VPANDNDZ128rm(b?)(k?)(z?)", "VPANDNQZ128rm(b?)(k?)(z?)", "VPANDNrm",
+    "VPANDQZ128rm(b?)(k?)(z?)", "VPANDrm",
+    "VPBLENDDrmi",
+    "VPBLENDMBZ128rm(b?)(k?)(z?)", "VPBLENDMDZ128rm(b?)(k?)(z?)",
+    "VPBLENDMQZ128rm(b?)(k?)(z?)", "VPBLENDMWZ128rm(b?)(k?)(z?)",
+    "VPBROADCASTDZ128m(b?)(k?)(z?)", "VPBROADCASTQZ128m(b?)(k?)(z?)",
+    "VPMASKMOVDrm", "VPMASKMOVQrm",
+    "VPORDZ128rm(b?)(k?)(z?)", "VPORQZ128rm(b?)(k?)(z?)", "VPORrm",
+    "VPSUBBZ128rm(b?)(k?)(z?)", "VPSUBBrm",
+    "VPSUBDZ128rm(b?)(k?)(z?)", "VPSUBDrm",
+    "VPSUBQZ128rm(b?)(k?)(z?)", "VPSUBQrm",
+    "VPSUBWZ128rm(b?)(k?)(z?)", "VPSUBWrm",
+    "VPTERNLOGDZ128rm(b?)i(k?)(z?)", "VPTERNLOGQZ128rm(b?)i(k?)(z?)",
+    "VPXORDZ128rm(b?)(k?)(z?)", "VPXORQZ128rm(b?)(k?)(z?)", "VPXORrm",
+    "VXORPDZ128rm(b?)(k?)(z?)", "VXORPDrm",
+    "VXORPSZ128rm(b?)(k?)(z?)", "VXORPSrm",
+    "XORPDrm", "XORPSrm")>;
+
+// Latency 7, 3 micro-ops: port 5 is held for two cycles and the load ports
+// (2/3) for one.
+def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5, SKXPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2, 1];
+}
+def: InstRW<[SKXWriteResGroup96], (instregex
+    "MMX_PACKSSDWirm", "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;
+
+def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let 
ResourceCycles = [2,1];
+}
+def: InstRW<[SKXWriteResGroup97], (instregex
+    "VPERMI2W128rr(b?)(k?)(z?)", "VPERMI2W256rr(b?)(k?)(z?)",
+    "VPERMI2Wrr(b?)(k?)(z?)",
+    "VPERMT2W128rr(b?)(k?)(z?)", "VPERMT2W256rr(b?)(k?)(z?)",
+    "VPERMT2Wrr(b?)(k?)(z?)")>;
+
+// Latency 7, 3 micro-ops: one cycle on the load ports (2/3) and two cycles
+// on ports 0/6.
+def SKXWriteResGroup98 : SchedWriteRes<[SKXPort23, SKXPort06]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+}
+def: InstRW<[SKXWriteResGroup98], (instregex
+    "CMOVA(16|32|64)rm", "CMOVBE(16|32|64)rm")>;
+
+// Latency 7, 3 micro-ops: one cycle on the load ports (2/3) and two cycles
+// on ports 0/1/5/6.
+def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+}
+def: InstRW<[SKXWriteResGroup99], (instregex
+    "LEAVE64", "SCASB", "SCASL", "SCASQ", "SCASW")>;
+
+// Latency 7, 3 micro-ops: one cycle each on port 0, port 5 and ports 0/1/5.
+def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0, SKXPort5, SKXPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup100], (instregex
+    "CVTTSS2SI64rr", "CVTTSS2SIrr",
+    "VCVTSS2USI64Zrr(b?)(k?)(z?)",
+    "VCVTTSS2SI64Zrb", "VCVTTSS2SI64rr",
+    "VCVTTSS2SIZrb", "VCVTTSS2SIrr",
+    "VCVTTSS2USI64Zrb")>;
+
+def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> {
+  let Latency = 7;
+  let 
NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKXWriteResGroup101], (instregex "FLDCW16m")>;
+
+// Latency 7, 3 micro-ops: one cycle each on port 0, the load ports (2/3)
+// and ports 0/1/5/6.
+def SKXWriteResGroup102 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup102], (instregex "LDMXCSR", "VLDMXCSR")>;
+
+// Latency 7, 3 micro-ops: one cycle each on port 5, the load ports (2/3)
+// and ports 0/1/5/6.
+def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5, SKXPort23, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup103], (instregex
+    "KMOVBkm", "KMOVDkm", "KMOVQkm", "KMOVWkm")>;
+
+// Latency 7, 3 micro-ops: one cycle each on port 6, the load ports (2/3)
+// and ports 0/1/5/6.
+def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6, SKXPort23, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup104], (instregex "LRETQ", "RETQ")>;
+
+// Latency 7, 3 micro-ops: one cycle each on the load ports (2/3), ports 0/6
+// and ports 1/5.
+def SKXWriteResGroup105 : SchedWriteRes<[SKXPort23, SKXPort06, SKXPort15]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup105], (instregex "BEXTR32rm", "BEXTR64rm")>;
+
+// Latency 7, 4 micro-ops: one cycle on port 4 (store data), two cycles on
+// port 5 and one cycle on ports 2/3/7 (address generation).
+def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4, SKXPort5, SKXPort237]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 2, 1];
+}
+def: InstRW<[SKXWriteResGroup106], (instregex
+    "VCOMPRESSPDZ128mr(b?)(k?)(z?)", "VCOMPRESSPDZ256mr(b?)(k?)(z?)",
+    "VCOMPRESSPDZmr(b?)(k?)(z?)",
+    "VCOMPRESSPSZ128mr(b?)(k?)(z?)", "VCOMPRESSPSZ256mr(b?)(k?)(z?)",
+    "VCOMPRESSPSZmr(b?)(k?)(z?)")>;
+def: 
InstRW<[SKXWriteResGroup106], (instregex "VPCOMPRESSDZ128mr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup106], (instregex
+    "VPCOMPRESSDZ256mr(b?)(k?)(z?)", "VPCOMPRESSDZmr(b?)(k?)(z?)",
+    "VPCOMPRESSQZ128mr(b?)(k?)(z?)", "VPCOMPRESSQZ256mr(b?)(k?)(z?)",
+    "VPCOMPRESSQZmr(b?)(k?)(z?)")>;
+
+// Latency 7, 5 micro-ops: one cycle each on port 4, ports 2/3 and ports
+// 2/3/7, plus two cycles on ports 0/6.
+def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237, SKXPort06]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1, 1, 1, 2];
+}
+def: InstRW<[SKXWriteResGroup107], (instregex
+    "ROL(16|32|64)m1", "ROL(16|32|64)mi", "ROL8m1", "ROL8mi",
+    "ROR(16|32|64)m1", "ROR(16|32|64)mi", "ROR8m1", "ROR8mi")>;
+
+// Latency 7, 5 micro-ops: one cycle each on port 4, ports 2/3 and ports
+// 2/3/7, plus two cycles on ports 0/1/5/6.
+def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1, 1, 1, 2];
+}
+def: InstRW<[SKXWriteResGroup108], (instregex
+    "XADD(16|32|64)rm", "XADD8rm")>;
+
+// Latency 7, 5 micro-ops: one cycle on each of port 4, port 6, ports 2/3,
+// ports 2/3/7 and ports 0/1/5/6.
+def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4, SKXPort6, SKXPort23, SKXPort237, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1, 1, 1, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup109], (instregex
+    "CALL(16|32|64)m", "FARCALL64")>;
+
+// Groups 110, 112 and 113 share the same resource list (port 0, port 4,
+// ports 2/3/7, ports 0/1/5/6) and latency 7; they differ only in how many
+// cycles ports 4 and 2/3/7 are held (2, 4 and 8) and in the micro-op count.
+
+// Latency 7, 7 micro-ops, cycles [1, 2, 2, 2].
+def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort237, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 7;
+  let ResourceCycles = [1, 2, 2, 2];
+}
+def: InstRW<[SKXWriteResGroup110], (instregex
+    "VPSCATTERDQZ128mr(b?)(k?)(z?)", "VPSCATTERQQZ128mr(b?)(k?)(z?)",
+    "VSCATTERDPDZ128mr(b?)(k?)(z?)", "VSCATTERQPDZ128mr(b?)(k?)(z?)")>;
+
+// Latency 7, 7 micro-ops: one cycle on port 6, three on ports 0/6, one on
+// ports 1/5 and two on ports 0/1/5/6.
+def SKXWriteResGroup111 : SchedWriteRes<[SKXPort6, SKXPort06, SKXPort15, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 7;
+  let ResourceCycles = [1, 3, 1, 2];
+}
+def: InstRW<[SKXWriteResGroup111], (instregex "LOOP")>;
+
+// Latency 7, 11 micro-ops, cycles [1, 4, 4, 2].
+def SKXWriteResGroup112 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort237, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 11;
+  let ResourceCycles = [1, 4, 4, 2];
+}
+def: InstRW<[SKXWriteResGroup112], (instregex
+    "VPSCATTERDQZ256mr(b?)(k?)(z?)", "VPSCATTERQQZ256mr(b?)(k?)(z?)",
+    "VSCATTERDPDZ256mr(b?)(k?)(z?)", "VSCATTERQPDZ256mr(b?)(k?)(z?)")>;
+
+// Latency 7, 19 micro-ops, cycles [1, 8, 8, 2].
+def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort237, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 19;
+  let ResourceCycles = [1, 8, 8, 2];
+}
+def: InstRW<[SKXWriteResGroup113], (instregex
+    "VPSCATTERDQZmr(b?)(k?)(z?)", "VPSCATTERQQZmr(b?)(k?)(z?)",
+    "VSCATTERDPDZmr(b?)(k?)(z?)", "VSCATTERQPDZmr(b?)(k?)(z?)")>;
+
+// Latency 7, 36 micro-ops: one cycle on port 0, sixteen on port 4, one on
+// port 5, sixteen on ports 2/3/7 and two on ports 0/1/5/6.
+def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort5, SKXPort237, SKXPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 36;
+  let ResourceCycles = [1, 16, 1, 16, 2];
+}
+def: InstRW<[SKXWriteResGroup114], (instregex "VSCATTERDPSZmr(b?)(k?)(z?)")>;
+
+// Latency 8, 2 micro-ops: port 0 is held for two cycles.
+def SKXWriteResGroup115 : SchedWriteRes<[SKXPort0]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup115], (instregex "AESIMCrr", "VAESIMCrr")>;
+
+// Latency 8, 2 micro-ops: two cycles on ports 0/1/5.
+def SKXWriteResGroup116 : SchedWriteRes<[SKXPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SKXWriteResGroup116], (instregex
+    "PMULLDrr",
+    "ROUNDPDr", "ROUNDPSr", "ROUNDSDr", "ROUNDSSr",
+    "VPMULLDYrr",
+    "VPMULLDZ128rr(b?)(k?)(z?)", "VPMULLDZ256rr(b?)(k?)(z?)",
+    "VPMULLDZrr(b?)(k?)(z?)", "VPMULLDrr",
+    "VRNDSCALEPDZ128rri(b?)(k?)(z?)", "VRNDSCALEPDZ256rri(b?)(k?)(z?)",
+    "VRNDSCALEPDZrri(b?)(k?)(z?)",
+    "VRNDSCALEPSZ128rri(b?)(k?)(z?)", "VRNDSCALEPSZ256rri(b?)(k?)(z?)",
+    "VRNDSCALEPSZrri(b?)(k?)(z?)",
+    "VRNDSCALESDr(b?)(k?)(z?)", "VRNDSCALESSr(b?)(k?)(z?)",
+    "VROUNDPDr", "VROUNDPSr", "VROUNDSDr", "VROUNDSSr",
+    "VROUNDYPDr", "VROUNDYPSr")>;
+
+// Latency 8, 2 micro-ops: one cycle on port 0 and one on the load ports
+// (2/3).
+def SKXWriteResGroup117 : SchedWriteRes<[SKXPort0, SKXPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+def: 
InstRW<[SKXWriteResGroup117], (instregex "VTESTPDrm")>;
+def: InstRW<[SKXWriteResGroup117], (instregex "VTESTPSrm")>;
+
+// Latency 8, 2 micro-ops: one cycle on port 1 and one on the load ports
+// (2/3).
+def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1, SKXPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+// NOTE(review): in "IMUL(32|64)rm(i8?)" only the '8' is optional, so the
+// pattern text requires a trailing 'i'. Confirm whether the plain "rm" forms
+// were meant to be covered here (that would be spelled "(i8)?").
+// NOTE(review): "MUL(16|32|64)m" also spells MUL16m and MUL32m, which are
+// bound again further down to SKXWriteResGroup118_16_2 and
+// SKXWriteResGroup118_32. Confirm which binding is intended to apply.
+def: InstRW<[SKXWriteResGroup118], (instregex
+    "BSF(16|32|64)rm", "BSR(16|32|64)rm",
+    "IMUL64m", "IMUL(32|64)rm(i8?)", "IMUL8m",
+    "LZCNT(16|32|64)rm",
+    "MUL(16|32|64)m", "MUL8m",
+    "PDEP32rm", "PDEP64rm",
+    "PEXT32rm", "PEXT64rm",
+    "POPCNT(16|32|64)rm", "TZCNT(16|32|64)rm")>;
+
+// Latency 8, 3 micro-ops: one cycle each on port 1, ports 0/1/5/6 and the
+// load ports (2/3).
+def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+}
+// NOTE(review): as above, "(i8?)" requires the trailing 'i'; confirm that
+// excluding the plain "rm" form from this group is intentional.
+def: InstRW<[SKXWriteResGroup118_16_1], (instregex "IMUL16rm(i8?)")>;
+
+// Latency 8, 5 micro-ops on port 1, ports 0/1/5/6 and the load ports (2/3).
+// NOTE(review): unlike its sibling groups this one sets no ResourceCycles
+// although NumMicroOps is 5; confirm the intended per-port cycle counts.
+def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+}
+def: InstRW<[SKXWriteResGroup118_16_2], (instregex "IMUL16m", "MUL16m")>;
+
+// Latency 8, 3 micro-ops: one cycle each on port 1, ports 0/1/5/6 and the
+// load ports (2/3).
+def SKXWriteResGroup118_32 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup118_32], (instregex "IMUL32m", "MUL32m")>;
+
+def SKXWriteResGroup119 : 
SchedWriteRes<[SKXPort5,SKXPort23]> {
+  // Latency 8, 2 micro-ops: one cycle on port 5 and one on the load ports
+  // (2/3).
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+// NOTE(review): the VPSHUFHW, VPSHUFLW, VPSLLDQ and VPSRLDQ entries below
+// name Z128 forms, while the neighbouring entries use Y/Z256/Z forms.
+// Confirm these against the instruction definitions.
+def: InstRW<[SKXWriteResGroup119], (instregex
+    "FCOM32m", "FCOM64m", "FCOMP32m", "FCOMP64m",
+    "MMX_PSADBWirm",
+    "VFPCLASSSDrm(b?)(k?)(z?)",
+    "VPACKSSDWYrm", "VPACKSSDWZ256rm(b?)(k?)(z?)", "VPACKSSDWZrm(b?)(k?)(z?)",
+    "VPACKSSWBYrm", "VPACKSSWBZ256rm(b?)(k?)(z?)", "VPACKSSWBZrm(b?)(k?)(z?)",
+    "VPACKUSDWYrm", "VPACKUSDWZ256rm(b?)(k?)(z?)", "VPACKUSDWZrm(b?)(k?)(z?)",
+    "VPACKUSWBYrm", "VPACKUSWBZ256rm(b?)(k?)(z?)", "VPACKUSWBZrm(b?)(k?)(z?)",
+    "VPALIGNRYrmi", "VPALIGNRZ256rmi(b?)(k?)(z?)", "VPALIGNRZrmi(b?)(k?)(z?)",
+    "VPBLENDWYrmi",
+    "VPBROADCASTBYrm", "VPBROADCASTBZ256m(b?)(k?)(z?)", "VPBROADCASTBZm(b?)(k?)(z?)",
+    "VPBROADCASTWYrm", "VPBROADCASTWZ256m(b?)(k?)(z?)", "VPBROADCASTWZm(b?)(k?)(z?)",
+    "VPERMILPDYmi", "VPERMILPDYrm",
+    "VPERMILPDZ256m(b?)i(k?)(z?)", "VPERMILPDZ256rm(b?)(k?)(z?)",
+    "VPERMILPDZm(b?)i(k?)(z?)", "VPERMILPDZrm(b?)(k?)(z?)",
+    "VPERMILPSYmi", "VPERMILPSYrm",
+    "VPERMILPSZ256m(b?)i(k?)(z?)", "VPERMILPSZ256rm(b?)(k?)(z?)",
+    "VPERMILPSZm(b?)i(k?)(z?)", "VPERMILPSZrm(b?)(k?)(z?)",
+    "VPMOVSXBDYrm", "VPMOVSXBQYrm", "VPMOVSXWQYrm",
+    "VPSHUFBYrm", "VPSHUFBZ256rm(b?)(k?)(z?)", "VPSHUFBZrm(b?)(k?)(z?)",
+    "VPSHUFDYmi", "VPSHUFDZ256m(b?)i(k?)(z?)", "VPSHUFDZm(b?)i(k?)(z?)",
+    "VPSHUFHWYmi", "VPSHUFHWZ128mi(b?)(k?)(z?)", "VPSHUFHWZmi(b?)(k?)(z?)",
+    "VPSHUFLWYmi", "VPSHUFLWZ128mi(b?)(k?)(z?)", "VPSHUFLWZmi(b?)(k?)(z?)",
+    "VPSLLDQZ128rm(b?)(k?)(z?)", "VPSLLDQZ512rm(b?)(k?)(z?)",
+    "VPSRLDQZ128rm(b?)(k?)(z?)", "VPSRLDQZ512rm(b?)(k?)(z?)",
+    "VPUNPCKHBWYrm", "VPUNPCKHBWZ256rm(b?)(k?)(z?)", "VPUNPCKHBWZrm(b?)(k?)(z?)",
+    "VPUNPCKHDQYrm", "VPUNPCKHDQZ256rm(b?)(k?)(z?)", "VPUNPCKHDQZrm(b?)(k?)(z?)",
+    "VPUNPCKHQDQYrm", "VPUNPCKHQDQZ256rm(b?)(k?)(z?)", "VPUNPCKHQDQZrm(b?)(k?)(z?)",
+    "VPUNPCKHWDYrm", "VPUNPCKHWDZ256rm(b?)(k?)(z?)", "VPUNPCKHWDZrm(b?)(k?)(z?)",
+    "VPUNPCKLBWYrm", "VPUNPCKLBWZ256rm(b?)(k?)(z?)", "VPUNPCKLBWZrm(b?)(k?)(z?)",
+    "VPUNPCKLDQYrm", "VPUNPCKLDQZ256rm(b?)(k?)(z?)", "VPUNPCKLDQZrm(b?)(k?)(z?)",
+    "VPUNPCKLQDQYrm", "VPUNPCKLQDQZ256rm(b?)(k?)(z?)", "VPUNPCKLQDQZrm(b?)(k?)(z?)",
+    "VPUNPCKLWDYrm", "VPUNPCKLWDZ256rm(b?)(k?)(z?)", "VPUNPCKLWDZrm(b?)(k?)(z?)",
+    "VSHUFPDYrmi", "VSHUFPDZ256rm(b?)i(k?)(z?)", "VSHUFPDZrm(b?)i(k?)(z?)",
+    "VSHUFPSYrmi", "VSHUFPSZ256rm(b?)i(k?)(z?)", "VSHUFPSZrm(b?)i(k?)(z?)",
+    "VUNPCKHPDYrm", "VUNPCKHPDZ256rm(b?)(k?)(z?)", "VUNPCKHPDZrm(b?)(k?)(z?)",
+    "VUNPCKHPSYrm", "VUNPCKHPSZ256rm(b?)(k?)(z?)", "VUNPCKHPSZrm(b?)(k?)(z?)",
+    "VUNPCKLPDYrm", "VUNPCKLPDZ256rm(b?)(k?)(z?)", "VUNPCKLPDZrm(b?)(k?)(z?)",
+    "VUNPCKLPSYrm", "VUNPCKLPSZ256rm(b?)(k?)(z?)", "VUNPCKLPSZrm(b?)(k?)(z?)")>;
+
+// Latency 8, 2 micro-ops: one cycle on ports 0/1 and one on the load ports
+// (2/3).
+def SKXWriteResGroup120 : SchedWriteRes<[SKXPort01, SKXPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+def: InstRW<[SKXWriteResGroup120], (instregex
+    "VPABSBYrm", "VPABSBZ256rm(b?)(k?)(z?)", "VPABSBZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup120], (instregex 
"VPABSDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPEQBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPEQDYrm")>; +def: InstRW<[SKXWriteResGroup120], 
(instregex "VPCMPEQQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPEQWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPGTBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPGTDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPGTWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex 
"VPMINSDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], 
(instregex "VPRORVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSIGNBYrm256")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSIGNDYrm256")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSIGNWYrm256")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWZ256mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWZmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWZrm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup120], (instregex "VPSRADYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWZ256mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWZmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLQYrm")>; +def: 
InstRW<[SKXWriteResGroup120], (instregex "VPSRLQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWZ256mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWZmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSWYrm")>; +def: InstRW<[SKXWriteResGroup120], 
(instregex "VPSUBUSWZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSWZrm(b?)(k?)(z?)")>;
+
+// Latency 8, 2 micro-ops: one cycle on a load port (2/3) and one cycle on
+// port 0, 1 or 5. Used below for 256/512-bit logic, blend, broadcast, insert,
+// move and integer add/sub forms that take a memory operand.
+def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23, SKXPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+def: InstRW<[SKXWriteResGroup121], (instregex
+    "VANDNPDYrm", "VANDNPDZ256rm(b?)(k?)(z?)", "VANDNPDZrm(b?)(k?)(z?)",
+    "VANDNPSYrm", "VANDNPSZ256rm(b?)(k?)(z?)", "VANDNPSZrm(b?)(k?)(z?)",
+    "VANDPDYrm", "VANDPDZ256rm(b?)(k?)(z?)", "VANDPDZrm(b?)(k?)(z?)",
+    "VANDPSYrm", "VANDPSZ256rm(b?)(k?)(z?)", "VANDPSZrm(b?)(k?)(z?)",
+    "VBLENDMPDZ256rm(b?)(k?)(z?)", "VBLENDMPDZrm(b?)(k?)(z?)",
+    "VBLENDMPSZ256rm(b?)(k?)(z?)", "VBLENDMPSZrm(b?)(k?)(z?)",
+    "VBLENDPDYrmi", "VBLENDPSYrmi",
+    "VBROADCASTF32X2Z256m(b?)(k?)(z?)", "VBROADCASTF32X2Zm(b?)(k?)(z?)",
+    "VBROADCASTF32X4Z256rm(b?)(k?)(z?)", "VBROADCASTF32X4rm(b?)(k?)(z?)",
+    "VBROADCASTF32X8rm(b?)(k?)(z?)",
+    "VBROADCASTF64X2Z128rm(b?)(k?)(z?)", "VBROADCASTF64X2rm(b?)(k?)(z?)",
+    "VBROADCASTF64X4rm(b?)(k?)(z?)",
+    "VBROADCASTI32X2Z256m(b?)(k?)(z?)", "VBROADCASTI32X2Zm(b?)(k?)(z?)",
+    "VBROADCASTI32X4Z256rm(b?)(k?)(z?)", "VBROADCASTI32X4rm(b?)(k?)(z?)",
+    "VBROADCASTI32X8rm(b?)(k?)(z?)",
+    "VBROADCASTI64X2Z128rm(b?)(k?)(z?)", "VBROADCASTI64X2rm(b?)(k?)(z?)",
+    "VBROADCASTI64X4rm(b?)(k?)(z?)",
+    "VBROADCASTSDZ256m(b?)(k?)(z?)", "VBROADCASTSDZm(b?)(k?)(z?)",
+    "VBROADCASTSSZ256m(b?)(k?)(z?)", "VBROADCASTSSZm(b?)(k?)(z?)",
+    "VINSERTF32x4Z256rm(b?)(k?)(z?)", "VINSERTF32x4Zrm(b?)(k?)(z?)",
+    "VINSERTF32x8Zrm(b?)(k?)(z?)",
+    "VINSERTF64x2Z256rm(b?)(k?)(z?)", "VINSERTF64x2Zrm(b?)(k?)(z?)",
+    "VINSERTF64x4Zrm(b?)(k?)(z?)",
+    "VINSERTI32x4Z256rm(b?)(k?)(z?)", "VINSERTI32x4Zrm(b?)(k?)(z?)",
+    "VINSERTI32x8Zrm(b?)(k?)(z?)",
+    "VINSERTI64x2Z256rm(b?)(k?)(z?)", "VINSERTI64x2Zrm(b?)(k?)(z?)",
+    "VINSERTI64x4Zrm(b?)(k?)(z?)",
+    "VMASKMOVPDYrm", "VMASKMOVPSYrm",
+    "VMOVAPDZ256rm(b?)(k?)(z?)", "VMOVAPDZrm(b?)(k?)(z?)",
+    "VMOVAPSZ256rm(b?)(k?)(z?)", "VMOVAPSZrm(b?)(k?)(z?)",
+    "VMOVDDUPZ256rm(b?)(k?)(z?)", "VMOVDDUPZrm(b?)(k?)(z?)",
+    "VMOVDQA32Z256rm(b?)(k?)(z?)", "VMOVDQA32Zrm(b?)(k?)(z?)",
+    "VMOVDQA64Z256rm(b?)(k?)(z?)", "VMOVDQA64Zrm(b?)(k?)(z?)",
+    "VMOVDQU16Z256rm(b?)(k?)(z?)", "VMOVDQU16Zrm(b?)(k?)(z?)",
+    "VMOVDQU32Z256rm(b?)(k?)(z?)", "VMOVDQU32Zrm(b?)(k?)(z?)",
+    "VMOVDQU64Z256rm(b?)(k?)(z?)", "VMOVDQU64Zrm(b?)(k?)(z?)",
+    "VMOVDQU8Z256rm(b?)(k?)(z?)", "VMOVDQU8Zrm(b?)(k?)(z?)",
+    "VMOVNTDQAZ256rm(b?)(k?)(z?)",
+    "VMOVSHDUPZ256rm(b?)(k?)(z?)", "VMOVSHDUPZrm(b?)(k?)(z?)",
+    "VMOVSLDUPZ256rm(b?)(k?)(z?)", "VMOVSLDUPZrm(b?)(k?)(z?)",
+    "VMOVUPDZ256rm(b?)(k?)(z?)", "VMOVUPDZrm(b?)(k?)(z?)",
+    "VMOVUPSZ256rm(b?)(k?)(z?)", "VMOVUPSZrm(b?)(k?)(z?)",
+    "VORPDYrm", "VORPDZ256rm(b?)(k?)(z?)", "VORPDZrm(b?)(k?)(z?)",
+    "VORPSYrm", "VORPSZ256rm(b?)(k?)(z?)", "VORPSZrm(b?)(k?)(z?)",
+    "VPADDBYrm", "VPADDBZ256rm(b?)(k?)(z?)", "VPADDBZrm(b?)(k?)(z?)",
+    "VPADDDYrm", "VPADDDZ256rm(b?)(k?)(z?)", "VPADDDZrm(b?)(k?)(z?)",
+    "VPADDQYrm", "VPADDQZ256rm(b?)(k?)(z?)", "VPADDQZrm(b?)(k?)(z?)",
+    "VPADDWYrm", "VPADDWZ256rm(b?)(k?)(z?)", "VPADDWZrm(b?)(k?)(z?)",
+    "VPANDDZ256rm(b?)(k?)(z?)", "VPANDDZrm(b?)(k?)(z?)",
+    "VPANDNDZ256rm(b?)(k?)(z?)", "VPANDNDZrm(b?)(k?)(z?)",
+    "VPANDNQZ256rm(b?)(k?)(z?)", "VPANDNQZrm(b?)(k?)(z?)",
+    "VPANDNYrm",
+    "VPANDQZ256rm(b?)(k?)(z?)", "VPANDQZrm(b?)(k?)(z?)",
+    "VPANDYrm",
+    "VPBLENDDYrmi",
+    "VPBLENDMBZ256rm(b?)(k?)(z?)", "VPBLENDMBZrm(b?)(k?)(z?)",
+    "VPBLENDMDZ256rm(b?)(k?)(z?)", "VPBLENDMDZrm(b?)(k?)(z?)",
+    "VPBLENDMQZ256rm(b?)(k?)(z?)", "VPBLENDMQZrm(b?)(k?)(z?)",
+    "VPBLENDMWZ256rm(b?)(k?)(z?)", "VPBLENDMWZrm(b?)(k?)(z?)",
+    "VPBROADCASTDZ256m(b?)(k?)(z?)", "VPBROADCASTDZm(b?)(k?)(z?)",
+    "VPBROADCASTQZ256m(b?)(k?)(z?)", "VPBROADCASTQZm(b?)(k?)(z?)",
+    "VPMASKMOVDYrm", "VPMASKMOVQYrm",
+    "VPORDZ256rm(b?)(k?)(z?)", "VPORDZrm(b?)(k?)(z?)",
+    "VPORQZ256rm(b?)(k?)(z?)", "VPORQZrm(b?)(k?)(z?)",
+    "VPORYrm",
+    "VPSUBBYrm", "VPSUBBZ256rm(b?)(k?)(z?)", "VPSUBBZrm(b?)(k?)(z?)",
+    "VPSUBDYrm", "VPSUBDZ256rm(b?)(k?)(z?)", "VPSUBDZrm(b?)(k?)(z?)",
+    "VPSUBQYrm", "VPSUBQZ256rm(b?)(k?)(z?)", "VPSUBQZrm(b?)(k?)(z?)",
+    "VPSUBWYrm", "VPSUBWZ256rm(b?)(k?)(z?)", "VPSUBWZrm(b?)(k?)(z?)",
+    "VPTERNLOGDZ256rm(b?)i(k?)(z?)", "VPTERNLOGDZrm(b?)i(k?)(z?)",
+    "VPTERNLOGQZ256rm(b?)i(k?)(z?)", "VPTERNLOGQZrm(b?)i(k?)(z?)",
+    "VPXORDZ256rm(b?)(k?)(z?)", "VPXORDZrm(b?)(k?)(z?)",
+    "VPXORQZ256rm(b?)(k?)(z?)", "VPXORQZrm(b?)(k?)(z?)",
+    "VPXORYrm",
+    "VXORPDYrm", "VXORPDZ256rm(b?)(k?)(z?)", "VXORPDZrm(b?)(k?)(z?)",
+    "VXORPSYrm", "VXORPSZ256rm(b?)(k?)(z?)", "VXORPSZrm(b?)(k?)(z?)")>;
+
+// Latency 8, 3 micro-ops: one load (port 2/3) and two on port 0, 1 or 5.
+def SKXWriteResGroup122 : SchedWriteRes<[SKXPort23,SKXPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let
ResourceCycles = [1,2];
+}
+def: InstRW<[SKXWriteResGroup122], (instregex
+    "BLENDVPDrm0", "BLENDVPSrm0", "PBLENDVBrm0",
+    "VBLENDVPDrm", "VBLENDVPSrm", "VPBLENDVBYrm", "VPBLENDVBrm")>;
+
+// Latency 8, 4 micro-ops: 1 on port 0, 2 on port 5, 1 load (port 2/3).
+def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0, SKXPort5, SKXPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 2, 1];
+}
+def: InstRW<[SKXWriteResGroup123], (instregex
+    "MMX_PHADDSWrm64", "MMX_PHSUBSWrm64")>;
+
+// Latency 8, 4 micro-ops: 2 on port 5, 1 load (port 2/3), 1 on port 0 or 5.
+def SKXWriteResGroup124 : SchedWriteRes<[SKXPort5, SKXPort23, SKXPort05]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup124], (instregex
+    "MMX_PHADDWrm64", "MMX_PHADDrm64", "MMX_PHSUBDrm64", "MMX_PHSUBWrm64")>;
+
+// Latency 8, 4 micro-ops: one each on port 4, port 5, ports 2/3/7 and
+// ports 0/1/5.
+def SKXWriteResGroup125 : SchedWriteRes<[SKXPort4, SKXPort5, SKXPort237,
+                                         SKXPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 1, 1, 1];
+}
+def: InstRW<[SKXWriteResGroup125], (instregex "VCVTPS2PHYmr")>;
+
+// Latency 8, 5 micro-ops: 1 on ports 2/3, 1 on ports 2/3/7, 3 on ports 0/6.
+def SKXWriteResGroup126 : SchedWriteRes<[SKXPort23, SKXPort237, SKXPort06]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1, 1, 3];
+}
+def: InstRW<[SKXWriteResGroup126], (instregex
+    "ROR(16|32|64)mCL", "ROR8mCL")>;
+
+// Latency 8, 5 micro-ops: 1 on ports 2/3, 1 on ports 2/3/7, 1 on ports 0/6,
+// 2 on ports 0/1/5/6.
+def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23, SKXPort237, SKXPort06,
+                                         SKXPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1, 1, 1, 2];
+}
+def: InstRW<[SKXWriteResGroup127], (instregex
+    "RCL(16|32|64)m1", "RCL(16|32|64)mi", "RCL8m1", "RCL8mi",
+    "RCR(16|32|64)m1", "RCR(16|32|64)mi", "RCR8m1", "RCR8mi")>;
+
+// Latency 8, 6 micro-ops: 1 on port 4, 1 on ports 2/3, 1 on ports 2/3/7,
+// 3 on ports 0/6.
+def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237,
+                                         SKXPort06]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1, 1, 1, 3];
+}
+def: InstRW<[SKXWriteResGroup128], (instregex
+    "ROL(16|32|64)mCL", "ROL8mCL",
+    "SAR(16|32|64)mCL", "SAR8mCL",
+    "SHL(16|32|64)mCL", "SHL8mCL",
+    "SHR(16|32|64)mCL", "SHR8mCL")>;
+
+// Latency 8, 6 micro-ops: 1 on port 4, 1 on ports 2/3, 1 on ports 2/3/7,
+// 3 on ports 0/1/5/6.
+def SKXWriteResGroup129 : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237,
+                                         SKXPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1, 1, 1, 3];
+}
+def: InstRW<[SKXWriteResGroup129], (instregex
+    "ADC(16|32|64)mi8", "ADC8mi")>;
+
+// Latency 8, 6 micro-ops: 1 on port 4, 1 on ports 2/3, 1 on ports 2/3/7,
+// 2 on ports 0/6, 1 on ports 0/1/5/6.
+def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4, SKXPort23, SKXPort237,
+                                         SKXPort06, SKXPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1, 1, 1, 2, 1];
+}
+def: InstRW<[SKXWriteResGroup130], (instregex
+    "ADC(16|32|64)mr", "ADC8mr",
+    "CMPXCHG(16|32|64)rm", "CMPXCHG8rm",
+    "SBB(16|32|64)mi8", "SBB(16|32|64)mr", "SBB8mi", "SBB8mr")>;
+
+// Groups 131-134 share one resource list (port 0, port 4, port 5,
+// ports 2/3/7, ports 0/1/5/6) and Latency 8; they differ only in how many
+// cycles are spent on port 4 and ports 2/3/7 (2, 4, 8 and 16 each).
+def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort5,
+                                         SKXPort237, SKXPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 8;
+  let ResourceCycles = [1, 2, 1, 2, 2];
+}
+def: InstRW<[SKXWriteResGroup131], (instregex
+    "VPSCATTERQDZ128mr(b?)(k?)(z?)", "VPSCATTERQDZ256mr(b?)(k?)(z?)",
+    "VSCATTERQPSZ128mr(b?)(k?)(z?)", "VSCATTERQPSZ256mr(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort5,
+                                         SKXPort237, SKXPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 12;
+  let ResourceCycles = [1, 4, 1, 4, 2];
+}
+def: InstRW<[SKXWriteResGroup132], (instregex
+    "VPSCATTERDDZ128mr(b?)(k?)(z?)", "VSCATTERDPSZ128mr(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup133 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort5,
+                                         SKXPort237, SKXPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 20;
+  let ResourceCycles = [1, 8, 1, 8, 2];
+}
+def: InstRW<[SKXWriteResGroup133], (instregex
+    "VPSCATTERDDZ256mr(b?)(k?)(z?)", "VSCATTERDPSZ256mr(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0, SKXPort4, SKXPort5,
+                                         SKXPort237, SKXPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 36;
+  let ResourceCycles = [1, 16, 1, 16, 2];
+}
+def: InstRW<[SKXWriteResGroup134], (instregex "VPSCATTERDDZmr(b?)(k?)(z?)")>;
+
+// Latency 9, 2 micro-ops: 1 on port 0, 1 load (port 2/3).
+def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0, SKXPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+def: InstRW<[SKXWriteResGroup135], (instregex
+    "MMX_CVTPI2PSirm", "MMX_PMADDUBSWrm64", "MMX_PMADDWDirm",
+    "MMX_PMULHRSWrm64", "MMX_PMULHUWirm", "MMX_PMULHWirm", "MMX_PMULLWirm",
+    "MMX_PMULUDQirm",
+    "RCPSSm", "RSQRTSSm", "VRCPSSm", "VRSQRTSSm",
+    "VTESTPDYrm", "VTESTPSYrm")>;
+
+// Latency 9, 2 micro-ops: 1 on port 5, 1 load (port 2/3).
+def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5, SKXPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+def: InstRW<[SKXWriteResGroup136], (instregex
+    "PCMPGTQrm", "PSADBWrm",
+    "VALIGNDZ128rm(b?)i(k?)(z?)", "VALIGNQZ128rm(b?)i(k?)(z?)",
+    "VCMPPDZ128rm(b?)i(k?)(z?)", "VCMPPSZ128rm(b?)i(k?)(z?)",
+    "VCMPSDZrm_Int(b?)(k?)(z?)", "VCMPSSZrm_Int(b?)(k?)(z?)",
+    "VDBPSADBWZ128rmi(b?)(k?)(z?)",
+    "VFPCLASSSSrm(b?)(k?)(z?)",
+    "VPCMPBZ128rmi(b?)(k?)(z?)", "VPCMPDZ128rmi(b?)(k?)(z?)",
+    "VPCMPEQBZ128rm(b?)(k?)(z?)", "VPCMPEQDZ128rm(b?)(k?)(z?)",
+    "VPCMPEQQZ128rm(b?)(k?)(z?)", "VPCMPEQWZ128rm(b?)(k?)(z?)",
+    "VPCMPGTBZ128rm(b?)(k?)(z?)", "VPCMPGTDZ128rm(b?)(k?)(z?)",
+    "VPCMPGTQZ128rm(b?)(k?)(z?)", "VPCMPGTQrm",
+    "VPCMPGTWZ128rm(b?)(k?)(z?)",
+    "VPCMPQZ128rmi(b?)(k?)(z?)",
+    "VPCMPUBZ128rmi(b?)(k?)(z?)", "VPCMPUDZ128rmi(b?)(k?)(z?)",
+    "VPCMPUQZ128rmi(b?)(k?)(z?)", "VPCMPUWZ128rmi(b?)(k?)(z?)",
+    "VPCMPWZ128rmi(b?)(k?)(z?)",
+    "VPERMI2D128rm(b?)(k?)(z?)", "VPERMI2PD128rm(b?)(k?)(z?)",
+    "VPERMI2PS128rm(b?)(k?)(z?)", "VPERMI2Q128rm(b?)(k?)(z?)",
+    "VPERMT2D128rm(b?)(k?)(z?)", "VPERMT2PD128rm(b?)(k?)(z?)",
+    "VPERMT2PS128rm(b?)(k?)(z?)", "VPERMT2Q128rm(b?)(k?)(z?)",
+    "VPMAXSQZ128rm(b?)(k?)(z?)", "VPMAXUQZ128rm(b?)(k?)(z?)",
+    "VPMINSQZ128rm(b?)(k?)(z?)", "VPMINUQZ128rm(b?)(k?)(z?)",
+    "VPMOVSXBDZ128rm(b?)(k?)(z?)", "VPMOVSXBQZ128rm(b?)(k?)(z?)",
+    "VPMOVSXBWYrm", "VPMOVSXBWZ128rm(b?)(k?)(z?)",
+    "VPMOVSXDQYrm", "VPMOVSXDQZ128rm(b?)(k?)(z?)",
+    "VPMOVSXWDYrm", "VPMOVSXWDZ128rm(b?)(k?)(z?)",
+    "VPMOVSXWQZ128rm(b?)(k?)(z?)",
+    "VPMOVZXBDZ128rm(b?)(k?)(z?)", "VPMOVZXBQZ128rm(b?)(k?)(z?)",
+    "VPMOVZXBWZ128rm(b?)(k?)(z?)", "VPMOVZXDQZ128rm(b?)(k?)(z?)",
+    "VPMOVZXWDYrm", "VPMOVZXWDZ128rm(b?)(k?)(z?)",
+    "VPMOVZXWQZ128rm(b?)(k?)(z?)",
+    "VPSADBWZ128rm(b?)(k?)(z?)", "VPSADBWrm",
+    "VPTESTMBZ128rm(b?)(k?)(z?)", "VPTESTMDZ128rm(b?)(k?)(z?)",
+    "VPTESTMQZ128rm(b?)(k?)(z?)", "VPTESTMWZ128rm(b?)(k?)(z?)",
+    "VPTESTNMBZ128rm(b?)(k?)(z?)", "VPTESTNMDZ128rm(b?)(k?)(z?)",
+    "VPTESTNMQZ128rm(b?)(k?)(z?)", "VPTESTNMWZ128rm(b?)(k?)(z?)")>;
+
+// Latency 9, 2 micro-ops: 1 load (port 2/3), 1 on port 0, 1 or 5.
+def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23, SKXPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1, 1];
+}
+def: InstRW<[SKXWriteResGroup137], (instregex "ADDSDrm")>;
+def:
InstRW<[SKXWriteResGroup137], (instregex
+    "ADDSSrm", "CMPSSrm", "CVTPS2PDrm",
+    "MAXSDrm", "MAXSSrm", "MINSDrm", "MINSSrm",
+    "MMX_CVTPS2PIirm", "MMX_CVTTPS2PIirm",
+    "MULSDrm", "MULSSrm", "SUBSDrm", "SUBSSrm",
+    "VADDSDrm", "VADDSSrm", "VCMPSDrm", "VCMPSSrm",
+    "VCVTPH2PSrm", "VCVTPS2PDrm",
+    "VFMADD132SDm", "VFMADD132SSm", "VFMADD213SDm", "VFMADD213SSm",
+    "VFMADD231SDm", "VFMADD231SSm",
+    "VFMSUB132SDm", "VFMSUB132SSm", "VFMSUB213SDm", "VFMSUB213SSm",
+    "VFMSUB231SDm", "VFMSUB231SSm",
+    "VFNMADD132SDm", "VFNMADD132SSm", "VFNMADD213SDm", "VFNMADD213SSm",
+    "VFNMADD231SDm", "VFNMADD231SSm",
+    "VFNMSUB132SDm", "VFNMSUB132SSm", "VFNMSUB213SDm", "VFNMSUB213SSm",
+    "VFNMSUB231SDm", "VFNMSUB231SSm",
+    "VMAXSDrm", "VMAXSSrm", "VMINSDrm", "VMINSSrm",
+    "VMULSDrm", "VMULSSrm", "VSUBSDrm", "VSUBSSrm")>;
+
+// Latency 9, 3 micro-ops: 2 on port 0, 1 on port 0, 1 or 5.
+def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0, SKXPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2, 1];
+}
+def: InstRW<[SKXWriteResGroup138], (instregex
+    "VRCP14PDZr(b?)(k?)(z?)", "VRCP14PSZr(b?)(k?)(z?)",
+    "VRSQRT14PDZr(b?)(k?)(z?)", "VRSQRT14PSZr(b?)(k?)(z?)")>;
+
+// Latency 9, 3 micro-ops: 1 on port 5, 2 on port 0, 1 or 5.
+def SKXWriteResGroup139 : SchedWriteRes<[SKXPort5, SKXPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
+}
+def: InstRW<[SKXWriteResGroup139], (instregex "DPPDrri", "VDPPDrri")>;
+
+def SKXWriteResGroup140 :
SchedWriteRes<[SKXPort23,SKXPort015]> {
+  // Latency 9, 3 micro-ops: one cycle on the port 2/3 group and two cycles on
+  // the port 0/1/5 group.
+  let ResourceCycles = [1,2];
+  let NumMicroOps = 3;
+  let Latency = 9;
+}
+def: InstRW<[SKXWriteResGroup140], (instregex "VBLENDVPDYrm", "VBLENDVPSYrm")>;
+
+// Latency 9, 3 micro-ops: one cycle each on port 0, port 5 and the port 2/3
+// group.
+def SKXWriteResGroup141 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
+  let ResourceCycles = [1,1,1];
+  let NumMicroOps = 3;
+  let Latency = 9;
+}
+def: InstRW<[SKXWriteResGroup141], (instregex "PTESTrm", "VPTESTrm")>;
+
+// Latency 9, 3 micro-ops: one cycle each on port 1, port 5 and the port 2/3
+// group.
+def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> {
+  let ResourceCycles = [1,1,1];
+  let NumMicroOps = 3;
+  let Latency = 9;
+}
+def: InstRW<[SKXWriteResGroup142], (instregex "MULX64rm")>;
+
+// Latency 9, 4 micro-ops: two cycles on port 5, one on the port 0/1 group and
+// one on the port 2/3 group.
+def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> {
+  let ResourceCycles = [2,1,1];
+  let NumMicroOps = 4;
+  let Latency = 9;
+}
+def: InstRW<[SKXWriteResGroup143], (instregex
+    "PHADDSWrm128",
+    "PHSUBSWrm128",
+    "VPHADDSWrm128",
+    "VPHSUBSWrm128")>;
+
+// Latency 9, 4 micro-ops: two cycles on port 5, one on the port 2/3 group and
+// one on the port 0/1/5 group.
+def SKXWriteResGroup144 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
+  let ResourceCycles = [2,1,1];
+  let NumMicroOps = 4;
+  let Latency = 9;
+}
+def: InstRW<[SKXWriteResGroup144], (instregex
+    "PHADDDrm",
+    "PHADDWrm",
+    "PHSUBDrm",
+    "PHSUBWrm",
+    "VPHADDDrm",
+    "VPHADDWrm",
+    "VPHSUBDrm",
+    "VPHSUBWrm")>;
+
+// Latency 9, 4 micro-ops: one cycle each on port 1, the port 2/3 group, the
+// port 2/3/7 group and the port 0/1/5/6 group.
+def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> {
+  let ResourceCycles = [1,1,1,1];
+  let NumMicroOps = 4;
+  let Latency = 9;
+}
+def: 
InstRW<[SKXWriteResGroup145], (instregex "SHLD(16|32|64)mri8")>;
+def: InstRW<[SKXWriteResGroup145], (instregex "SHRD(16|32|64)mri8")>;
+
+// Latency 9, 5 micro-ops: one cycle on port 1, two on port 6, one on the
+// port 2/3 group and one on the port 0/1/5/6 group.
+def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> {
+  let ResourceCycles = [1,2,1,1];
+  let NumMicroOps = 5;
+  let Latency = 9;
+}
+def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm", "LSL(16|32|64)rm")>;
+
+// Latency 10, 2 micro-ops: one cycle on port 0 and one on the port 2/3 group.
+def SKXWriteResGroup147 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let ResourceCycles = [1,1];
+  let NumMicroOps = 2;
+  let Latency = 10;
+}
+def: InstRW<[SKXWriteResGroup147], (instregex
+    "AESDECLASTrm",
+    "AESDECrm",
+    "AESENCLASTrm",
+    "AESENCrm",
+    "RCPPSm",
+    "RSQRTPSm",
+    "VAESDECLASTrm",
+    "VAESDECrm",
+    "VAESENCLASTrm",
+    "VAESENCrm",
+    "VRCP14PDZ128m(b?)(k?)(z?)",
+    "VRCP14PSZ128m(b?)(k?)(z?)",
+    "VRCP14SDrm(b?)(k?)(z?)",
+    "VRCP14SSrm(b?)(k?)(z?)",
+    "VRCPPSm",
+    "VRSQRT14PDZ128m(b?)(k?)(z?)",
+    "VRSQRT14PSZ128m(b?)(k?)(z?)",
+    "VRSQRT14SDrm(b?)(k?)(z?)",
+    "VRSQRT14SSrm(b?)(k?)(z?)",
+    "VRSQRTPSm")>;
+
+def SKXWriteResGroup148 : 
SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup148], (instregex "ADD_F32m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "ADD_F64m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "ILD_F16m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "ILD_F32m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "ILD_F64m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "SUBR_F32m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "SUBR_F64m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "SUB_F32m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "SUB_F64m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VALIGNDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VALIGNDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VALIGNQZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VALIGNQZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VCMPPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VCMPPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VCMPPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VCMPPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VDBPSADBWZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VDBPSADBWZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPBZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPBZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPDZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPDZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQDZ256rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTQYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPQZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPQZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUBZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUBZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUDZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUDZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUQZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUQZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUWZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUWZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPWZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPWZrmi(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup148], (instregex "VPERM2F128rm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERM2I128rm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMDYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2D256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2Drm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2PD256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2PDrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2PS256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2PSrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2Q256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2Qrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDYmi")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPSYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQYmi")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2D256rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup148], (instregex "VPERMT2Drm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2PD256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2PDrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2PS256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2PSrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2Q256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2Qrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMAXSQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMAXSQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMAXUQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMAXUQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMINSQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMINSQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMINUQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMINUQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBDZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBDZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBWZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBWZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXDQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXDQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXWDZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXWDZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXWQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXWQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBDYrm")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBDZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBDZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBQYrm")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBWYrm")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBWZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBWZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXDQYrm")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXDQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXDQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWDZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWDZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQYrm")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWYrm")>;
+// VPSADBWZ128rm is already bound to SKXWriteResGroup136 (latency 9), so it
+// must not be listed here again: that gave one opcode two conflicting InstRW
+// entries. Like the other AVX-512 entries in this group, only the 256-bit and
+// 512-bit load forms belong here.
+def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMBZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMBZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMDZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMDZrm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMQZ256rm(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMQZrm(b?)(k?)(z?)")>;
+def: 
InstRW<[SKXWriteResGroup148], (instregex "VPTESTMWZ256rm(b?)(k?)(z?)")>;
+// Final memory forms bound to SKXWriteResGroup148, gathered into one InstRW.
+def: InstRW<[SKXWriteResGroup148], (instregex
+    "VPTESTMWZrm(b?)(k?)(z?)",
+    "VPTESTNMBZ256rm(b?)(k?)(z?)",
+    "VPTESTNMBZrm(b?)(k?)(z?)",
+    "VPTESTNMDZ256rm(b?)(k?)(z?)",
+    "VPTESTNMDZrm(b?)(k?)(z?)",
+    "VPTESTNMQZ256rm(b?)(k?)(z?)",
+    "VPTESTNMQZrm(b?)(k?)(z?)",
+    "VPTESTNMWZ256rm(b?)(k?)(z?)",
+    "VPTESTNMWZrm(b?)(k?)(z?)",
+    "VSHUFF32X4Z256rm(b?)i(k?)(z?)",
+    "VSHUFF32X4Zrm(b?)i(k?)(z?)",
+    "VSHUFF64X2Z256rm(b?)i(k?)(z?)",
+    "VSHUFF64X2Zrm(b?)i(k?)(z?)",
+    "VSHUFI32X4Z256rm(b?)i(k?)(z?)",
+    "VSHUFI32X4Zrm(b?)i(k?)(z?)",
+    "VSHUFI64X2Z256rm(b?)i(k?)(z?)",
+    "VSHUFI64X2Zrm(b?)i(k?)(z?)")>;
+
+// Latency 10, 2 micro-ops: one cycle on the port 2/3 group and one cycle on
+// the port 0/1/5 group.
+def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> {
+  let ResourceCycles = [1,1];
+  let NumMicroOps = 2;
+  let Latency = 10;
+}
+def: InstRW<[SKXWriteResGroup149], (instregex
+    "ADDPDrm",
+    "ADDPSrm",
+    "ADDSUBPDrm",
+    "ADDSUBPSrm",
+    "CMPPDrmi",
+    "CMPPSrmi",
+    "CVTDQ2PSrm")>;
+def: InstRW<[SKXWriteResGroup149], (instregex "CVTPS2DQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "CVTSS2SDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "CVTTPS2DQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MAXPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MAXPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MINPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MINPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MULPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MULPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PHMINPOSUWrm128")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMADDUBSWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMADDWDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULDQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULHRSWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULHUWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULHWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULLWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULUDQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "SUBPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "SUBPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSUBPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSUBPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCMPPDrmi")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCMPPSrmi")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)(k?)(z?)")>; 
+def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPD2QQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPD2UQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPH2PSYrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPH2PSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2DQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2DQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2QQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2UDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2UQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTQQ2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTSS2SDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTSS2SDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPD2QQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPD2UQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2DQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2DQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2QQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2UDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2UQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUDQ2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUDQ2PSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUQQ2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex 
"VFIXUPIMMPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMSDrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMSSrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex 
"VFMADDSUB213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex 
"VFMSUBADD132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex 
"VFNMADD231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPPDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPPSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPSDm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPSSm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], 
(instregex "VGETMANTPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETMANTPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETMANTSDZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETMANTSSZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPHMINPOSUWrm128")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPLZCNTDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPLZCNTQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMADDUBSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMADDUBSWrm")>; +def: InstRW<[SKXWriteResGroup149], 
(instregex "VPMADDWDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMADDWDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULDQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHRSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHRSWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHUWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHUWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULLWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULLWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULUDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULUDQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VRANGEPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VRANGEPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VRANGESDZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VRANGESSZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VREDUCEPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VREDUCEPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VREDUCESDZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VREDUCESSZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSCALEFPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSCALEFPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSCALEFSDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSCALEFSSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPDZ128rm(b?)(k?)(z?)")>; 
+def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBSSZrm_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup150 : SchedWriteRes<[SKXPort0]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SKXWriteResGroup150], (instregex "PCMPISTRIrr")>; +def: InstRW<[SKXWriteResGroup150], (instregex "PCMPISTRM128rr")>; +def: InstRW<[SKXWriteResGroup150], (instregex "VPCMPISTRIrr")>; +def: InstRW<[SKXWriteResGroup150], (instregex "VPCMPISTRM128rr")>; + +def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup151], (instregex "MPSADBWrmi")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VMPSADBWrmi")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VPEXPANDDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VPEXPANDQZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup152 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup152], (instregex "MMX_CVTPI2PDirm")>; +def: InstRW<[SKXWriteResGroup152], (instregex "VPTESTYrm")>; + +def SKXWriteResGroup153 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup153], (instregex "CVTSD2SSrm")>; +def: InstRW<[SKXWriteResGroup153], (instregex "VCVTSD2SSrm")>; + +def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { + let Latency = 
10; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup154], (instregex "VPHADDSWrm256")>; +def: InstRW<[SKXWriteResGroup154], (instregex "VPHSUBSWrm256")>; + +def SKXWriteResGroup155 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 10; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup155], (instregex "VPHADDDYrm")>; +def: InstRW<[SKXWriteResGroup155], (instregex "VPHADDWYrm")>; +def: InstRW<[SKXWriteResGroup155], (instregex "VPHSUBDYrm")>; +def: InstRW<[SKXWriteResGroup155], (instregex "VPHSUBWYrm")>; + +def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> { + let Latency = 10; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup156], (instregex "MULX32rm")>; + +def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 10; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,1,1,3]; +} +def: InstRW<[SKXWriteResGroup157], (instregex "ADD8mi")>; +def: InstRW<[SKXWriteResGroup157], (instregex "AND8mi")>; +def: InstRW<[SKXWriteResGroup157], (instregex "OR8mi")>; +def: InstRW<[SKXWriteResGroup157], (instregex "SUB8mi")>; +def: InstRW<[SKXWriteResGroup157], (instregex "XCHG(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup157], (instregex "XCHG8rm")>; +def: InstRW<[SKXWriteResGroup157], (instregex "XOR8mi")>; + +def SKXWriteResGroup158 : SchedWriteRes<[SKXPort05,SKXPort0156]> { + let Latency = 10; + let NumMicroOps = 10; + let ResourceCycles = [9,1]; +} +def: InstRW<[SKXWriteResGroup158], (instregex "MMX_EMMS")>; + +def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0]> { + let Latency = 11; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup159], (instregex "DIVPSrr")>; +def: InstRW<[SKXWriteResGroup159], (instregex "DIVSSrr")>; +def: InstRW<[SKXWriteResGroup159], (instregex "VDIVPSYrr")>; +def: 
InstRW<[SKXWriteResGroup159], (instregex "VDIVPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup159], (instregex "VDIVPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup159], (instregex "VDIVPSrr")>; +def: InstRW<[SKXWriteResGroup159], (instregex "VDIVSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup159], (instregex "VDIVSSrr")>; + +def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F32m")>; +def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F64m")>; +def: InstRW<[SKXWriteResGroup160], (instregex "VRCP14PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup160], (instregex "VRCP14PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup160], (instregex "VRCPPSYm")>; +def: InstRW<[SKXWriteResGroup160], (instregex "VRSQRT14PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup160], (instregex "VRSQRT14PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup160], (instregex "VRSQRTPSYm")>; + +def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup161], (instregex "VADDPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VADDPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VADDPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VADDPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VADDPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VADDPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VADDSUBPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VADDSUBPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCMPPDYrmi")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCMPPSYrmi")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], 
(instregex "VCVTDQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPD2QQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPD2QQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPD2UQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPD2UQQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPH2PSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPH2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2DQYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2DQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2DQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2PDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2QQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2UDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2UDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2UQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTQQ2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTQQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2QQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2QQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2UQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2UQQZrm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2DQYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2DQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2DQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2QQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2UDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2UDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2UQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUQQ2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUQQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex 
"VFMADD213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex 
"VFMADDSUB231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex 
"VFMSUBADD213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex 
"VFNMADD231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSZm(b?)(k?)(z?)")>; /* VGETEXP 512-bit forms use the bare Z suffix, matching the Z256/Z pairing of the other AVX-512 entries in this group. */ +def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPSZ256m(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex 
"VGETMANTPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETMANTPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETMANTPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETMANTPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPLZCNTDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPLZCNTDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPLZCNTQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPLZCNTQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDUBSWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex 
"VPMADDUBSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDUBSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDWDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDWDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDWDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULDQYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHRSWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHRSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHRSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHUWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHUWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHUWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULLWYrm")>; /* (256)? also covers the Z256 form, which the other multiplies in this group list explicitly. NOTE(review): confirm VPMULLWZ256rm is not assigned elsewhere in this file. */ +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULLWZ(256)?rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULUDQYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULUDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULUDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VRANGEPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VRANGEPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VRANGEPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VRANGEPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VREDUCEPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], 
(instregex "VREDUCEPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VREDUCEPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VREDUCEPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSCALEFPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSCALEFPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSCALEFPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSCALEFPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPSZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup162], (instregex "FICOM16m")>; +def: InstRW<[SKXWriteResGroup162], (instregex "FICOM32m")>; +def: InstRW<[SKXWriteResGroup162], (instregex "FICOMP16m")>; +def: InstRW<[SKXWriteResGroup162], (instregex "FICOMP32m")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VEXPANDPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VEXPANDPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VEXPANDPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VEXPANDPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VMPSADBWYrmi")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VPEXPANDDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VPEXPANDDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VPEXPANDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex 
"VPEXPANDQZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup163 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup163], (instregex "VCVTSD2SSZrm_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup164], (instregex "CVTDQ2PDrm")>; +def: InstRW<[SKXWriteResGroup164], (instregex "VCVTDQ2PDrm")>; + +def SKXWriteResGroup165 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup165], (instregex "CVTSD2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTSD2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTSS2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTSS2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTTSD2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTTSD2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTTSS2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2SI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2SIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2USI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2SI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2SIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2USIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2SI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex 
"VCVTTSD2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2SIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2USI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2SI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2SIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2USIZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup166], (instregex "CVTPD2DQrm")>; +def: InstRW<[SKXWriteResGroup166], (instregex "CVTPD2PSrm")>; +def: InstRW<[SKXWriteResGroup166], (instregex "CVTTPD2DQrm")>; +def: InstRW<[SKXWriteResGroup166], (instregex "MMX_CVTPD2PIirm")>; +def: InstRW<[SKXWriteResGroup166], (instregex "MMX_CVTTPD2PIirm")>; + +def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup168 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 11; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,2,1]; +} +def: InstRW<[SKXWriteResGroup168], (instregex "SHLD(16|32|64)mrCL")>; +def: InstRW<[SKXWriteResGroup168], (instregex "SHRD(16|32|64)mrCL")>; + +def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 11; + let NumMicroOps = 7; + let ResourceCycles = [2,3,2]; +} +def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL")>; +def: InstRW<[SKXWriteResGroup169], (instregex "RCR(16|32|64)rCL")>; + +def SKXWriteResGroup170 : 
SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 11; + let NumMicroOps = 9; + let ResourceCycles = [1,5,1,2]; +} +def: InstRW<[SKXWriteResGroup170], (instregex "RCL8rCL")>; + +def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> { + let Latency = 11; + let NumMicroOps = 11; + let ResourceCycles = [2,9]; +} +def: InstRW<[SKXWriteResGroup171], (instregex "LOOPE")>; +def: InstRW<[SKXWriteResGroup171], (instregex "LOOPNE")>; + +def SKXWriteResGroup172 : SchedWriteRes<[SKXPort0]> { + let Latency = 12; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup172], (instregex "SQRTPSr")>; +def: InstRW<[SKXWriteResGroup172], (instregex "SQRTSSr")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSYr")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSr")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTSSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTSSr")>; + +def SKXWriteResGroup173 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup173], (instregex "PCLMULQDQrm")>; +def: InstRW<[SKXWriteResGroup173], (instregex "VPCLMULQDQrm")>; + +def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)(k?)(z?)")>; + +def 
SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup176], (instregex "VCVTSD2USIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup176], (instregex "VCVTSS2USI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup176], (instregex "VCVTTSD2USIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup176], (instregex "VCVTTSS2USI64Zrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup177], (instregex "VCVTPS2QQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup177], (instregex "VCVTPS2UQQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup177], (instregex "VCVTTPS2QQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup177], (instregex "VCVTTPS2UQQZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup178 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup178], (instregex "HADDPDrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "HADDPSrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "HSUBPDrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "HSUBPSrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "VHADDPDrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "VHADDPSrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "VHSUBPDrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "VHSUBPSrm")>; + +def SKXWriteResGroup179 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup179], (instregex "CVTTSS2SI64rm")>; + +def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup180], 
(instregex "ADD_FI16m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "ADD_FI32m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "SUBR_FI16m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "SUBR_FI32m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "SUB_FI16m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "SUB_FI32m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "VPERMWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup180], (instregex "VPERMWZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup181], (instregex "VCVTDQ2PDYrm")>; + +def SKXWriteResGroup182 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup182], (instregex "DPPSrri")>; +def: InstRW<[SKXWriteResGroup182], (instregex "VDPPSYrri")>; +def: InstRW<[SKXWriteResGroup182], (instregex "VDPPSrri")>; + +def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup183], (instregex "VHADDPDYrm")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VHADDPSYrm")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VHSUBPDYrm")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VHSUBPSYrm")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2W128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VPERMT2W128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup184], (instregex "DIVPDrr")>; +def: InstRW<[SKXWriteResGroup184], (instregex "DIVSDrr")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDYrr")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDZ128rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup184], (instregex "VDIVPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDrr")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVSDrr")>; + +def SKXWriteResGroup185 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup185], (instregex "AESIMCrm")>; +def: InstRW<[SKXWriteResGroup185], (instregex "VAESIMCrm")>; + +def SKXWriteResGroup186 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup186], (instregex "PMULLDrm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDPDm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDPSm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDSDm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDSSm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VPMULLDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VPMULLDrm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALESDm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALESSm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDPDm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDPSm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDSDm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDSSm")>; + +def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI16m")>; +def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI32m")>; + +def SKXWriteResGroup188 
: SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2UDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTQQ2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTTPD2DQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTTPD2UDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTUQQ2PSZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup189 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2W256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2Wrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMT2W256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMT2Wrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 14; + let NumMicroOps = 10; + let ResourceCycles = [2,4,1,3]; +} +def: InstRW<[SKXWriteResGroup190], (instregex "RCR8rCL")>; + +def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> { + let Latency = 15; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_FPrST0")>; +def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_FST0r")>; +def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_FrST0")>; + +def SKXWriteResGroup192 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 15; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDYrm")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDZ256rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup192], (instregex "VPMULLDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VROUNDYPDm")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VROUNDYPSm")>; + +def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 15; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SKXWriteResGroup193], (instregex "DPPDrmi")>; +def: InstRW<[SKXWriteResGroup193], (instregex "VDPPDrmi")>; + +def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { + let Latency = 15; + let NumMicroOps = 8; + let ResourceCycles = [1,2,2,1,2]; +} +def: InstRW<[SKXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 15; + let NumMicroOps = 10; + let ResourceCycles = [1,1,1,5,1,1]; +} +def: InstRW<[SKXWriteResGroup195], (instregex "RCL(16|32|64)mCL")>; +def: InstRW<[SKXWriteResGroup195], (instregex "RCL8mCL")>; + +def SKXWriteResGroup196 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 16; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup196], (instregex "DIVSSrm")>; +def: InstRW<[SKXWriteResGroup196], (instregex "VDIVSSrm")>; + +def SKXWriteResGroup197 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 16; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; +} +def: InstRW<[SKXWriteResGroup197], (instregex "PCMPISTRIrm")>; +def: InstRW<[SKXWriteResGroup197], (instregex "PCMPISTRM128rm")>; +def: InstRW<[SKXWriteResGroup197], (instregex "VPCMPISTRIrm")>; +def: 
InstRW<[SKXWriteResGroup197], (instregex "VPCMPISTRM128rm")>; + +def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 16; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup198], (instregex "VRCP14PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup198], (instregex "VRCP14PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup198], (instregex "VRSQRT14PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup198], (instregex "VRSQRT14PSZm(b?)(k?)(z?)")>; + +def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 16; + let NumMicroOps = 14; + let ResourceCycles = [1,1,1,4,2,5]; +} +def: InstRW<[SKXWriteResGroup199], (instregex "CMPXCHG8B")>; + +def SKXWriteResGroup200 : SchedWriteRes<[SKXPort0156]> { + let Latency = 16; + let NumMicroOps = 16; + let ResourceCycles = [16]; +} +def: InstRW<[SKXWriteResGroup200], (instregex "VZEROALL")>; + +def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 17; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup201], (instregex "DIVPSrm")>; +def: InstRW<[SKXWriteResGroup201], (instregex "SQRTSSm")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VDIVPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VDIVPSrm")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VDIVSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VSQRTSSm")>; + +def SKXWriteResGroup202 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { + let Latency = 17; + let NumMicroOps = 15; + let ResourceCycles = [2,1,2,4,2,4]; +} +def: InstRW<[SKXWriteResGroup202], (instregex "XCH_F")>; + +def SKXWriteResGroup203 : SchedWriteRes<[SKXPort0]> { + let Latency = 18; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup203], (instregex "SQRTPDr")>; +def: InstRW<[SKXWriteResGroup203], 
(instregex "SQRTSDr")>; +def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTPDYr")>; +def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTPDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTPDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTPDr")>; +def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTSDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup203], (instregex "VSQRTSDr")>; + +def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 18; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup204], (instregex "SQRTPSm")>; +def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPSYrm")>; +def: InstRW<[SKXWriteResGroup204], (instregex "VDIVPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup204], (instregex "VSQRTPSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup204], (instregex "VSQRTPSm")>; +def: InstRW<[SKXWriteResGroup204], (instregex "VSQRTSSZm_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 18; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup206 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort0156]> { + let Latency = 18; + let NumMicroOps = 8; + let ResourceCycles = [4,3,1]; +} +def: InstRW<[SKXWriteResGroup206], (instregex "PCMPESTRIrr")>; +def: InstRW<[SKXWriteResGroup206], (instregex "VPCMPESTRIrr")>; + +def SKXWriteResGroup207 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> { + let Latency = 18; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,5]; +} +def: InstRW<[SKXWriteResGroup207], (instregex "CPUID")>; +def: InstRW<[SKXWriteResGroup207], (instregex "RDTSC")>; + +def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 18; + let NumMicroOps = 11; + let ResourceCycles = [2,1,1,4,1,2]; +} +def: 
InstRW<[SKXWriteResGroup208], (instregex "RCR(16|32|64)mCL")>; +def: InstRW<[SKXWriteResGroup208], (instregex "RCR8mCL")>; + +def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 19; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup209], (instregex "DIVSDrm")>; +def: InstRW<[SKXWriteResGroup209], (instregex "VDIVSDrm")>; +def: InstRW<[SKXWriteResGroup209], (instregex "VSQRTPSYm")>; +def: InstRW<[SKXWriteResGroup209], (instregex "VSQRTPSZ256m(b?)(k?)(z?)")>; + +def SKXWriteResGroup210 : SchedWriteRes<[SKXPort0,SKXPort015]> { + let Latency = 19; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup210], (instregex "VSQRTPSZr(b?)(k?)(z?)")>; + +def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 19; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup212 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 19; + let NumMicroOps = 5; + let ResourceCycles = [1,1,3]; +} +def: InstRW<[SKXWriteResGroup212], (instregex "DPPSrmi")>; +def: InstRW<[SKXWriteResGroup212], (instregex "VDPPSrmi")>; + +def SKXWriteResGroup213 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015,SKXPort0156]> { + let Latency = 19; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; +} +def: InstRW<[SKXWriteResGroup213], (instregex "PCMPESTRM128rr")>; +def: InstRW<[SKXWriteResGroup213], (instregex "VPCMPESTRM128rr")>; + +def SKXWriteResGroup214 : SchedWriteRes<[]> { + let Latency = 20; + let NumMicroOps = 0; +} +def: InstRW<[SKXWriteResGroup214], (instregex "VGATHERDPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup214], (instregex "VGATHERQPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup214], (instregex "VPGATHERDDZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup215 : 
SchedWriteRes<[SKXPort0]> { + let Latency = 20; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup215], (instregex "DIV_FPrST0")>; +def: InstRW<[SKXWriteResGroup215], (instregex "DIV_FST0r")>; +def: InstRW<[SKXWriteResGroup215], (instregex "DIV_FrST0")>; + +def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 20; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup216], (instregex "DIVPDrm")>; +def: InstRW<[SKXWriteResGroup216], (instregex "VDIVPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup216], (instregex "VDIVPDrm")>; +def: InstRW<[SKXWriteResGroup216], (instregex "VDIVSDZrm_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup217 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 20; + let NumMicroOps = 5; + let ResourceCycles = [1,1,3]; +} +def: InstRW<[SKXWriteResGroup217], (instregex "VDPPSYrmi")>; + +def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 20; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup218], (instregex "VGATHERQPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup218], (instregex "VGATHERQPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup218], (instregex "VPGATHERQDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup218], (instregex "VPGATHERQDZ256rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 20; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,1,1,1,2]; +} +def: InstRW<[SKXWriteResGroup219], (instregex "INSB")>; +def: InstRW<[SKXWriteResGroup219], (instregex "INSL")>; +def: InstRW<[SKXWriteResGroup219], (instregex "INSW")>; + +def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> { + let Latency = 20; + let NumMicroOps = 10; + let ResourceCycles = [1,2,7]; +} +def: InstRW<[SKXWriteResGroup220], (instregex 
"MWAITrr")>; + +def SKXWriteResGroup221 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> { + let Latency = 20; + let NumMicroOps = 11; + let ResourceCycles = [3,6,2]; +} +def: InstRW<[SKXWriteResGroup221], (instregex "AESKEYGENASSIST128rr")>; +def: InstRW<[SKXWriteResGroup221], (instregex "VAESKEYGENASSIST128rr")>; + +def SKXWriteResGroup222 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 21; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup222], (instregex "VDIVPDYrm")>; +def: InstRW<[SKXWriteResGroup222], (instregex "VDIVPDZ256rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 22; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F32m")>; +def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F64m")>; + +def SKXWriteResGroup224 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 22; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup224], (instregex "VGATHERDPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup224], (instregex "VGATHERQPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup224], (instregex "VPGATHERDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup224], (instregex "VPGATHERQQZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup224_2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { + let Latency = 22; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPSrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPDrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERQPDrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERQPSrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERDDrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERDQrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERQDrm")>; 
+def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERQQrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERDDrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERQDrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERDQrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VPGATHERQQrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPSrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERQPSrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPDrm")>; +def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERQPDrm")>; + +def SKXWriteResGroup224_3 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { + let Latency = 25; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERDPSYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERQPDYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERQPSYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERDDYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERDQYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERQDYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERQQYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERDDYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERQDYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERDQYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VPGATHERQQYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERDPSYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERQPSYrm")>; +def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERDPDYrm")>; + +def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { + let Latency = 22; + let NumMicroOps = 14; + let ResourceCycles = [5,5,4]; +} +def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTQZ256rr(b?)(k?)(z?)")>; + +def SKXWriteResGroup226 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 23; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup226], (instregex "SQRTSDm")>; +def: InstRW<[SKXWriteResGroup226], (instregex "VSQRTSDm")>; + +def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort015]> { + let Latency = 23; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup227], (instregex "VDIVPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup227], (instregex "VDIVPSZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 23; + let NumMicroOps = 19; + let ResourceCycles = [2,1,4,1,1,4,6]; +} +def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>; + +def SKXWriteResGroup229 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 24; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup229], (instregex "SQRTPDm")>; +def: InstRW<[SKXWriteResGroup229], (instregex "VSQRTPDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup229], (instregex "VSQRTPDm")>; +def: InstRW<[SKXWriteResGroup229], (instregex "VSQRTSDZm_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 24; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup231 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort0156]> { + let Latency = 24; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; +} +def: InstRW<[SKXWriteResGroup231], (instregex "PCMPESTRIrm")>; +def: InstRW<[SKXWriteResGroup231], (instregex "VPCMPESTRIrm")>; + +def SKXWriteResGroup232 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 25; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: 
InstRW<[SKXWriteResGroup232], (instregex "VSQRTPDYm")>; +def: InstRW<[SKXWriteResGroup232], (instregex "VSQRTPDZ256m(b?)(k?)(z?)")>; + +def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 25; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI16m")>; +def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI32m")>; + +def SKXWriteResGroup234 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 25; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup234], (instregex "VGATHERDPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup234], (instregex "VGATHERQPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup234], (instregex "VPGATHERDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup234], (instregex "VPGATHERQDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup234], (instregex "VPGATHERQQZ256rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup235 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 25; + let NumMicroOps = 10; + let ResourceCycles = [4,3,1,1,1]; +} +def: InstRW<[SKXWriteResGroup235], (instregex "PCMPESTRM128rm")>; +def: InstRW<[SKXWriteResGroup235], (instregex "VPCMPESTRM128rm")>; + +def SKXWriteResGroup236 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 25; + let NumMicroOps = 11; + let ResourceCycles = [3,6,1,1]; +} +def: InstRW<[SKXWriteResGroup236], (instregex "AESKEYGENASSIST128rm")>; +def: InstRW<[SKXWriteResGroup236], (instregex "VAESKEYGENASSIST128rm")>; + +def SKXWriteResGroup237 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 26; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup237], (instregex "VSQRTPSZm(b?)(k?)(z?)")>; + +def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 26; + let NumMicroOps = 5; + let ResourceCycles 
= [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup238], (instregex "VGATHERDPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup238], (instregex "VGATHERQPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup238], (instregex "VPGATHERDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup238], (instregex "VPGATHERQQZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 27; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F32m")>; +def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F64m")>; + +def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 27; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup240], (instregex "VGATHERDPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup240], (instregex "VPGATHERDDZ256rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup241 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort0156]> { + let Latency = 28; + let NumMicroOps = 8; + let ResourceCycles = [2,4,1,1]; +} +def: InstRW<[SKXWriteResGroup241], (instregex "IDIV(16|32|64)m")>; +def: InstRW<[SKXWriteResGroup241], (instregex "IDIV8m")>; + +def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { + let Latency = 29; + let NumMicroOps = 15; + let ResourceCycles = [5,5,1,4]; +} +def: InstRW<[SKXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 30; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI16m")>; +def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI32m")>; + +def SKXWriteResGroup244 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 30; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup244], (instregex "VDIVPDZrm(b?)(k?)(z?)")>; + +def 
SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 30; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup245], (instregex "VGATHERDPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup245], (instregex "VPGATHERDDZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup246 : SchedWriteRes<[SKXPort0,SKXPort015]> { + let Latency = 31; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup246], (instregex "VSQRTPDZr(b?)(k?)(z?)")>; + +def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> { + let Latency = 35; + let NumMicroOps = 23; + let ResourceCycles = [1,5,3,4,10]; +} +def: InstRW<[SKXWriteResGroup247], (instregex "IN32ri")>; +def: InstRW<[SKXWriteResGroup247], (instregex "IN32rr")>; +def: InstRW<[SKXWriteResGroup247], (instregex "IN8ri")>; +def: InstRW<[SKXWriteResGroup247], (instregex "IN8rr")>; + +def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 35; + let NumMicroOps = 23; + let ResourceCycles = [1,5,2,1,4,10]; +} +def: InstRW<[SKXWriteResGroup248], (instregex "OUT32ir")>; +def: InstRW<[SKXWriteResGroup248], (instregex "OUT32rr")>; +def: InstRW<[SKXWriteResGroup248], (instregex "OUT8ir")>; +def: InstRW<[SKXWriteResGroup248], (instregex "OUT8rr")>; + +def SKXWriteResGroup249 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { + let Latency = 37; + let NumMicroOps = 21; + let ResourceCycles = [9,7,5]; +} +def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTQZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> { + let Latency = 37; + let NumMicroOps = 31; + let ResourceCycles = [1,8,1,21]; +} +def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64)?")>; /* "(64)?" makes the whole suffix optional, so XRSTOR is matched as well as XRSTOR64 */ + +def SKXWriteResGroup251 : 
SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 38; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup251], (instregex "VSQRTPDZm(b?)(k?)(z?)")>; + +def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> { + let Latency = 40; + let NumMicroOps = 18; + let ResourceCycles = [1,1,2,3,1,1,1,8]; +} +def: InstRW<[SKXWriteResGroup252], (instregex "VMCLEARm")>; + +def SKXWriteResGroup253 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 41; + let NumMicroOps = 39; + let ResourceCycles = [1,10,1,1,26]; +} +def: InstRW<[SKXWriteResGroup253], (instregex "XSAVE64")>; + +def SKXWriteResGroup254 : SchedWriteRes<[SKXPort5,SKXPort0156]> { + let Latency = 42; + let NumMicroOps = 22; + let ResourceCycles = [2,20]; +} +def: InstRW<[SKXWriteResGroup254], (instregex "RDTSCP")>; + +def SKXWriteResGroup255 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 42; + let NumMicroOps = 40; + let ResourceCycles = [1,11,1,1,26]; +} +def: InstRW<[SKXWriteResGroup255], (instregex "XSAVE")>; + +def SKXWriteResGroup256 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { + let Latency = 44; + let NumMicroOps = 22; + let ResourceCycles = [9,7,1,5]; +} +def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTQZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup258 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05,SKXPort06,SKXPort0156]> { + let Latency = 62; + let NumMicroOps = 64; + let ResourceCycles = [2,8,5,10,39]; +} +def: InstRW<[SKXWriteResGroup258], (instregex "FLDENVm")>; +def: InstRW<[SKXWriteResGroup258], (instregex "FLDENVm")>; + +def SKXWriteResGroup259 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 63; + let NumMicroOps = 88; + let ResourceCycles = 
[4,4,31,1,2,1,45]; +} +def: InstRW<[SKXWriteResGroup259], (instregex "FXRSTOR64")>; + +def SKXWriteResGroup260 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 63; + let NumMicroOps = 90; + let ResourceCycles = [4,2,33,1,2,1,47]; +} +def: InstRW<[SKXWriteResGroup260], (instregex "FXRSTOR")>; + +def SKXWriteResGroup261 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { + let Latency = 67; + let NumMicroOps = 35; + let ResourceCycles = [17,11,7]; +} +def: InstRW<[SKXWriteResGroup261], (instregex "VPCONFLICTDZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup262 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { + let Latency = 74; + let NumMicroOps = 36; + let ResourceCycles = [17,11,1,7]; +} +def: InstRW<[SKXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> { + let Latency = 75; + let NumMicroOps = 15; + let ResourceCycles = [6,3,6]; +} +def: InstRW<[SKXWriteResGroup263], (instregex "FNINIT")>; + +def SKXWriteResGroup264 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { + let Latency = 76; + let NumMicroOps = 32; + let ResourceCycles = [7,2,8,3,1,11]; +} +def: InstRW<[SKXWriteResGroup264], (instregex "DIV(16|32|64)r")>; + +def SKXWriteResGroup265 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> { + let Latency = 102; + let NumMicroOps = 66; + let ResourceCycles = [4,2,4,8,14,34]; +} +def: InstRW<[SKXWriteResGroup265], (instregex "IDIV(16|32|64)r")>; + +def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 106; + let NumMicroOps = 100; + let ResourceCycles = [9,1,11,16,1,11,21,30]; +} +def: InstRW<[SKXWriteResGroup266], (instregex "FSTENVm")>; +def: InstRW<[SKXWriteResGroup266], (instregex "FSTENVm")>; + +def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6,SKXPort0156]> { + let 
Latency = 140; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup267], (instregex "PAUSE")>; +} // SchedModel Index: llvm/trunk/lib/Target/X86/X86Schedule.td =================================================================== --- llvm/trunk/lib/Target/X86/X86Schedule.td +++ llvm/trunk/lib/Target/X86/X86Schedule.td @@ -667,4 +667,6 @@ include "X86ScheduleZnver1.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" +include "X86SchedSkylakeServer.td" + Index: llvm/trunk/test/CodeGen/X86/avx-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll @@ -35,9 +35,9 @@ ; ; SKX-LABEL: test_addpd: ; SKX: # BB#0: -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addpd: ; BTVER2: # BB#0: @@ -83,9 +83,9 @@ ; ; SKX-LABEL: test_addps: ; SKX: # BB#0: -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addps: ; BTVER2: # BB#0: @@ -131,9 +131,9 @@ ; ; SKX-LABEL: test_addsubpd: ; SKX: # BB#0: -; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # BB#0: @@ -180,9 
+180,9 @@ ; ; SKX-LABEL: test_addsubps: ; SKX: # BB#0: -; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # BB#0: @@ -233,10 +233,10 @@ ; ; SKX-LABEL: test_andnotpd: ; SKX: # BB#0: -; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: @@ -295,10 +295,10 @@ ; ; SKX-LABEL: test_andnotps: ; SKX: # BB#0: -; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: @@ -357,10 +357,10 @@ ; ; SKX-LABEL: test_andpd: ; SKX: # BB#0: -; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] 
; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: @@ -417,10 +417,10 @@ ; ; SKX-LABEL: test_andps: ; SKX: # BB#0: -; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: @@ -477,10 +477,10 @@ ; ; SKX-LABEL: test_blendpd: ; SKX: # BB#0: -; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: @@ -529,9 +529,9 @@ ; ; SKX-LABEL: test_blendps: ; SKX: # BB#0: -; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] -; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] +; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: @@ -578,8 +578,8 @@ ; SKX-LABEL: test_blendvpd: ; SKX: # BB#0: ; SKX-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: retq # sched: [2:1.00] +; 
SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: @@ -627,8 +627,8 @@ ; SKX-LABEL: test_blendvps: ; SKX: # BB#0: ; SKX-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: @@ -671,8 +671,8 @@ ; ; SKX-LABEL: test_broadcastf128: ; SKX: # BB#0: -; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_broadcastf128: ; BTVER2: # BB#0: @@ -711,8 +711,8 @@ ; ; SKX-LABEL: test_broadcastsd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_broadcastsd_ymm: ; BTVER2: # BB#0: @@ -752,8 +752,8 @@ ; ; SKX-LABEL: test_broadcastss: ; SKX: # BB#0: -; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_broadcastss: ; BTVER2: # BB#0: @@ -793,8 +793,8 @@ ; ; SKX-LABEL: test_broadcastss_ymm: ; SKX: # BB#0: -; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_broadcastss_ymm: ; BTVER2: # BB#0: @@ -842,12 +842,12 @@ ; ; SKX-LABEL: test_cmppd: ; SKX: # BB#0: -; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 -; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %k1 +; SKX-NEXT: 
vcmpeqpd %ymm1, %ymm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %k1 # sched: [10:1.00] ; SKX-NEXT: vpmovm2q %k0, %ymm0 ; SKX-NEXT: vpmovm2q %k1, %ymm1 -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: @@ -903,12 +903,12 @@ ; ; SKX-LABEL: test_cmpps: ; SKX: # BB#0: -; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 -; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %k1 +; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %k1 # sched: [10:1.00] ; SKX-NEXT: vpmovm2d %k0, %ymm0 ; SKX-NEXT: vpmovm2d %k1, %ymm1 -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: @@ -965,9 +965,9 @@ ; SKX-LABEL: test_cvtdq2pd: ; SKX: # BB#0: ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] -; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [7:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: @@ -1023,9 +1023,9 @@ ; SKX-LABEL: test_cvtdq2ps: ; SKX: # BB#0: ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [4:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: @@ -1081,7 +1081,7 @@ ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: vcvttpd2dqy 
(%rdi), %xmm1 # sched: [8:1.00] ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: @@ -1137,7 +1137,7 @@ ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: @@ -1191,9 +1191,9 @@ ; SKX-LABEL: test_cvtps2dq: ; SKX: # BB#0: ; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [4:0.50] -; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50] +; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # BB#0: @@ -1243,8 +1243,8 @@ ; SKX-LABEL: test_divpd: ; SKX: # BB#0: ; SKX-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:1.00] -; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_divpd: ; BTVER2: # BB#0: @@ -1291,8 +1291,8 @@ ; SKX-LABEL: test_divps: ; SKX: # BB#0: ; SKX-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:1.00] -; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [11:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_divps: ; BTVER2: # BB#0: @@ -1339,8 +1339,8 @@ ; SKX-LABEL: test_dpps: ; SKX: # BB#0: ; SKX-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33] -; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [13:1.33] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # 
sched: [20:1.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: @@ -1394,7 +1394,7 @@ ; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_extractf128: ; BTVER2: # BB#0: @@ -1442,8 +1442,8 @@ ; SKX-LABEL: test_haddpd: ; SKX: # BB#0: ; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: @@ -1491,8 +1491,8 @@ ; SKX-LABEL: test_haddps: ; SKX: # BB#0: ; SKX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: @@ -1540,8 +1540,8 @@ ; SKX-LABEL: test_hsubpd: ; SKX: # BB#0: ; SKX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: @@ -1589,8 +1589,8 @@ ; SKX-LABEL: test_hsubps: ; SKX: # BB#0: ; SKX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: @@ -1642,9 +1642,9 @@ ; SKX-LABEL: test_insertf128: ; SKX: # BB#0: ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; 
SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_insertf128: ; BTVER2: # BB#0: @@ -1691,8 +1691,8 @@ ; ; SKX-LABEL: test_lddqu: ; SKX: # BB#0: -; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # BB#0: @@ -1739,10 +1739,10 @@ ; ; SKX-LABEL: test_maskmovpd: ; SKX: # BB#0: -; SKX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [1:0.50] -; SKX-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] +; SKX-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maskmovpd: ; BTVER2: # BB#0: @@ -1795,10 +1795,10 @@ ; ; SKX-LABEL: test_maskmovpd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SKX-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] +; SKX-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maskmovpd_ymm: ; BTVER2: # BB#0: @@ -1851,10 +1851,10 @@ ; ; SKX-LABEL: test_maskmovps: ; SKX: # BB#0: -; SKX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [1:0.50] -; SKX-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50] +; SKX-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: 
vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maskmovps: ; BTVER2: # BB#0: @@ -1907,10 +1907,10 @@ ; ; SKX-LABEL: test_maskmovps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SKX-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50] +; SKX-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maskmovps_ymm: ; BTVER2: # BB#0: @@ -1961,7 +1961,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: @@ -2010,7 +2010,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: @@ -2059,7 +2059,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: @@ -2108,7 +2108,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: @@ -2159,10 +2159,10 @@ ; ; SKX-LABEL: test_movapd: ; SKX: # BB#0: -; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] +; SKX-NEXT: 
vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movapd: ; BTVER2: # BB#0: @@ -2214,10 +2214,10 @@ ; ; SKX-LABEL: test_movaps: ; SKX: # BB#0: -; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movaps: ; BTVER2: # BB#0: @@ -2270,9 +2270,9 @@ ; SKX-LABEL: test_movddup: ; SKX: # BB#0: ; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [1:0.50] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: @@ -2323,7 +2323,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: @@ -2369,7 +2369,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: @@ -2413,9 +2413,9 @@ ; ; SKX-LABEL: test_movntpd: ; SKX: # BB#0: -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; 
BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: @@ -2460,9 +2460,9 @@ ; ; SKX-LABEL: test_movntps: ; SKX: # BB#0: -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: @@ -2512,9 +2512,9 @@ ; SKX-LABEL: test_movshdup: ; SKX: # BB#0: ; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [1:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: @@ -2568,9 +2568,9 @@ ; SKX-LABEL: test_movsldup: ; SKX: # BB#0: ; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [1:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: @@ -2625,10 +2625,10 @@ ; ; SKX-LABEL: test_movupd: ; SKX: # BB#0: -; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: @@ -2682,10 +2682,10 @@ ; ; SKX-LABEL: test_movups: ; SKX: # BB#0: -; 
SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movups: ; BTVER2: # BB#0: @@ -2733,9 +2733,9 @@ ; ; SKX-LABEL: test_mulpd: ; SKX: # BB#0: -; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: @@ -2781,9 +2781,9 @@ ; ; SKX-LABEL: test_mulps: ; SKX: # BB#0: -; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mulps: ; BTVER2: # BB#0: @@ -2833,10 +2833,10 @@ ; ; SKX-LABEL: orpd: ; SKX: # BB#0: -; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: orpd: ; BTVER2: # BB#0: @@ -2893,10 +2893,10 @@ ; ; SKX-LABEL: test_orps: ; SKX: # BB#0: -; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; 
SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: @@ -2954,9 +2954,9 @@ ; SKX-LABEL: test_perm2f128: ; SKX: # BB#0: ; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_perm2f128: ; BTVER2: # BB#0: @@ -3010,9 +3010,9 @@ ; SKX-LABEL: test_permilpd: ; SKX: # BB#0: ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilpd: ; BTVER2: # BB#0: @@ -3066,9 +3066,9 @@ ; SKX-LABEL: test_permilpd_ymm: ; SKX: # BB#0: ; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] -; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [1:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilpd_ymm: ; BTVER2: # BB#0: @@ -3122,9 +3122,9 @@ ; SKX-LABEL: test_permilps: ; SKX: # BB#0: ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SKX-NEXT: vpermilps {{.*#+}} 
xmm1 = mem[3,2,1,0] sched: [1:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilps: ; BTVER2: # BB#0: @@ -3178,9 +3178,9 @@ ; SKX-LABEL: test_permilps_ymm: ; SKX: # BB#0: ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilps_ymm: ; BTVER2: # BB#0: @@ -3230,8 +3230,8 @@ ; SKX-LABEL: test_permilvarpd: ; SKX: # BB#0: ; SKX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilvarpd: ; BTVER2: # BB#0: @@ -3279,8 +3279,8 @@ ; SKX-LABEL: test_permilvarpd_ymm: ; SKX: # BB#0: ; SKX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilvarpd_ymm: ; BTVER2: # BB#0: @@ -3328,8 +3328,8 @@ ; SKX-LABEL: test_permilvarps: ; SKX: # BB#0: ; SKX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: 
test_permilvarps: ; BTVER2: # BB#0: @@ -3377,8 +3377,8 @@ ; SKX-LABEL: test_permilvarps_ymm: ; SKX: # BB#0: ; SKX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilvarps_ymm: ; BTVER2: # BB#0: @@ -3429,10 +3429,10 @@ ; ; SKX-LABEL: test_rcpps: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm0 -; SKX-NEXT: vrcp14ps (%rdi), %ymm1 -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vrcp14ps (%rdi), %ymm1 # sched: [11:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # BB#0: @@ -3487,9 +3487,9 @@ ; SKX-LABEL: test_roundpd: ; SKX: # BB#0: ; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:0.67] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:0.67] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: @@ -3544,9 +3544,9 @@ ; SKX-LABEL: test_roundps: ; SKX: # BB#0: ; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:0.67] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:0.67] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: @@ -3600,10 +3600,10 @@ ; ; SKX-LABEL: test_rsqrtps: ; SKX: # BB#0: -; SKX-NEXT: vrsqrt14ps %ymm0, %ymm0 -; SKX-NEXT: vrsqrt14ps (%rdi), %ymm1 -; 
SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrsqrt14ps %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vrsqrt14ps (%rdi), %ymm1 # sched: [11:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # BB#0: @@ -3658,9 +3658,9 @@ ; SKX-LABEL: test_shufpd: ; SKX: # BB#0: ; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: @@ -3710,8 +3710,8 @@ ; SKX-LABEL: test_shufps: ; SKX: # BB#0: ; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: @@ -3762,9 +3762,9 @@ ; SKX-LABEL: test_sqrtpd: ; SKX: # BB#0: ; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:1.00] -; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [18:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # BB#0: @@ -3819,9 +3819,9 @@ ; SKX-LABEL: test_sqrtps: ; SKX: # BB#0: ; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:1.00] -; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [12:1.00] -; SKX-NEXT: 
vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # BB#0: @@ -3871,9 +3871,9 @@ ; ; SKX-LABEL: test_subpd: ; SKX: # BB#0: -; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_subpd: ; BTVER2: # BB#0: @@ -3919,9 +3919,9 @@ ; ; SKX-LABEL: test_subps: ; SKX: # BB#0: -; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_subps: ; BTVER2: # BB#0: @@ -3981,10 +3981,10 @@ ; SKX: # BB#0: ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] ; SKX-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setb %al # sched: [1:1.00] -; SKX-NEXT: vtestpd (%rdi), %xmm0 # sched: [2:1.00] -; SKX-NEXT: adcl $0, %eax # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: setb %al # sched: [1:0.50] +; SKX-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] +; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_testpd: ; BTVER2: # BB#0: @@ -4056,11 +4056,11 @@ ; SKX: # BB#0: ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] ; SKX-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00] -; SKX-NEXT: setb %al # sched: [1:1.00] -; SKX-NEXT: vtestpd (%rdi), %ymm0 # sched: [2:1.00] -; SKX-NEXT: adcl $0, %eax # sched: [1:1.00] +; SKX-NEXT: setb %al # sched: [1:0.50] +; SKX-NEXT: vtestpd (%rdi), %ymm0 # 
sched: [9:1.00] +; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_testpd_ymm: ; BTVER2: # BB#0: @@ -4129,10 +4129,10 @@ ; SKX: # BB#0: ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] ; SKX-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setb %al # sched: [1:1.00] -; SKX-NEXT: vtestps (%rdi), %xmm0 # sched: [2:1.00] -; SKX-NEXT: adcl $0, %eax # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: setb %al # sched: [1:0.50] +; SKX-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] +; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_testps: ; BTVER2: # BB#0: @@ -4204,11 +4204,11 @@ ; SKX: # BB#0: ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] ; SKX-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00] -; SKX-NEXT: setb %al # sched: [1:1.00] -; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [2:1.00] -; SKX-NEXT: adcl $0, %eax # sched: [1:1.00] +; SKX-NEXT: setb %al # sched: [1:0.50] +; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00] +; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_testps_ymm: ; BTVER2: # BB#0: @@ -4268,9 +4268,9 @@ ; SKX-LABEL: test_unpckhpd: ; SKX: # BB#0: ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [1:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # BB#0: @@ -4320,8 +4320,8 @@ ; SKX-LABEL: test_unpckhps: ; SKX: # BB#0: ; SKX-NEXT: vunpckhps {{.*#+}} ymm0 
= ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # BB#0: @@ -4372,9 +4372,9 @@ ; SKX-LABEL: test_unpcklpd: ; SKX: # BB#0: ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # BB#0: @@ -4424,8 +4424,8 @@ ; SKX-LABEL: test_unpcklps: ; SKX: # BB#0: ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # BB#0: @@ -4475,10 +4475,10 @@ ; ; SKX-LABEL: test_xorpd: ; SKX: # BB#0: -; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: 
retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: @@ -4535,10 +4535,10 @@ ; ; SKX-LABEL: test_xorps: ; SKX: # BB#0: -; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: @@ -4588,7 +4588,7 @@ ; SKX-LABEL: test_zeroall: ; SKX: # BB#0: ; SKX-NEXT: vzeroall # sched: [16:4.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_zeroall: ; BTVER2: # BB#0: @@ -4628,7 +4628,7 @@ ; SKX-LABEL: test_zeroupper: ; SKX: # BB#0: ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_zeroupper: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll @@ -26,9 +26,9 @@ ; ; SKX-LABEL: test_broadcasti128: ; SKX: # BB#0: -; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [1:0.50] -; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] +; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcasti128: ; ZNVER1: # BB#0: @@ -63,8 +63,8 @@ ; SKX-LABEL: test_broadcastsd_ymm: ; SKX: # BB#0: ; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # 
sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastsd_ymm: ; ZNVER1: # BB#0: @@ -98,8 +98,8 @@ ; SKX-LABEL: test_broadcastss: ; SKX: # BB#0: ; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastss: ; ZNVER1: # BB#0: @@ -133,8 +133,8 @@ ; SKX-LABEL: test_broadcastss_ymm: ; SKX: # BB#0: ; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastss_ymm: ; ZNVER1: # BB#0: @@ -176,12 +176,12 @@ ; ; SKX-LABEL: test_extracti128: ; SKX: # BB#0: -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] -; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33] +; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_extracti128: ; ZNVER1: # BB#0: @@ -217,8 +217,8 @@ ; ; SKX-LABEL: test_gatherdpd: ; SKX: # BB#0: -; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherdpd: ; ZNVER1: # BB#0: @@ -247,8 +247,8 @@ ; ; SKX-LABEL: test_gatherdpd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [20:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 
# sched: [25:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherdpd_ymm: ; ZNVER1: # BB#0: @@ -277,8 +277,8 @@ ; ; SKX-LABEL: test_gatherdps: ; SKX: # BB#0: -; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherdps: ; ZNVER1: # BB#0: @@ -307,8 +307,8 @@ ; ; SKX-LABEL: test_gatherdps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [20:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherdps_ymm: ; ZNVER1: # BB#0: @@ -337,8 +337,8 @@ ; ; SKX-LABEL: test_gatherqpd: ; SKX: # BB#0: -; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherqpd: ; ZNVER1: # BB#0: @@ -367,8 +367,8 @@ ; ; SKX-LABEL: test_gatherqpd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [20:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherqpd_ymm: ; ZNVER1: # BB#0: @@ -397,8 +397,8 @@ ; ; SKX-LABEL: test_gatherqps: ; SKX: # BB#0: -; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherqps: ; ZNVER1: # BB#0: @@ -430,9 +430,9 @@ ; ; SKX-LABEL: test_gatherqps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [20:1.00] +; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # 
sched: [25:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_gatherqps_ymm: ; ZNVER1: # BB#0: @@ -469,9 +469,9 @@ ; SKX-LABEL: test_inserti128: ; SKX: # BB#0: ; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_inserti128: ; ZNVER1: # BB#0: @@ -506,8 +506,8 @@ ; ; SKX-LABEL: test_movntdqa: ; SKX: # BB#0: -; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_movntdqa: ; ZNVER1: # BB#0: @@ -540,8 +540,8 @@ ; SKX-LABEL: test_mpsadbw: ; SKX: # BB#0: ; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] -; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [4:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_mpsadbw: ; ZNVER1: # BB#0: @@ -580,10 +580,10 @@ ; ; SKX-LABEL: test_pabsb: ; SKX: # BB#0: -; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pabsb: ; ZNVER1: # BB#0: @@ -623,10 +623,10 @@ ; ; SKX-LABEL: test_pabsd: ; SKX: # BB#0: -; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: 
vpabsd (%rdi), %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pabsd: ; ZNVER1: # BB#0: @@ -666,10 +666,10 @@ ; ; SKX-LABEL: test_pabsw: ; SKX: # BB#0: -; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pabsw: ; ZNVER1: # BB#0: @@ -707,8 +707,8 @@ ; SKX-LABEL: test_packssdw: ; SKX: # BB#0: ; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_packssdw: ; ZNVER1: # BB#0: @@ -745,8 +745,8 @@ ; SKX-LABEL: test_packsswb: ; SKX: # BB#0: ; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_packsswb: ; ZNVER1: # BB#0: @@ -783,8 +783,8 @@ ; SKX-LABEL: test_packusdw: ; SKX: # BB#0: ; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_packusdw: ; ZNVER1: # BB#0: @@ -821,8 
+821,8 @@ ; SKX-LABEL: test_packuswb: ; SKX: # BB#0: ; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_packuswb: ; ZNVER1: # BB#0: @@ -858,9 +858,9 @@ ; ; SKX-LABEL: test_paddb: ; SKX: # BB#0: -; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_paddb: ; ZNVER1: # BB#0: @@ -894,9 +894,9 @@ ; ; SKX-LABEL: test_paddd: ; SKX: # BB#0: -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_paddd: ; ZNVER1: # BB#0: @@ -930,9 +930,9 @@ ; ; SKX-LABEL: test_paddq: ; SKX: # BB#0: -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_paddq: ; ZNVER1: # BB#0: @@ -966,9 +966,9 @@ ; ; SKX-LABEL: test_paddsb: ; SKX: # BB#0: -; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: 
test_paddsb: ; ZNVER1: # BB#0: @@ -1003,9 +1003,9 @@ ; ; SKX-LABEL: test_paddsw: ; SKX: # BB#0: -; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_paddsw: ; ZNVER1: # BB#0: @@ -1040,9 +1040,9 @@ ; ; SKX-LABEL: test_paddusb: ; SKX: # BB#0: -; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_paddusb: ; ZNVER1: # BB#0: @@ -1077,9 +1077,9 @@ ; ; SKX-LABEL: test_paddusw: ; SKX: # BB#0: -; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_paddusw: ; ZNVER1: # BB#0: @@ -1114,9 +1114,9 @@ ; ; SKX-LABEL: test_paddw: ; SKX: # BB#0: -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_paddw: ; ZNVER1: # BB#0: @@ -1151,8 +1151,8 @@ ; SKX-LABEL: test_palignr: ; SKX: # BB#0: ; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; SKX-NEXT: vpalignr {{.*#+}} ymm0 = 
mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpalignr {{.*#+}} ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_palignr: ; ZNVER1: # BB#0: @@ -1189,10 +1189,10 @@ ; ; SKX-LABEL: test_pand: ; SKX: # BB#0: -; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pand: ; ZNVER1: # BB#0: @@ -1231,10 +1231,10 @@ ; ; SKX-LABEL: test_pandn: ; SKX: # BB#0: -; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pandn: ; ZNVER1: # BB#0: @@ -1272,9 +1272,9 @@ ; ; SKX-LABEL: test_pavgb: ; SKX: # BB#0: -; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pavgb: ; ZNVER1: # BB#0: @@ -1318,9 +1318,9 @@ ; ; SKX-LABEL: test_pavgw: ; SKX: # BB#0: -; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # 
sched: [1:1.00] -; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pavgw: ; ZNVER1: # BB#0: @@ -1367,10 +1367,10 @@ ; ; SKX-LABEL: test_pblendd: ; SKX: # BB#0: -; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] -; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [1:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] +; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pblendd: ; ZNVER1: # BB#0: @@ -1409,10 +1409,10 @@ ; ; SKX-LABEL: test_pblendd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] -; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [1:0.50] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] +; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pblendd_ymm: ; ZNVER1: # BB#0: @@ -1449,8 +1449,8 @@ ; SKX-LABEL: test_pblendvb: ; SKX: # BB#0: ; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [2:0.67] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pblendvb: ; ZNVER1: # BB#0: @@ 
-1486,8 +1486,8 @@ ; SKX-LABEL: test_pblendw: ; SKX: # BB#0: ; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] -; SKX-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1],mem[2],ymm0[3],mem[4],ymm0[5],mem[6],ymm0[7],mem[8],ymm0[9],mem[10],ymm0[11],mem[12],ymm0[13],mem[14],ymm0[15] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pblendw: ; ZNVER1: # BB#0: @@ -1525,9 +1525,9 @@ ; SKX-LABEL: test_pbroadcastb: ; SKX: # BB#0: ; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00] +; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pbroadcastb: ; ZNVER1: # BB#0: @@ -1567,9 +1567,9 @@ ; SKX-LABEL: test_pbroadcastb_ymm: ; SKX: # BB#0: ; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [1:1.00] -; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00] +; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pbroadcastb_ymm: ; ZNVER1: # BB#0: @@ -1609,8 +1609,8 @@ ; SKX-LABEL: test_pbroadcastd: ; SKX: # BB#0: ; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddd (%rdi){1to4}, %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pbroadcastd: ; ZNVER1: # BB#0: 
@@ -1650,8 +1650,8 @@ ; SKX-LABEL: test_pbroadcastd_ymm: ; SKX: # BB#0: ; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddd (%rdi){1to8}, %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pbroadcastd_ymm: ; ZNVER1: # BB#0: @@ -1691,8 +1691,8 @@ ; SKX-LABEL: test_pbroadcastq: ; SKX: # BB#0: ; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddq (%rdi){1to2}, %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pbroadcastq: ; ZNVER1: # BB#0: @@ -1732,8 +1732,8 @@ ; SKX-LABEL: test_pbroadcastq_ymm: ; SKX: # BB#0: ; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddq (%rdi){1to4}, %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pbroadcastq_ymm: ; ZNVER1: # BB#0: @@ -1773,9 +1773,9 @@ ; SKX-LABEL: test_pbroadcastw: ; SKX: # BB#0: ; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pbroadcastw: ; ZNVER1: # BB#0: @@ -1815,9 +1815,9 @@ ; SKX-LABEL: test_pbroadcastw_ymm: ; SKX: # BB#0: ; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [1:1.00] -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00] +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # 
sched: [7:1.00] ; ; ZNVER1-LABEL: test_pbroadcastw_ymm: ; ZNVER1: # BB#0: @@ -1853,11 +1853,11 @@ ; ; SKX-LABEL: test_pcmpeqb: ; SKX: # BB#0: -; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2b %k0, %ymm0 -; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 +; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 # sched: [10:1.00] ; SKX-NEXT: vpmovm2b %k0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpeqb: ; ZNVER1: # BB#0: @@ -1893,11 +1893,11 @@ ; ; SKX-LABEL: test_pcmpeqd: ; SKX: # BB#0: -; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2d %k0, %ymm0 -; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 +; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 # sched: [10:1.00] ; SKX-NEXT: vpmovm2d %k0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpeqd: ; ZNVER1: # BB#0: @@ -1933,11 +1933,11 @@ ; ; SKX-LABEL: test_pcmpeqq: ; SKX: # BB#0: -; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2q %k0, %ymm0 -; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 +; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 # sched: [10:1.00] ; SKX-NEXT: vpmovm2q %k0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpeqq: ; ZNVER1: # BB#0: @@ -1973,11 +1973,11 @@ ; ; SKX-LABEL: test_pcmpeqw: ; SKX: # BB#0: -; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2w %k0, %ymm0 -; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 +; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 # sched: [10:1.00] ; SKX-NEXT: vpmovm2w %k0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpeqw: ; ZNVER1: # BB#0: @@ -2013,11 +2013,11 @@ ; ; SKX-LABEL: test_pcmpgtb: ; SKX: # BB#0: -; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 +; 
SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2b %k0, %ymm0 -; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 +; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 # sched: [10:1.00] ; SKX-NEXT: vpmovm2b %k0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpgtb: ; ZNVER1: # BB#0: @@ -2053,11 +2053,11 @@ ; ; SKX-LABEL: test_pcmpgtd: ; SKX: # BB#0: -; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2d %k0, %ymm0 -; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 +; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 # sched: [10:1.00] ; SKX-NEXT: vpmovm2d %k0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpgtd: ; ZNVER1: # BB#0: @@ -2093,11 +2093,11 @@ ; ; SKX-LABEL: test_pcmpgtq: ; SKX: # BB#0: -; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2q %k0, %ymm0 -; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 +; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 # sched: [10:1.00] ; SKX-NEXT: vpmovm2q %k0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpgtq: ; ZNVER1: # BB#0: @@ -2133,11 +2133,11 @@ ; ; SKX-LABEL: test_pcmpgtw: ; SKX: # BB#0: -; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 +; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2w %k0, %ymm0 -; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 +; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 # sched: [10:1.00] ; SKX-NEXT: vpmovm2w %k0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pcmpgtw: ; ZNVER1: # BB#0: @@ -2177,9 +2177,9 @@ ; SKX-LABEL: test_perm2i128: ; SKX: # BB#0: ; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] -; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # 
sched: [2:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] +; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_perm2i128: ; ZNVER1: # BB#0: @@ -2219,9 +2219,9 @@ ; SKX-LABEL: test_permd: ; SKX: # BB#0: ; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_permd: ; ZNVER1: # BB#0: @@ -2262,9 +2262,9 @@ ; SKX-LABEL: test_permpd: ; SKX: # BB#0: ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_permpd: ; ZNVER1: # BB#0: @@ -2304,9 +2304,9 @@ ; SKX-LABEL: test_permps: ; SKX: # BB#0: ; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_permps: ; ZNVER1: # BB#0: @@ -2347,9 +2347,9 @@ ; SKX-LABEL: test_permq: ; SKX: # BB#0: ; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] -; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [3:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpermq 
{{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_permq: ; ZNVER1: # BB#0: @@ -2382,8 +2382,8 @@ ; ; SKX-LABEL: test_pgatherdd: ; SKX: # BB#0: -; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherdd: ; ZNVER1: # BB#0: @@ -2412,8 +2412,8 @@ ; ; SKX-LABEL: test_pgatherdd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherdd_ymm: ; ZNVER1: # BB#0: @@ -2442,8 +2442,8 @@ ; ; SKX-LABEL: test_pgatherdq: ; SKX: # BB#0: -; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherdq: ; ZNVER1: # BB#0: @@ -2472,8 +2472,8 @@ ; ; SKX-LABEL: test_pgatherdq_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [20:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherdq_ymm: ; ZNVER1: # BB#0: @@ -2502,8 +2502,8 @@ ; ; SKX-LABEL: test_pgatherqd: ; SKX: # BB#0: -; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherqd: ; ZNVER1: # BB#0: @@ -2535,9 +2535,9 @@ ; ; SKX-LABEL: test_pgatherqd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpgatherqd %xmm2, 
(%rdi,%ymm1,2), %xmm0 # sched: [20:1.00] +; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherqd_ymm: ; ZNVER1: # BB#0: @@ -2567,8 +2567,8 @@ ; ; SKX-LABEL: test_pgatherqq: ; SKX: # BB#0: -; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [17:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherqq: ; ZNVER1: # BB#0: @@ -2597,8 +2597,8 @@ ; ; SKX-LABEL: test_pgatherqq_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [20:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pgatherqq_ymm: ; ZNVER1: # BB#0: @@ -2631,8 +2631,8 @@ ; SKX-LABEL: test_phaddd: ; SKX: # BB#0: ; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_phaddd: ; ZNVER1: # BB#0: @@ -2668,8 +2668,8 @@ ; SKX-LABEL: test_phaddsw: ; SKX: # BB#0: ; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_phaddsw: ; ZNVER1: # BB#0: @@ -2705,8 +2705,8 @@ ; SKX-LABEL: test_phaddw: ; SKX: # BB#0: ; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKX-NEXT: retq # sched: 
[7:1.00] ; ; ZNVER1-LABEL: test_phaddw: ; ZNVER1: # BB#0: @@ -2742,8 +2742,8 @@ ; SKX-LABEL: test_phsubd: ; SKX: # BB#0: ; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_phsubd: ; ZNVER1: # BB#0: @@ -2779,8 +2779,8 @@ ; SKX-LABEL: test_phsubsw: ; SKX: # BB#0: ; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_phsubsw: ; ZNVER1: # BB#0: @@ -2816,8 +2816,8 @@ ; SKX-LABEL: test_phsubw: ; SKX: # BB#0: ; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_phsubw: ; ZNVER1: # BB#0: @@ -2853,8 +2853,8 @@ ; SKX-LABEL: test_pmaddubsw: ; SKX: # BB#0: ; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaddubsw: ; ZNVER1: # BB#0: @@ -2891,8 +2891,8 @@ ; SKX-LABEL: test_pmaddwd: ; SKX: # BB#0: ; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaddwd: ; ZNVER1: # BB#0: @@ -2931,10 +2931,10 @@ ; ; SKX-LABEL: test_pmaskmovd: ; SKX: # BB#0: -; SKX-NEXT: vpmaskmovd (%rdi), 
%xmm0, %xmm2 # sched: [1:0.50] -; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] +; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaskmovd: ; ZNVER1: # BB#0: @@ -2973,10 +2973,10 @@ ; ; SKX-LABEL: test_pmaskmovd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] +; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaskmovd_ymm: ; ZNVER1: # BB#0: @@ -3015,10 +3015,10 @@ ; ; SKX-LABEL: test_pmaskmovq: ; SKX: # BB#0: -; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [1:0.50] -; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] +; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaskmovq: ; ZNVER1: # BB#0: @@ -3057,10 +3057,10 @@ ; ; SKX-LABEL: test_pmaskmovq_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] +; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaskmovq_ymm: ; ZNVER1: # BB#0: @@ -3096,9 +3096,9 @@ ; ; SKX-LABEL: test_pmaxsb: ; SKX: # BB#0: -; SKX-NEXT: vpmaxsb %ymm1, 
%ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaxsb: ; ZNVER1: # BB#0: @@ -3133,9 +3133,9 @@ ; ; SKX-LABEL: test_pmaxsd: ; SKX: # BB#0: -; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaxsd: ; ZNVER1: # BB#0: @@ -3170,9 +3170,9 @@ ; ; SKX-LABEL: test_pmaxsw: ; SKX: # BB#0: -; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaxsw: ; ZNVER1: # BB#0: @@ -3207,9 +3207,9 @@ ; ; SKX-LABEL: test_pmaxub: ; SKX: # BB#0: -; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaxub: ; ZNVER1: # BB#0: @@ -3244,9 +3244,9 @@ ; ; SKX-LABEL: test_pmaxud: ; SKX: # BB#0: -; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: 
test_pmaxud: ; ZNVER1: # BB#0: @@ -3281,9 +3281,9 @@ ; ; SKX-LABEL: test_pmaxuw: ; SKX: # BB#0: -; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmaxuw: ; ZNVER1: # BB#0: @@ -3318,9 +3318,9 @@ ; ; SKX-LABEL: test_pminsb: ; SKX: # BB#0: -; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pminsb: ; ZNVER1: # BB#0: @@ -3355,9 +3355,9 @@ ; ; SKX-LABEL: test_pminsd: ; SKX: # BB#0: -; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pminsd: ; ZNVER1: # BB#0: @@ -3392,9 +3392,9 @@ ; ; SKX-LABEL: test_pminsw: ; SKX: # BB#0: -; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pminsw: ; ZNVER1: # BB#0: @@ -3429,9 +3429,9 @@ ; ; SKX-LABEL: test_pminub: ; SKX: # BB#0: -; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: 
[1:0.50] +; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pminub: ; ZNVER1: # BB#0: @@ -3466,9 +3466,9 @@ ; ; SKX-LABEL: test_pminud: ; SKX: # BB#0: -; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pminud: ; ZNVER1: # BB#0: @@ -3503,9 +3503,9 @@ ; ; SKX-LABEL: test_pminuw: ; SKX: # BB#0: -; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pminuw: ; ZNVER1: # BB#0: @@ -3542,7 +3542,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovmskb: ; ZNVER1: # BB#0: @@ -3579,9 +3579,9 @@ ; SKX-LABEL: test_pmovsxbd: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovsxbd: ; ZNVER1: # BB#0: @@ -3623,9 +3623,9 @@ ; SKX-LABEL: test_pmovsxbq: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; 
SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovsxbq: ; ZNVER1: # BB#0: @@ -3667,9 +3667,9 @@ ; SKX-LABEL: test_pmovsxbw: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovsxbw: ; ZNVER1: # BB#0: @@ -3709,9 +3709,9 @@ ; SKX-LABEL: test_pmovsxdq: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovsxdq: ; ZNVER1: # BB#0: @@ -3751,9 +3751,9 @@ ; SKX-LABEL: test_pmovsxwd: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovsxwd: ; ZNVER1: # BB#0: @@ -3793,9 +3793,9 @@ ; SKX-LABEL: test_pmovsxwq: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [3:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # 
sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovsxwq: ; ZNVER1: # BB#0: @@ -3837,9 +3837,9 @@ ; SKX-LABEL: test_pmovzxbd: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovzxbd: ; ZNVER1: # BB#0: @@ -3881,9 +3881,9 @@ ; SKX-LABEL: test_pmovzxbq: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; 
; ZNVER1-LABEL: test_pmovzxbq: ; ZNVER1: # BB#0: @@ -3925,9 +3925,9 @@ ; SKX-LABEL: test_pmovzxbw: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [3:1.00] -; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] +; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovzxbw: ; ZNVER1: # BB#0: @@ -3967,9 +3967,9 @@ ; SKX-LABEL: test_pmovzxdq: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [3:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovzxdq: ; ZNVER1: # BB#0: @@ -4009,9 +4009,9 @@ ; SKX-LABEL: test_pmovzxwd: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} 
ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [3:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovzxwd: ; ZNVER1: # BB#0: @@ -4051,9 +4051,9 @@ ; SKX-LABEL: test_pmovzxwq: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmovzxwq: ; ZNVER1: # BB#0: @@ -4092,8 +4092,8 @@ ; SKX-LABEL: test_pmuldq: ; SKX: # BB#0: ; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmuldq: ; ZNVER1: # BB#0: @@ -4130,8 +4130,8 @@ ; SKX-LABEL: test_pmulhrsw: ; SKX: # BB#0: ; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmulhrsw: ; ZNVER1: # BB#0: @@ -4167,8 
+4167,8 @@ ; SKX-LABEL: test_pmulhuw: ; SKX: # BB#0: ; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmulhuw: ; ZNVER1: # BB#0: @@ -4204,8 +4204,8 @@ ; SKX-LABEL: test_pmulhw: ; SKX: # BB#0: ; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmulhw: ; ZNVER1: # BB#0: @@ -4241,8 +4241,8 @@ ; SKX-LABEL: test_pmulld: ; SKX: # BB#0: ; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [15:0.67] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmulld: ; ZNVER1: # BB#0: @@ -4277,8 +4277,8 @@ ; SKX-LABEL: test_pmullw: ; SKX: # BB#0: ; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmullw: ; ZNVER1: # BB#0: @@ -4313,8 +4313,8 @@ ; SKX-LABEL: test_pmuludq: ; SKX: # BB#0: ; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmuludq: ; ZNVER1: # BB#0: @@ -4353,10 +4353,10 @@ ; ; SKX-LABEL: test_por: ; SKX: # BB#0: -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; 
SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_por: ; ZNVER1: # BB#0: @@ -4393,8 +4393,8 @@ ; SKX-LABEL: test_psadbw: ; SKX: # BB#0: ; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psadbw: ; ZNVER1: # BB#0: @@ -4431,8 +4431,8 @@ ; SKX-LABEL: test_pshufb: ; SKX: # BB#0: ; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pshufb: ; ZNVER1: # BB#0: @@ -4471,9 +4471,9 @@ ; SKX-LABEL: test_pshufd: ; SKX: # BB#0: ; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] -; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [1:1.00] -; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] +; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pshufd: ; ZNVER1: # BB#0: @@ -4513,9 +4513,9 @@ ; SKX-LABEL: test_pshufhw: ; SKX: # BB#0: ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [1:1.00] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = 
mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pshufhw: ; ZNVER1: # BB#0: @@ -4555,9 +4555,9 @@ ; SKX-LABEL: test_pshuflw: ; SKX: # BB#0: ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [1:1.00] -; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] +; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pshuflw: ; ZNVER1: # BB#0: @@ -4593,9 +4593,9 @@ ; ; SKX-LABEL: test_psignb: ; SKX: # BB#0: -; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psignb: ; ZNVER1: # BB#0: @@ -4630,9 +4630,9 @@ ; ; SKX-LABEL: test_psignd: ; SKX: # BB#0: -; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psignd: ; ZNVER1: # BB#0: @@ -4667,9 +4667,9 @@ ; ; SKX-LABEL: test_psignw: ; SKX: # BB#0: -; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # 
sched: [7:1.00] ; ; ZNVER1-LABEL: test_psignw: ; ZNVER1: # BB#0: @@ -4708,9 +4708,9 @@ ; SKX-LABEL: test_pslld: ; SKX: # BB#0: ; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pslld: ; ZNVER1: # BB#0: @@ -4745,7 +4745,7 @@ ; SKX-LABEL: test_pslldq: ; SKX: # BB#0: ; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pslldq: ; ZNVER1: # BB#0: @@ -4780,9 +4780,9 @@ ; SKX-LABEL: test_psllq: ; SKX: # BB#0: ; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psllq: ; ZNVER1: # BB#0: @@ -4819,9 +4819,9 @@ ; ; SKX-LABEL: test_psllvd: ; SKX: # BB#0: -; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psllvd: ; ZNVER1: # BB#0: @@ -4856,9 +4856,9 @@ ; ; SKX-LABEL: test_psllvd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; 
SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psllvd_ymm: ; ZNVER1: # BB#0: @@ -4893,9 +4893,9 @@ ; ; SKX-LABEL: test_psllvq: ; SKX: # BB#0: -; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psllvq: ; ZNVER1: # BB#0: @@ -4930,9 +4930,9 @@ ; ; SKX-LABEL: test_psllvq_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psllvq_ymm: ; ZNVER1: # BB#0: @@ -4971,9 +4971,9 @@ ; SKX-LABEL: test_psllw: ; SKX: # BB#0: ; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psllw: ; ZNVER1: # BB#0: @@ -5014,9 +5014,9 @@ ; SKX-LABEL: test_psrad: ; SKX: # BB#0: ; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrad: ; ZNVER1: # BB#0: @@ 
-5053,9 +5053,9 @@ ; ; SKX-LABEL: test_psravd: ; SKX: # BB#0: -; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psravd: ; ZNVER1: # BB#0: @@ -5090,9 +5090,9 @@ ; ; SKX-LABEL: test_psravd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psravd_ymm: ; ZNVER1: # BB#0: @@ -5131,9 +5131,9 @@ ; SKX-LABEL: test_psraw: ; SKX: # BB#0: ; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psraw: ; ZNVER1: # BB#0: @@ -5174,9 +5174,9 @@ ; SKX-LABEL: test_psrld: ; SKX: # BB#0: ; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrld: ; ZNVER1: # BB#0: @@ -5211,7 +5211,7 @@ ; SKX-LABEL: test_psrldq: ; SKX: # BB#0: ; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero 
sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrldq: ; ZNVER1: # BB#0: @@ -5246,9 +5246,9 @@ ; SKX-LABEL: test_psrlq: ; SKX: # BB#0: ; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrlq: ; ZNVER1: # BB#0: @@ -5285,9 +5285,9 @@ ; ; SKX-LABEL: test_psrlvd: ; SKX: # BB#0: -; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrlvd: ; ZNVER1: # BB#0: @@ -5322,9 +5322,9 @@ ; ; SKX-LABEL: test_psrlvd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrlvd_ymm: ; ZNVER1: # BB#0: @@ -5359,9 +5359,9 @@ ; ; SKX-LABEL: test_psrlvq: ; SKX: # BB#0: -; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrlvq: ; ZNVER1: # BB#0: @@ -5396,9 +5396,9 @@ ; ; SKX-LABEL: test_psrlvq_ymm: ; SKX: # BB#0: -; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # 
sched: [1:1.00] -; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrlvq_ymm: ; ZNVER1: # BB#0: @@ -5437,9 +5437,9 @@ ; SKX-LABEL: test_psrlw: ; SKX: # BB#0: ; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] -; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psrlw: ; ZNVER1: # BB#0: @@ -5476,9 +5476,9 @@ ; ; SKX-LABEL: test_psubb: ; SKX: # BB#0: -; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psubb: ; ZNVER1: # BB#0: @@ -5512,9 +5512,9 @@ ; ; SKX-LABEL: test_psubd: ; SKX: # BB#0: -; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psubd: ; ZNVER1: # BB#0: @@ -5548,9 +5548,9 @@ ; ; SKX-LABEL: test_psubq: ; SKX: # BB#0: -; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; 
ZNVER1-LABEL: test_psubq: ; ZNVER1: # BB#0: @@ -5584,9 +5584,9 @@ ; ; SKX-LABEL: test_psubsb: ; SKX: # BB#0: -; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psubsb: ; ZNVER1: # BB#0: @@ -5621,9 +5621,9 @@ ; ; SKX-LABEL: test_psubsw: ; SKX: # BB#0: -; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psubsw: ; ZNVER1: # BB#0: @@ -5658,9 +5658,9 @@ ; ; SKX-LABEL: test_psubusb: ; SKX: # BB#0: -; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psubusb: ; ZNVER1: # BB#0: @@ -5695,9 +5695,9 @@ ; ; SKX-LABEL: test_psubusw: ; SKX: # BB#0: -; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] +; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psubusw: ; ZNVER1: # BB#0: @@ -5732,9 +5732,9 @@ ; ; SKX-LABEL: test_psubw: ; SKX: # BB#0: -; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubw %ymm1, 
%ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_psubw: ; ZNVER1: # BB#0: @@ -5769,8 +5769,8 @@ ; SKX-LABEL: test_punpckhbw: ; SKX: # BB#0: ; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] -; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_punpckhbw: ; ZNVER1: # BB#0: @@ -5811,10 +5811,10 @@ ; SKX-LABEL: test_punpckhdq: ; SKX: # BB#0: ; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] -; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] +; SKX-NEXT: 
vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_punpckhdq: ; ZNVER1: # BB#0: @@ -5855,9 +5855,9 @@ ; SKX-LABEL: test_punpckhqdq: ; SKX: # BB#0: ; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00] -; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] +; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_punpckhqdq: ; ZNVER1: # BB#0: @@ -5894,8 +5894,8 @@ ; SKX-LABEL: test_punpckhwd: ; SKX: # BB#0: ; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] -; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_punpckhwd: ; ZNVER1: # BB#0: @@ -5930,8 +5930,8 @@ ; SKX-LABEL: test_punpcklbw: ; SKX: # BB#0: ; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] -; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_punpcklbw: ; ZNVER1: # BB#0: @@ -5972,10 +5972,10 @@ ; SKX-LABEL: test_punpckldq: ; SKX: # BB#0: ; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] -; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:1.00] -; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] +; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_punpckldq: ; ZNVER1: # BB#0: @@ -6016,9 +6016,9 @@ ; SKX-LABEL: test_punpcklqdq: ; SKX: # BB#0: ; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00] -; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] +; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: 
[7:1.00] ; ; ZNVER1-LABEL: test_punpcklqdq: ; ZNVER1: # BB#0: @@ -6055,8 +6055,8 @@ ; SKX-LABEL: test_punpcklwd: ; SKX: # BB#0: ; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00] -; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_punpcklwd: ; ZNVER1: # BB#0: @@ -6093,10 +6093,10 @@ ; ; SKX-LABEL: test_pxor: ; SKX: # BB#0: -; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] +; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pxor: ; ZNVER1: # BB#0: Index: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll @@ -5,8 +5,8 @@ define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { ; CHECK-LABEL: addpd512: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %add.i = fadd <8 x double> %x, %y ret <8 x double> %add.i @@ -15,8 +15,8 @@ define <8 x double> @addpd512fold(<8 x double> %y) { 
; CHECK-LABEL: addpd512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %add.i = fadd <8 x double> %y, ret <8 x double> %add.i @@ -25,8 +25,8 @@ define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { ; CHECK-LABEL: addps512: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %add.i = fadd <16 x float> %x, %y ret <16 x float> %add.i @@ -35,8 +35,8 @@ define <16 x float> @addps512fold(<16 x float> %y) { ; CHECK-LABEL: addps512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %add.i = fadd <16 x float> %y, ret <16 x float> %add.i @@ -45,8 +45,8 @@ define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { ; CHECK-LABEL: subpd512: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %sub.i = fsub <8 x double> %x, %y ret <8 x double> %sub.i @@ -55,8 +55,8 @@ define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) { ; CHECK-LABEL: subpd512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %tmp2 = load <8 x double>, <8 x double>* %x, align 8 %sub.i = fsub <8 x double> %y, %tmp2 @@ -66,8 +66,8 @@ define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) { ; CHECK-LABEL: subps512: ; CHECK: # BB#0: # 
%entry -; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %sub.i = fsub <16 x float> %x, %y ret <16 x float> %sub.i @@ -76,8 +76,8 @@ define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) { ; CHECK-LABEL: subps512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %tmp2 = load <16 x float>, <16 x float>* %x, align 4 %sub.i = fsub <16 x float> %y, %tmp2 @@ -87,8 +87,8 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { ; CHECK-LABEL: imulq512: ; CHECK: # BB#0: -; CHECK-NEXT: vpmullq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: imulq512: ; SKX: # BB#0: ; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 @@ -100,8 +100,8 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { ; CHECK-LABEL: imulq256: ; CHECK: # BB#0: -; CHECK-NEXT: vpmullq %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: imulq256: ; SKX: # BB#0: ; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 @@ -113,8 +113,8 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { ; CHECK-LABEL: imulq128: ; CHECK: # BB#0: -; CHECK-NEXT: vpmullq %xmm0, %xmm1, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: imulq128: ; SKX: # BB#0: ; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 @@ -126,8 +126,8 @@ define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { ; CHECK-LABEL: mulpd512: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: 
retq # sched: [2:1.00] +; CHECK-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %mul.i = fmul <8 x double> %x, %y ret <8 x double> %mul.i @@ -136,8 +136,8 @@ define <8 x double> @mulpd512fold(<8 x double> %y) { ; CHECK-LABEL: mulpd512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %mul.i = fmul <8 x double> %y, ret <8 x double> %mul.i @@ -146,8 +146,8 @@ define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { ; CHECK-LABEL: mulps512: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %mul.i = fmul <16 x float> %x, %y ret <16 x float> %mul.i @@ -156,8 +156,8 @@ define <16 x float> @mulps512fold(<16 x float> %y) { ; CHECK-LABEL: mulps512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %mul.i = fmul <16 x float> %y, ret <16 x float> %mul.i @@ -166,8 +166,8 @@ define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) { ; CHECK-LABEL: divpd512: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %div.i = fdiv <8 x double> %x, %y ret <8 x double> %div.i @@ -176,8 +176,8 @@ define <8 x double> @divpd512fold(<8 x double> %y) { ; CHECK-LABEL: divpd512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:2.00] +; 
CHECK-NEXT: retq # sched: [7:1.00] entry: %div.i = fdiv <8 x double> %y, ret <8 x double> %div.i @@ -186,8 +186,8 @@ define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) { ; CHECK-LABEL: divps512: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [23:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %div.i = fdiv <16 x float> %x, %y ret <16 x float> %div.i @@ -196,8 +196,8 @@ define <16 x float> @divps512fold(<16 x float> %y) { ; CHECK-LABEL: divps512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [24:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %div.i = fdiv <16 x float> %y, ret <16 x float> %div.i @@ -206,8 +206,8 @@ define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { ; CHECK-LABEL: vpaddq_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = add <8 x i64> %i, %j ret <8 x i64> %x } @@ -215,8 +215,8 @@ define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind { ; CHECK-LABEL: vpaddq_fold_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %tmp = load <8 x i64>, <8 x i64>* %j, align 4 %x = add <8 x i64> %i, %tmp ret <8 x i64> %x @@ -225,8 +225,8 @@ define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind { ; CHECK-LABEL: vpaddq_broadcast_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %x = add <8 
x i64> %i, ret <8 x i64> %x } @@ -234,8 +234,8 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind { ; CHECK-LABEL: vpaddq_broadcast2_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %tmp = load i64, i64* %j %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0 %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1 @@ -252,8 +252,8 @@ define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { ; CHECK-LABEL: vpaddd_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = add <16 x i32> %i, %j ret <16 x i32> %x } @@ -261,8 +261,8 @@ define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind { ; CHECK-LABEL: vpaddd_fold_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %tmp = load <16 x i32>, <16 x i32>* %j, align 4 %x = add <16 x i32> %i, %tmp ret <16 x i32> %x @@ -271,8 +271,8 @@ define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind { ; CHECK-LABEL: vpaddd_broadcast_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %x = add <16 x i32> %i, ret <16 x i32> %x } @@ -280,10 +280,10 @@ define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { ; CHECK-LABEL: vpaddd_mask_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 -; CHECK-NEXT: vpaddd %zmm1, 
%zmm0, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %j %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i @@ -293,10 +293,10 @@ define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone { ; CHECK-LABEL: vpaddd_maskz_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 -; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %j %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer @@ -306,10 +306,10 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { ; CHECK-LABEL: vpaddd_mask_fold_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %j = load <16 x i32>, <16 x i32>* %j.ptr %x = add <16 x i32> %i, %j @@ -320,10 +320,10 @@ define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) 
nounwind readnone { ; CHECK-LABEL: vpaddd_mask_broadcast_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i @@ -333,10 +333,10 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone { ; CHECK-LABEL: vpaddd_maskz_fold_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %j = load <16 x i32>, <16 x i32>* %j.ptr %x = add <16 x i32> %i, %j @@ -347,10 +347,10 @@ define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone { ; CHECK-LABEL: vpaddd_maskz_broadcast_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: 
retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = add <16 x i32> %i, %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer @@ -360,8 +360,8 @@ define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone { ; CHECK-LABEL: vpsubq_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = sub <8 x i64> %i, %j ret <8 x i64> %x } @@ -369,8 +369,8 @@ define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone { ; CHECK-LABEL: vpsubd_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = sub <16 x i32> %i, %j ret <16 x i32> %x } @@ -378,8 +378,8 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { ; CHECK-LABEL: vpmulld_test: ; CHECK: # BB#0: -; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [8:0.67] +; CHECK-NEXT: retq # sched: [7:1.00] %x = mul <16 x i32> %i, %j ret <16 x i32> %x } @@ -389,7 +389,7 @@ ; CHECK-LABEL: sqrtA: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %conv1 = tail call float @sqrtf(float %a) nounwind readnone ret float %conv1 @@ -400,7 +400,7 @@ ; CHECK-LABEL: sqrtB: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %call = tail call double @sqrt(double %a) nounwind readnone ret double %call @@ -411,7 +411,7 @@ ; CHECK-LABEL: sqrtC: ; CHECK: # BB#0: ; CHECK-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] 
+; CHECK-NEXT: retq # sched: [7:1.00] %b = call float @llvm.sqrt.f32(float %a) ret float %b } @@ -420,8 +420,8 @@ define <16 x float> @sqrtD(<16 x float> %a) nounwind { ; CHECK-LABEL: sqrtD: ; CHECK: # BB#0: -; CHECK-NEXT: vsqrtps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vsqrtps %zmm0, %zmm0 # sched: [19:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a) ret <16 x float> %b } @@ -430,8 +430,8 @@ define <8 x double> @sqrtE(<8 x double> %a) nounwind { ; CHECK-LABEL: sqrtE: ; CHECK: # BB#0: -; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [31:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a) ret <8 x double> %b } @@ -439,8 +439,8 @@ define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind { ; CHECK-LABEL: fadd_broadcast: ; CHECK: # BB#0: -; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fadd <16 x float> %a, ret <16 x float> %b } @@ -448,8 +448,8 @@ define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind { ; CHECK-LABEL: addq_broadcast: ; CHECK: # BB#0: -; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %b = add <8 x i64> %a, ret <8 x i64> %b } @@ -457,8 +457,8 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind { ; CHECK-LABEL: orq_broadcast: ; CHECK: # BB#0: -; CHECK-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: orq_broadcast: ; SKX: # BB#0: ; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 @@ 
-470,8 +470,8 @@ define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; CHECK-LABEL: andd512fold: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vandps (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: andd512fold: ; SKX: # BB#0: # %entry ; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0 @@ -485,8 +485,8 @@ define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) { ; CHECK-LABEL: andqbrst: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: andqbrst: ; SKX: # BB#0: # %entry ; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 @@ -502,10 +502,10 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, ; CHECK-LABEL: test_mask_vaddps: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 -; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] <16 x float> %j, <16 x i32> %mask1) nounwind readnone { %mask = icmp ne <16 x i32> %mask1, zeroinitializer @@ -517,10 +517,10 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, ; CHECK-LABEL: test_mask_vmulps: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 -; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] 
+; CHECK-NEXT: retq # sched: [7:1.00] <16 x float> %j, <16 x i32> %mask1) nounwind readnone { %mask = icmp ne <16 x i32> %mask1, zeroinitializer @@ -532,10 +532,10 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, ; CHECK-LABEL: test_mask_vminps: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 -; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] <16 x float> %j, <16 x i32> %mask1) nounwind readnone { %mask = icmp ne <16 x i32> %mask1, zeroinitializer @@ -548,10 +548,10 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_vminpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 -; CHECK-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mask_vminpd: ; SKX: # BB#0: ; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -570,10 +570,10 @@ define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, ; CHECK-LABEL: test_mask_vmaxps: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 -; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; CHECK-NEXT: retq # 
sched: [7:1.00] <16 x float> %j, <16 x i32> %mask1) nounwind readnone { %mask = icmp ne <16 x i32> %mask1, zeroinitializer @@ -586,10 +586,10 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_vmaxpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 -; CHECK-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %ymm4, %ymm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mask_vmaxpd: ; SKX: # BB#0: ; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -608,10 +608,10 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, ; CHECK-LABEL: test_mask_vsubps: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 -; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] <16 x float> %j, <16 x i32> %mask1) nounwind readnone { %mask = icmp ne <16 x i32> %mask1, zeroinitializer @@ -623,10 +623,10 @@ define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, ; CHECK-LABEL: test_mask_vdivps: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 -; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm4, %zmm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [23:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] <16 x 
float> %j, <16 x i32> %mask1) nounwind readnone { %mask = icmp ne <16 x i32> %mask1, zeroinitializer @@ -638,10 +638,10 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1 -; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm4, %zmm3, %k1 # sched: [3:1.00] +; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] <8 x double> %j, <8 x i64> %mask1) nounwind readnone { %mask = icmp ne <8 x i64> %mask1, zeroinitializer @@ -653,10 +653,10 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, ; CHECK-LABEL: test_maskz_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1 -; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] <8 x i64> %mask1) nounwind readnone { %mask = icmp ne <8 x i64> %mask1, zeroinitializer %x = fadd <8 x double> %i, %j @@ -667,10 +667,10 @@ define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_fold_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1 -; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm3, %zmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50] +; CHECK-NEXT: 
retq # sched: [7:1.00] <8 x double>* %j, <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer @@ -683,10 +683,10 @@ define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, ; CHECK-LABEL: test_maskz_fold_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 -; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load <8 x double>, <8 x double>* %j, align 8 @@ -698,8 +698,8 @@ define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind { ; CHECK-LABEL: test_broadcast_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %tmp = load double, double* %j %b = insertelement <8 x double> undef, double %tmp, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, @@ -711,11 +711,11 @@ define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, ; CHECK-LABEL: test_mask_broadcast_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 -; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} +; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm0, %zmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm1 {%k1} # sched: [11:0.50] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] double* %j, <8 x i64> %mask1) nounwind { 
%mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -730,10 +730,10 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j, ; CHECK-LABEL: test_maskz_broadcast_vaddpd: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 -; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] <8 x i64> %mask1) nounwind { %mask = icmp ne <8 x i64> %mask1, zeroinitializer %tmp = load double, double* %j @@ -748,8 +748,8 @@ define <16 x float> @test_fxor(<16 x float> %a) { ; CHECK-LABEL: test_fxor: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_fxor: ; SKX: # BB#0: ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 @@ -762,8 +762,8 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) { ; CHECK-LABEL: test_fxor_8f32: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_fxor_8f32: ; SKX: # BB#0: ; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 @@ -775,8 +775,8 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) ; CHECK-LABEL: fabs_v8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: fabs_v8f64: ; SKX: # BB#0: ; SKX-NEXT: vandpd 
{{.*}}(%rip){1to8}, %zmm0, %zmm0 @@ -790,8 +790,8 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) ; CHECK-LABEL: fabs_v16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: fabs_v16f32: ; SKX: # BB#0: ; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 @@ -806,14 +806,14 @@ ; CHECK-LABEL: test1: ; CHECK: # BB#0: ; CHECK-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; CHECK-NEXT: jne .LBB64_1 # sched: [1:1.00] -; CHECK-NEXT: jnp .LBB64_2 # sched: [1:1.00] +; CHECK-NEXT: jne .LBB64_1 # sched: [1:0.50] +; CHECK-NEXT: jnp .LBB64_2 # sched: [1:0.50] ; CHECK-NEXT: .LBB64_1: # %l1 -; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; CHECK-NEXT: .LBB64_2: # %l2 -; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %tobool = fcmp une double %a, %b br i1 %tobool, label %l1, label %l2 @@ -829,13 +829,13 @@ ; CHECK-LABEL: test2: ; CHECK: # BB#0: ; CHECK-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] -; CHECK-NEXT: jbe .LBB65_2 # sched: [1:1.00] +; CHECK-NEXT: jbe .LBB65_2 # sched: [1:0.50] ; CHECK-NEXT: # BB#1: # %l1 -; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; CHECK-NEXT: .LBB65_2: # %l2 -; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %tobool = fcmp olt float %a, %b br i1 %tobool, label %l1, label %l2 @@ 
-851,9 +851,9 @@ ; CHECK-LABEL: test3: ; CHECK: # BB#0: ; CHECK-NEXT: vcmpeqss %xmm1, %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: movzbl %al, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test3: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0 @@ -869,17 +869,17 @@ define float @test5(float %p) #0 { ; CHECK-LABEL: test5: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; CHECK-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] -; CHECK-NEXT: jne .LBB67_1 # sched: [1:1.00] -; CHECK-NEXT: jp .LBB67_1 # sched: [1:1.00] +; CHECK-NEXT: jne .LBB67_1 # sched: [1:0.50] +; CHECK-NEXT: jp .LBB67_1 # sched: [1:0.50] ; CHECK-NEXT: # BB#2: # %return -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; CHECK-NEXT: .LBB67_1: # %if.end ; CHECK-NEXT: seta %al # sched: [2:1.00] ; CHECK-NEXT: movzbl %al, %eax # sched: [1:0.25] -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %cmp = fcmp oeq float %p, 0.000000e+00 br i1 %cmp, label %return, label %if.end @@ -899,8 +899,8 @@ ; CHECK: # BB#0: ; CHECK-NEXT: xorl %eax, %eax # sched: [1:0.25] ; CHECK-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; CHECK-NEXT: sete %al # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: sete %al # sched: [1:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %cmp = icmp eq i32 %a, %b %res = zext i1 %cmp to i32 ret i32 %res @@ -911,8 +911,8 @@ ; CHECK: # BB#0: # %entry ; CHECK-NEXT: xorl %eax, %eax # sched: [1:0.25] ; CHECK-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; CHECK-NEXT: setne %al # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: setne %al # 
sched: [1:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %0 = fcmp one double %x, %y %or = zext i1 %0 to i32 @@ -927,10 +927,10 @@ ; CHECK-NEXT: # sched: [1:0.25] ; CHECK-NEXT: testl %edx, %edx # sched: [1:0.25] ; CHECK-NEXT: movl $1, %eax # sched: [1:0.25] -; CHECK-NEXT: cmovel %eax, %edx # sched: [1:1.00] +; CHECK-NEXT: cmovel %eax, %edx # sched: [1:0.50] ; CHECK-NEXT: orl %edi, %esi # sched: [1:0.25] -; CHECK-NEXT: cmovnel %edx, %eax # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: cmovnel %edx, %eax # sched: [1:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %tmp1 = icmp eq i32 %a1, -1 %tmp2 = icmp eq i32 %a2, -2147483648 %tmp3 = and i1 %tmp1, %tmp2 @@ -944,13 +944,13 @@ ; CHECK-LABEL: test9: ; CHECK: # BB#0: ; CHECK-NEXT: testb $1, %dil # sched: [1:0.25] -; CHECK-NEXT: jne .LBB71_2 # sched: [1:1.00] +; CHECK-NEXT: jne .LBB71_2 # sched: [1:0.50] ; CHECK-NEXT: # BB#1: # %A ; CHECK-NEXT: movl $6, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; CHECK-NEXT: .LBB71_2: # %B ; CHECK-NEXT: movl $7, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = and i64 %a, 1 %cmp10.i = icmp eq i64 %b, 0 br i1 %cmp10.i, label %A, label %B @@ -966,17 +966,17 @@ ; CHECK-NEXT: movl %edx, %eax # sched: [1:0.25] ; CHECK-NEXT: andb $1, %al # sched: [1:0.25] ; CHECK-NEXT: cmpq %rsi, %rdi # sched: [1:0.25] -; CHECK-NEXT: sete %cl # sched: [1:1.00] +; CHECK-NEXT: sete %cl # sched: [1:0.50] ; CHECK-NEXT: orb %dl, %cl # sched: [1:0.25] ; CHECK-NEXT: andb $1, %cl # sched: [1:0.25] ; CHECK-NEXT: cmpb %cl, %al # sched: [1:0.25] -; CHECK-NEXT: je .LBB72_1 # sched: [1:1.00] +; CHECK-NEXT: je .LBB72_1 # sched: [1:0.50] ; CHECK-NEXT: # BB#2: # %if.end.i ; CHECK-NEXT: movl $6, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; CHECK-NEXT: .LBB72_1: # %if.then.i ; CHECK-NEXT: movl $5, %eax # sched: [1:0.25] -; CHECK-NEXT: retq 
# sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %cmp8.i = icmp eq i64 %b, %c %or1 = or i1 %d, %cmp8.i @@ -993,8 +993,8 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind { ; CHECK-LABEL: sitof32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i32> %a to <16 x float> ret <16 x float> %b } @@ -1002,8 +1002,8 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; CHECK-LABEL: sltof864: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <8 x i64> %a to <8 x double> ret <8 x double> %b } @@ -1011,8 +1011,8 @@ define <4 x double> @slto4f64(<4 x i64> %a) { ; CHECK-LABEL: slto4f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: slto4f64: ; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0 @@ -1024,8 +1024,8 @@ define <2 x double> @slto2f64(<2 x i64> %a) { ; CHECK-LABEL: slto2f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: slto2f64: ; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2pd %xmm0, %xmm0 @@ -1037,8 +1037,8 @@ define <2 x float> @sltof2f32(<2 x i64> %a) { ; CHECK-LABEL: sltof2f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: sltof2f32: ; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 @@ -1051,7 +1051,7 @@ ; CHECK-LABEL: slto4f32_mem: ; CHECK: # BB#0: ; CHECK-NEXT: vcvtqq2psy (%rdi), %xmm0 -; CHECK-NEXT: retq 
# sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: slto4f32_mem: ; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0 @@ -1064,8 +1064,8 @@ define <4 x i64> @f64to4sl(<4 x double> %a) { ; CHECK-LABEL: f64to4sl: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: f64to4sl: ; VLDQ: # BB#0: ; VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0 @@ -1077,8 +1077,8 @@ define <4 x i64> @f32to4sl(<4 x float> %a) { ; CHECK-LABEL: f32to4sl: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: f32to4sl: ; VLDQ: # BB#0: ; VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0 @@ -1090,9 +1090,9 @@ define <4 x float> @slto4f32(<4 x i64> %a) { ; CHECK-LABEL: slto4f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 +; CHECK-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: slto4f32: ; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0 @@ -1105,9 +1105,9 @@ define <4 x float> @ulto4f32(<4 x i64> %a) { ; CHECK-LABEL: ulto4f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; CHECK-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: ulto4f32: ; VLDQ: # BB#0: ; VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 @@ -1120,8 +1120,8 @@ define <8 x double> @ulto8f64(<8 x i64> %a) { ; CHECK-LABEL: ulto8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i64> %a to <8 x double> ret <8 x double> %b } 
@@ -1129,9 +1129,9 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; CHECK-LABEL: ulto16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0 -; CHECK-NEXT: vcvtuqq2pd %zmm1, %zmm1 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i64> %a to <16 x double> ret <16 x double> %b } @@ -1139,8 +1139,8 @@ define <16 x i32> @f64to16si(<16 x float> %a) nounwind { ; CHECK-LABEL: f64to16si: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptosi <16 x float> %a to <16 x i32> ret <16 x i32> %b } @@ -1148,8 +1148,8 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { ; CHECK-LABEL: f32to16ui: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptoui <16 x float> %a to <16 x i32> ret <16 x i32> %b } @@ -1157,10 +1157,10 @@ define <16 x i8> @f32to16uc(<16 x float> %f) { ; CHECK-LABEL: f32to16uc: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 -; CHECK-NEXT: vpmovdb %zmm0, %xmm0 +; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = fptoui <16 x float> %f to <16 x i8> ret <16 x i8> %res } @@ -1168,9 +1168,9 @@ define <16 x i16> @f32to16us(<16 x float> %f) { ; CHECK-LABEL: f32to16us: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 -; CHECK-NEXT: vpmovdw %zmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00] +; 
CHECK-NEXT: retq # sched: [7:1.00] %res = fptoui <16 x float> %f to <16 x i16> ret <16 x i16> %res } @@ -1178,8 +1178,8 @@ define <8 x i32> @f32to8ui(<8 x float> %a) nounwind { ; CHECK-LABEL: f32to8ui: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptoui <8 x float> %a to <8 x i32> ret <8 x i32> %b } @@ -1187,8 +1187,8 @@ define <4 x i32> @f32to4ui(<4 x float> %a) nounwind { ; CHECK-LABEL: f32to4ui: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptoui <4 x float> %a to <4 x i32> ret <4 x i32> %b } @@ -1196,8 +1196,8 @@ define <8 x i32> @f64to8ui(<8 x double> %a) nounwind { ; CHECK-LABEL: f64to8ui: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptoui <8 x double> %a to <8 x i32> ret <8 x i32> %b } @@ -1205,10 +1205,10 @@ define <8 x i16> @f64to8us(<8 x double> %f) { ; CHECK-LABEL: f64to8us: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 -; CHECK-NEXT: vpmovdw %ymm0, %xmm0 +; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = fptoui <8 x double> %f to <8 x i16> ret <8 x i16> %res } @@ -1216,10 +1216,10 @@ define <8 x i8> @f64to8uc(<8 x double> %f) { ; CHECK-LABEL: f64to8uc: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 -; CHECK-NEXT: vpmovdw %ymm0, %xmm0 +; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # 
sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = fptoui <8 x double> %f to <8 x i8> ret <8 x i8> %res } @@ -1227,9 +1227,9 @@ define <4 x i32> @f64to4ui(<4 x double> %a) nounwind { ; CHECK-LABEL: f64to4ui: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 +; CHECK-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptoui <4 x double> %a to <4 x i32> ret <4 x i32> %b } @@ -1237,17 +1237,17 @@ define <8 x double> @sito8f64(<8 x i32> %a) { ; CHECK-LABEL: sito8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <8 x i32> %a to <8 x double> ret <8 x double> %b } define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { ; CHECK-LABEL: i32to8f64_mask: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLBW-LABEL: i32to8f64_mask: ; VLBW: # BB#0: ; VLBW-NEXT: kmovd %edi, %k1 @@ -1266,9 +1266,9 @@ define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { ; CHECK-LABEL: sito8f64_maskz: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLBW-LABEL: sito8f64_maskz: ; VLBW: # BB#0: ; VLBW-NEXT: kmovd %edi, %k1 @@ -1288,8 +1288,8 @@ define <8 x i32> @f64to8si(<8 x double> %a) { ; CHECK-LABEL: f64to8si: ; CHECK: # BB#0: -; CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptosi <8 x double> %a to <8 x i32> ret <8 x i32> %b } @@ -1299,7 +1299,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptosi <4 x double> %a to <4 x i32> ret <4 x i32> %b } @@ -1307,10 +1307,10 @@ define <16 x float> @f64to16f32(<16 x double> %b) nounwind { ; CHECK-LABEL: f64to16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm0 -; CHECK-NEXT: vcvtpd2ps %zmm1, %ymm1 -; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00] +; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = fptrunc <16 x double> %b to <16 x float> ret <16 x float> %a } @@ -1320,7 +1320,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = fptrunc <4 x double> %b to <4 x float> ret <4 x float> %a } @@ -1328,11 +1328,11 @@ define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { ; CHECK-LABEL: f64to4f32_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 -; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = fptrunc <4 x double> %b to <4 x float> %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer ret 
<4 x float> %c @@ -1342,7 +1342,7 @@ ; CHECK-LABEL: f64tof32_inreg: ; CHECK: # BB#0: ; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %ext = extractelement <2 x double> %a0, i32 0 %cvt = fptrunc double %ext to float %res = insertelement <4 x float> %a1, float %cvt, i32 0 @@ -1352,8 +1352,8 @@ define <8 x double> @f32to8f64(<8 x float> %b) nounwind { ; CHECK-LABEL: f32to8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtps2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = fpext <8 x float> %b to <8 x double> ret <8 x double> %a } @@ -1361,9 +1361,9 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) { ; CHECK-LABEL: f32to4f64_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vcmpltpd %ymm2, %ymm1, %k1 -; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = fpext <4 x float> %b to <4 x double> %mask = fcmp ogt <4 x double> %a1, %b1 %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer @@ -1374,7 +1374,7 @@ ; CHECK-LABEL: f32tof64_inreg: ; CHECK: # BB#0: ; CHECK-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %ext = extractelement <4 x float> %a1, i32 0 %cvt = fpext float %ext to double %res = insertelement <2 x double> %a0, double %cvt, i32 0 @@ -1385,7 +1385,7 @@ ; CHECK-LABEL: sltof64_load: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %tmp1 = load i64, i64* %e, align 8 %conv = sitofp i64 %tmp1 to double @@ -1396,7 +1396,7 @@ ; 
CHECK-LABEL: sitof64_load: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %tmp1 = load i32, i32* %e, align 4 %conv = sitofp i32 %tmp1 to double @@ -1407,7 +1407,7 @@ ; CHECK-LABEL: sitof32_load: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %tmp1 = load i32, i32* %e, align 4 %conv = sitofp i32 %tmp1 to float @@ -1418,7 +1418,7 @@ ; CHECK-LABEL: sltof32_load: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %tmp1 = load i64, i64* %e, align 8 %conv = sitofp i64 %tmp1 to float @@ -1428,10 +1428,10 @@ define void @f32tof64_loadstore() { ; CHECK-LABEL: f32tof64_loadstore: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] ; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %f = alloca float, align 4 %d = alloca double, align 8 @@ -1444,10 +1444,10 @@ define void @f64tof32_loadstore() nounwind uwtable { ; CHECK-LABEL: f64tof32_loadstore: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50] +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] ; CHECK-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; CHECK-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %f = alloca float, align 4 %d = alloca double, align 8 @@ -1461,7 +1461,7 @@ ; CHECK-LABEL: long_to_double: ; CHECK: # BB#0: ; 
CHECK-NEXT: vmovq %rdi, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = bitcast i64 %x to double ret double %res } @@ -1470,7 +1470,7 @@ ; CHECK-LABEL: double_to_long: ; CHECK: # BB#0: ; CHECK-NEXT: vmovq %xmm0, %rax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = bitcast double %x to i64 ret i64 %res } @@ -1479,7 +1479,7 @@ ; CHECK-LABEL: int_to_float: ; CHECK: # BB#0: ; CHECK-NEXT: vmovd %edi, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = bitcast i32 %x to float ret float %res } @@ -1488,7 +1488,7 @@ ; CHECK-LABEL: float_to_int: ; CHECK: # BB#0: ; CHECK-NEXT: vmovd %xmm0, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = bitcast float %x to i32 ret i32 %res } @@ -1496,11 +1496,11 @@ define <16 x double> @uito16f64(<16 x i32> %a) nounwind { ; CHECK-LABEL: uito16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm2 -; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00] +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i32> %a to <16 x double> ret <16 x double> %b } @@ -1508,8 +1508,8 @@ define <8 x float> @slto8f32(<8 x i64> %a) { ; CHECK-LABEL: slto8f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <8 x i64> %a to <8 x float> ret <8 x float> %b } @@ -1517,10 +1517,10 @@ define <16 x float> @slto16f32(<16 x i64> %a) { ; CHECK-LABEL: slto16f32: ; 
CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0 -; CHECK-NEXT: vcvtqq2ps %zmm1, %ymm1 -; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00] +; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i64> %a to <16 x float> ret <16 x float> %b } @@ -1528,8 +1528,8 @@ define <8 x double> @slto8f64(<8 x i64> %a) { ; CHECK-LABEL: slto8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <8 x i64> %a to <8 x double> ret <8 x double> %b } @@ -1537,9 +1537,9 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; CHECK-LABEL: slto16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 -; CHECK-NEXT: vcvtqq2pd %zmm1, %zmm1 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i64> %a to <16 x double> ret <16 x double> %b } @@ -1547,8 +1547,8 @@ define <8 x float> @ulto8f32(<8 x i64> %a) { ; CHECK-LABEL: ulto8f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i64> %a to <8 x float> ret <8 x float> %b } @@ -1556,10 +1556,10 @@ define <16 x float> @ulto16f32(<16 x i64> %a) { ; CHECK-LABEL: ulto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0 -; CHECK-NEXT: vcvtuqq2ps %zmm1, %ymm1 -; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00] +; 
CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i64> %a to <16 x float> ret <16 x float> %b } @@ -1567,9 +1567,9 @@ define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind { ; CHECK-LABEL: uito8f64_mask: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLBW-LABEL: uito8f64_mask: ; VLBW: # BB#0: ; VLBW-NEXT: kmovd %edi, %k1 @@ -1588,9 +1588,9 @@ define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { ; CHECK-LABEL: uito8f64_maskz: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLBW-LABEL: uito8f64_maskz: ; VLBW: # BB#0: ; VLBW-NEXT: kmovd %edi, %k1 @@ -1610,8 +1610,8 @@ define <4 x double> @uito4f64(<4 x i32> %a) nounwind { ; CHECK-LABEL: uito4f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <4 x i32> %a to <4 x double> ret <4 x double> %b } @@ -1619,8 +1619,8 @@ define <16 x float> @uito16f32(<16 x i32> %a) nounwind { ; CHECK-LABEL: uito16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i32> %a to <16 x float> ret <16 x float> %b } @@ -1628,8 +1628,8 @@ define <8 x double> @uito8f64(<8 x i32> %a) { ; CHECK-LABEL: uito8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtudq2pd %ymm0, 
%zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i32> %a to <8 x double> ret <8 x double> %b } @@ -1637,8 +1637,8 @@ define <8 x float> @uito8f32(<8 x i32> %a) nounwind { ; CHECK-LABEL: uito8f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i32> %a to <8 x float> ret <8 x float> %b } @@ -1646,8 +1646,8 @@ define <4 x float> @uito4f32(<4 x i32> %a) nounwind { ; CHECK-LABEL: uito4f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <4 x i32> %a to <4 x float> ret <4 x float> %b } @@ -1656,7 +1656,7 @@ ; CHECK-LABEL: fptosi: ; CHECK: # BB#0: ; CHECK-NEXT: vcvttss2si %xmm0, %eax # sched: [7:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptosi float %a to i32 ret i32 %b } @@ -1665,7 +1665,7 @@ ; CHECK-LABEL: fptoui: ; CHECK: # BB#0: ; CHECK-NEXT: vcvttss2usi %xmm0, %eax -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptoui float %a to i32 ret i32 %b } @@ -1673,8 +1673,8 @@ define float @uitof32(i32 %a) nounwind { ; CHECK-LABEL: uitof32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp i32 %a to float ret float %b } @@ -1682,8 +1682,8 @@ define double @uitof64(i32 %a) nounwind { ; CHECK-LABEL: uitof64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp i32 %a 
to double ret double %b } @@ -1691,11 +1691,11 @@ define <16 x float> @sbto16f32(<16 x i32> %a) { ; CHECK-LABEL: sbto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %zmm0 -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x float> ret <16 x float> %1 @@ -1704,9 +1704,9 @@ define <16 x float> @scto16f32(<16 x i8> %a) { ; CHECK-LABEL: scto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = sitofp <16 x i8> %a to <16 x float> ret <16 x float> %1 } @@ -1714,9 +1714,9 @@ define <16 x float> @ssto16f32(<16 x i16> %a) { ; CHECK-LABEL: ssto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = sitofp <16 x i16> %a to <16 x float> ret <16 x float> %1 } @@ -1725,8 +1725,8 @@ ; CHECK-LABEL: ssto16f64: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = sitofp <8 x i16> %a to <8 x double> ret <8 x double> %1 } @@ -1735,10 +1735,10 @@ ; CHECK-LABEL: scto8f64: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxwd 
{{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; CHECK-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = sitofp <8 x i8> %a to <8 x double> ret <8 x double> %1 } @@ -1746,11 +1746,11 @@ define <16 x double> @scto16f64(<16 x i8> %a) { ; CHECK-LABEL: scto16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxbd %xmm0, %zmm1 -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 -; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i8> %a to <16 x double> ret <16 x double> %b } @@ -1794,14 +1794,14 @@ ; ; CHECK-LABEL: sbto16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vcmpltpd %zmm1, %zmm2, %k0 -; CHECK-NEXT: vcmpltpd %zmm0, %zmm2, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00] +; CHECK-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k1, %ymm0 -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] ; CHECK-NEXT: vpmovm2d %k0, %ymm1 -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <16 x 
double> %a, zeroinitializer %1 = sitofp <16 x i1> %cmpres to <16 x double> ret <16 x double> %1 @@ -1836,11 +1836,11 @@ ; ; CHECK-LABEL: sbto8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %ymm0 -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <8 x double> %a, zeroinitializer %1 = sitofp <8 x i1> %cmpres to <8 x double> ret <8 x double> %1 @@ -1876,11 +1876,11 @@ ; ; CHECK-LABEL: sbto8f32: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpltps %ymm0, %ymm1, %k0 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %ymm0 ; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <8 x float> %a, zeroinitializer %1 = sitofp <8 x i1> %cmpres to <8 x float> ret <8 x float> %1 @@ -1889,11 +1889,11 @@ define <4 x float> @sbto4f32(<4 x float> %a) { ; CHECK-LABEL: sbto4f32: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpltps %xmm0, %xmm1, %k0 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %xmm0 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: sbto4f32: ; VLDQ: # BB#0: ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -1917,11 +1917,11 @@ define <4 x double> @sbto4f64(<4 x double> %a) { ; CHECK-LABEL: sbto4f64: ; CHECK: # BB#0: -; CHECK-NEXT: vxorpd %xmm1, 
%xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpltpd %ymm0, %ymm1, %k0 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %xmm0 ; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: sbto4f64: ; VLDQ: # BB#0: ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -1945,11 +1945,11 @@ define <2 x float> @sbto2f32(<2 x float> %a) { ; CHECK-LABEL: sbto2f32: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpltps %xmm0, %xmm1, %k0 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %xmm0 ; CHECK-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: sbto2f32: ; VLDQ: # BB#0: ; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 @@ -1973,11 +1973,11 @@ define <2 x double> @sbto2f64(<2 x double> %a) { ; CHECK-LABEL: sbto2f64: ; CHECK: # BB#0: -; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpltpd %xmm0, %xmm1, %k0 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2q %k0, %xmm0 -; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: sbto2f64: ; VLDQ: # BB#0: ; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1 @@ -2005,9 +2005,9 @@ define <16 x float> @ucto16f32(<16 x i8> %a) { ; CHECK-LABEL: ucto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i8> %a to <16 x float> ret <16 x float>%b } @@ -2015,10 +2015,10 @@ define <8 x double> @ucto8f64(<8 x i8> %a) { ; CHECK-LABEL: ucto8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i8> %a to <8 x double> ret <8 x double> %b } @@ -2026,9 +2026,9 @@ define <16 x float> @swto16f32(<16 x i16> %a) { ; CHECK-LABEL: swto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # 
sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i16> %a to <16 x float> ret <16 x float> %b } @@ -2037,8 +2037,8 @@ ; CHECK-LABEL: swto8f64: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <8 x i16> %a to <8 x double> ret <8 x double> %b } @@ -2046,11 +2046,11 @@ define <16 x double> @swto16f64(<16 x i16> %a) { ; CHECK-LABEL: swto16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwd %ymm0, %zmm1 -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 -; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i16> %a to <16 x double> ret <16 x double> %b } @@ -2058,11 +2058,11 @@ define <16 x double> @ucto16f64(<16 x i8> %a) { ; CHECK-LABEL: ucto16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 -; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i8> %a to <16 x double> ret <16 x double> %b } @@ -2070,9 +2070,9 @@ define <16 x float> @uwto16f32(<16 x i16> %a) { ; CHECK-LABEL: uwto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i16> %a to <16 x float> ret <16 x float> %b } @@ -2081,8 +2081,8 @@ ; CHECK-LABEL: uwto8f64: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i16> %a to <8 x double> ret <8 x double> %b } 
@@ -2090,11 +2090,11 @@ define <16 x double> @uwto16f64(<16 x i16> %a) { ; CHECK-LABEL: uwto16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 -; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i16> %a to <16 x double> ret <16 x double> %b } @@ -2102,8 +2102,8 @@ define <16 x float> @sito16f32(<16 x i32> %a) { ; CHECK-LABEL: sito16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i32> %a to <16 x float> ret <16 x float> %b } @@ -2111,11 +2111,11 @@ define <16 x double> @sito16f64(<16 x i32> %a) { ; CHECK-LABEL: sito16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm2 -; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00] +; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00] +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: 
[7:1.00] %b = sitofp <16 x i32> %a to <16 x double> ret <16 x double> %b } @@ -2123,9 +2123,9 @@ define <16 x float> @usto16f32(<16 x i16> %a) { ; CHECK-LABEL: usto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i16> %a to <16 x float> ret <16 x float> %b } @@ -2133,11 +2133,11 @@ define <16 x float> @ubto16f32(<16 x i32> %a) { ; CHECK-LABEL: ubto16f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} -; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x float> ret <16 x float> %1 @@ -2146,15 +2146,15 @@ define <16 x double> @ubto16f64(<16 x i32> %a) { ; CHECK-LABEL: ubto16f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 -; CHECK-NEXT: movl {{.*}}(%rip), %eax # sched: [1:0.50] -; CHECK-NEXT: 
vpbroadcastd %eax, %ymm0 {%k1} {z} -; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 -; CHECK-NEXT: kshiftrw $8, %k1, %k1 -; CHECK-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} -; CHECK-NEXT: vcvtudq2pd %ymm1, %zmm1 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50] +; CHECK-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: kshiftrw $8, %k1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: vcvtudq2pd %ymm1, %zmm1 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x double> ret <16 x double> %1 @@ -2163,11 +2163,11 @@ define <8 x float> @ubto8f32(<8 x i32> %a) { ; CHECK-LABEL: ubto8f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} -; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp slt <8 x i32> %a, zeroinitializer %1 = uitofp <8 x i1> %mask to <8 x float> ret <8 x float> %1 @@ -2176,11 +2176,11 @@ define <8 x double> @ubto8f64(<8 x i32> %a) { ; CHECK-LABEL: ubto8f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} -; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp slt <8 x i32> %a, zeroinitializer %1 = uitofp <8 x i1> %mask to <8 x double> ret <8 x double> %1 @@ -2189,11 +2189,11 @@ define <4 x float> @ubto4f32(<4 x i32> %a) { ; CHECK-LABEL: ubto4f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} -; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50] +; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp slt <4 x i32> %a, zeroinitializer %1 = uitofp <4 x i1> %mask to <4 x float> ret <4 x float> %1 @@ -2202,11 +2202,11 @@ define <4 x double> @ubto4f64(<4 x i32> %a) { ; CHECK-LABEL: ubto4f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} -; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50] +; CHECK-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp slt <4 x i32> %a, zeroinitializer %1 = uitofp <4 x i1> %mask to <4 x double> ret <4 x double> %1 @@ -2215,12 +2215,12 @@ define <2 x float> @ubto2f32(<2 x i32> %a) { ; CHECK-LABEL: ubto2f32: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50] -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} -; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33] +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50] +; CHECK-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ult <2 x i32> %a, zeroinitializer %1 = uitofp <2 x i1> %mask to <2 x float> ret <2 x float> %1 @@ -2229,12 +2229,12 @@ define <2 x double> @ubto2f64(<2 x i32> %a) { ; CHECK-LABEL: ubto2f64: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50] -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33] +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50] +; CHECK-NEXT: vcvtuqq2pd %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; VLDQ-LABEL: ubto2f64: ; VLDQ: # BB#0: ; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -2263,10 +2263,10 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_8x8mem_to_8x16: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} 
= mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x8mem_to_8x16: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -2282,10 +2282,10 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_8x8mem_to_8x16: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_8x8mem_to_8x16: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -2302,10 +2302,10 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_16x8mem_to_16x16: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_16x8mem_to_16x16: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -2321,10 +2321,10 @@ define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_16x8mem_to_16x16: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_16x8mem_to_16x16: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -2341,7 +2341,7 @@ ; CHECK-LABEL: zext_16x8_to_16x16: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = zext <16 x i8> %a to <16 x i16> ret <16 x i16> %x } @@ -2349,10 +2349,10 @@ define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_16x8_to_16x16_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm1, %k1 -; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_16x8_to_16x16_mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 @@ -2368,7 +2368,7 @@ ; CHECK-LABEL: sext_16x8_to_16x16: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = sext <16 x i8> %a to <16 x i16> ret <16 x i16> %x } @@ -2376,10 +2376,10 @@ define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_16x8_to_16x16_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm1, %k1 -; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_16x8_to_16x16_mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 @@ -2394,10 +2394,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_32x8mem_to_32x16: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %ymm0, %k1 -; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} 
{z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_32x8mem_to_32x16: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 @@ -2413,10 +2413,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_32x8mem_to_32x16: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %ymm0, %k1 -; CHECK-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_32x8mem_to_32x16: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 @@ -2432,8 +2432,8 @@ define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { ; CHECK-LABEL: zext_32x8_to_32x16: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 
= ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_32x8_to_32x16: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero @@ -2445,10 +2445,10 @@ define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_32x8_to_32x16_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %ymm1, %k1 -; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_32x8_to_32x16_mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 @@ -2463,8 +2463,8 @@ define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { ; CHECK-LABEL: sext_32x8_to_32x16: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_32x8_to_32x16: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 @@ -2476,10 +2476,10 @@ define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_32x8_to_32x16_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %ymm1, %k1 -; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, 
%ymm1, %ymm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_32x8_to_32x16_mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 @@ -2494,10 +2494,10 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_4x8mem_to_4x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_4x8mem_to_4x32: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -2513,10 +2513,10 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_4x8mem_to_4x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_4x8mem_to_4x32: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -2532,10 +2532,10 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_8x8mem_to_8x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: 
[1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x8mem_to_8x32: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -2551,10 +2551,10 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_8x8mem_to_8x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_8x8mem_to_8x32: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -2570,10 +2570,10 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_16x8mem_to_16x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_16x8mem_to_16x32: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -2589,10 +2589,10 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_16x8mem_to_16x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_16x8mem_to_16x32: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -2608,10 +2608,10 @@ define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_16x8_to_16x32_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm1, %k1 -; CHECK-NEXT: 
vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_16x8_to_16x32_mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 @@ -2626,10 +2626,10 @@ define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_16x8_to_16x32_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm1, %k1 -; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_16x8_to_16x32_mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 @@ -2644,8 +2644,8 @@ define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { ; CHECK-LABEL: zext_16x8_to_16x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = zext <16 x i8> %i to <16 x i32> ret <16 x i32> %x } @@ -2653,8 +2653,8 @@ define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { ; CHECK-LABEL: sext_16x8_to_16x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = sext <16 x i8> %i to <16 x i32> ret <16 x i32> %x } @@ -2662,10 +2662,10 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_2x8mem_to_2x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_2x8mem_to_2x64: ; SKX: # BB#0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -2680,10 +2680,10 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_2x8mem_to_2x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_2x8mem_to_2x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -2698,8 +2698,8 @@ define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone { ; CHECK-LABEL: sext_2x8mem_to_2x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <2 x i8>,<2 x i8> *%i,align 1 %x = sext <2 x i8> %a to <2 x i64> ret <2 x i64> %x @@ -2708,10 +2708,10 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_4x8mem_to_4x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxbq 
{{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_4x8mem_to_4x64: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -2727,10 +2727,10 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_4x8mem_to_4x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_4x8mem_to_4x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -2746,8 +2746,8 @@ define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone { ; CHECK-LABEL: sext_4x8mem_to_4x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <4 x i8>,<4 x i8> *%i,align 1 %x = sext <4 x i8> %a to <4 x i64> ret <4 x i64> %x @@ -2756,10 +2756,10 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_8x8mem_to_8x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = 
mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x8mem_to_8x64: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -2775,10 +2775,10 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_8x8mem_to_8x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_8x8mem_to_8x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -2794,8 +2794,8 @@ define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { ; CHECK-LABEL: sext_8x8mem_to_8x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxbq (%rdi), %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = 
load <8 x i8>,<8 x i8> *%i,align 1 %x = sext <8 x i8> %a to <8 x i64> ret <8 x i64> %x @@ -2804,10 +2804,10 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_4x16mem_to_4x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_4x16mem_to_4x32: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -2823,10 +2823,10 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_4x16mem_to_4x32mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_4x16mem_to_4x32mask: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -2842,8 +2842,8 @@ define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone { ; CHECK-LABEL: sext_4x16mem_to_4x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <4 x i16>,<4 x i16> *%i,align 1 %x = sext <4 x i16> %a to <4 x i32> ret <4 x i32> %x @@ -2853,10 +2853,10 @@ define <8 x 
i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_8x16mem_to_8x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x16mem_to_8x32: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -2872,10 +2872,10 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_8x16mem_to_8x32mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_8x16mem_to_8x32mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -2891,8 +2891,8 @@ define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone { ; CHECK-LABEL: sext_8x16mem_to_8x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <8 x i16>,<8 x i16> *%i,align 1 %x = sext <8 x i16> %a to <8 x i32> ret <8 x i32> %x @@ -2901,10 +2901,10 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> 
%mask) nounwind readnone { ; CHECK-LABEL: zext_8x16_to_8x32mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm1, %k1 -; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x16_to_8x32mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 @@ -2920,7 +2920,7 @@ ; CHECK-LABEL: zext_8x16_to_8x32: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = zext <8 x i16> %a to <8 x i32> ret <8 x i32> %x } @@ -2928,10 +2928,10 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_16x16mem_to_16x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_16x16mem_to_16x32: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -2947,10 +2947,10 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_16x16mem_to_16x32mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_16x16mem_to_16x32mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -2966,8 +2966,8 @@ define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { ; CHECK-LABEL: sext_16x16mem_to_16x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwd (%rdi), %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <16 x i16>,<16 x i16> *%i,align 1 %x = sext <16 x i16> %a to <16 x i32> ret <16 x i32> %x @@ -2975,10 +2975,10 @@ define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_16x16_to_16x32mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm1, %k1 -; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; CHECK-NEXT: retq # 
sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_16x16_to_16x32mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 @@ -2993,8 +2993,8 @@ define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { ; CHECK-LABEL: zext_16x16_to_16x32: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = zext <16 x i16> %a to <16 x i32> ret <16 x i32> %x } @@ -3002,10 +3002,10 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_2x16mem_to_2x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00] +; CHECK-NEXT: retq # 
sched: [7:1.00] ; SKX-LABEL: zext_2x16mem_to_2x64: ; SKX: # BB#0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -3021,10 +3021,10 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_2x16mem_to_2x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_2x16mem_to_2x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -3040,8 +3040,8 @@ define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone { ; CHECK-LABEL: sext_2x16mem_to_2x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <2 x i16>,<2 x i16> *%i,align 1 %x = sext <2 x i16> %a to <2 x i64> ret <2 x i64> %x @@ -3050,10 +3050,10 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_4x16mem_to_4x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: 
zext_4x16mem_to_4x64: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -3069,10 +3069,10 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_4x16mem_to_4x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_4x16mem_to_4x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -3088,8 +3088,8 @@ define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone { ; CHECK-LABEL: sext_4x16mem_to_4x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <4 x i16>,<4 x i16> *%i,align 1 %x = sext <4 x i16> %a to <4 x i64> ret <4 x i64> %x @@ -3098,10 +3098,10 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_8x16mem_to_8x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x16mem_to_8x64: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -3117,10 +3117,10 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_8x16mem_to_8x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_8x16mem_to_8x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -3136,8 +3136,8 @@ define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { ; CHECK-LABEL: sext_8x16mem_to_8x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxwq (%rdi), %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <8 x i16>,<8 x i16> *%i,align 1 %x = sext <8 x i16> %a to <8 x i64> ret <8 x i64> %x @@ -3146,10 +3146,10 @@ define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_8x16_to_8x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm1, %k1 -; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] +; 
CHECK-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x16_to_8x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 @@ -3164,8 +3164,8 @@ define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { ; CHECK-LABEL: zext_8x16_to_8x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %ret = zext <8 x i16> %a to <8 x i64> ret <8 x i64> %ret } @@ -3173,10 +3173,10 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_2x32mem_to_2x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_2x32mem_to_2x64: ; SKX: # BB#0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -3192,10 +3192,10 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind 
readnone { ; CHECK-LABEL: sext_2x32mem_to_2x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_2x32mem_to_2x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -3211,8 +3211,8 @@ define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone { ; CHECK-LABEL: sext_2x32mem_to_2x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <2 x i32>,<2 x i32> *%i,align 1 %x = sext <2 x i32> %a to <2 x i64> ret <2 x i64> %x @@ -3221,10 +3221,10 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_4x32mem_to_4x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_4x32mem_to_4x64: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -3240,10 +3240,10 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_4x32mem_to_4x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: 
[1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_4x32mem_to_4x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -3259,8 +3259,8 @@ define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone { ; CHECK-LABEL: sext_4x32mem_to_4x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <4 x i32>,<4 x i32> *%i,align 1 %x = sext <4 x i32> %a to <4 x i64> ret <4 x i64> %x @@ -3270,7 +3270,7 @@ ; CHECK-LABEL: sext_4x32_to_4x64: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = sext <4 x i32> %a to <4 x i64> ret <4 x i64> %x } @@ -3278,10 +3278,10 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_4x32_to_4x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 -; CHECK-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_4x32_to_4x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 @@ -3296,10 +3296,10 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , 
<8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_8x32mem_to_8x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x32mem_to_8x64: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -3315,10 +3315,10 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: sext_8x32mem_to_8x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k1 -; CHECK-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_8x32mem_to_8x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -3334,8 +3334,8 @@ define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { ; CHECK-LABEL: sext_8x32mem_to_8x64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxdq (%rdi), %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load <8 x i32>,<8 x i32> *%i,align 1 %x = sext <8 x i32> %a to <8 x i64> ret <8 x i64> %x @@ -3344,8 +3344,8 @@ define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { ; CHECK-LABEL: sext_8x32_to_8x64: ; CHECK: # BB#0: -; 
CHECK-NEXT: vpmovsxdq %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = sext <8 x i32> %a to <8 x i64> ret <8 x i64> %x } @@ -3353,10 +3353,10 @@ define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone { ; CHECK-LABEL: zext_8x32_to_8x64mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm1, %k1 -; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8x32_to_8x64mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 @@ -3370,8 +3370,8 @@ define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone { ; CHECK-LABEL: fptrunc_test: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fptrunc <8 x double> %a to <8 x float> ret <8 x float> %b } @@ -3379,8 +3379,8 @@ define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone { ; CHECK-LABEL: fpext_test: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtps2pd %ymm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = fpext <8 x float> %a to <8 x double> ret <8 x double> %b } @@ -3388,9 +3388,9 @@ define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { ; CHECK-LABEL: zext_16i1_to_16xi32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: 
vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_16i1_to_16xi32: ; SKX: # BB#0: ; SKX-NEXT: kmovd %edi, %k1 @@ -3404,9 +3404,9 @@ define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { ; CHECK-LABEL: zext_8i1_to_8xi64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_8i1_to_8xi64: ; SKX: # BB#0: ; SKX-NEXT: kmovd %edi, %k1 @@ -3420,11 +3420,11 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { ; CHECK-LABEL: trunc_16i8_to_16i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: trunc_16i8_to_16i1: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -3440,12 +3440,12 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; CHECK-LABEL: trunc_16i32_to_16i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %zmm0, %zmm0 -; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: trunc_16i32_to_16i1: ; 
SKX: # BB#0: ; SKX-NEXT: vpslld $31, %zmm0, %zmm0 @@ -3462,12 +3462,12 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: trunc_4i32_to_4i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 -; CHECK-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: trunc_4i32_to_4i1: ; SKX: # BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -3487,11 +3487,11 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) { ; CHECK-LABEL: trunc_8i16_to_8i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AL %AL %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: trunc_8i16_to_8i1: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -3507,9 +3507,9 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; CHECK-LABEL: sext_8i1_8i32: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 +; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_8i1_8i32: ; SKX: # BB#0: ; SKX-NEXT: vpcmpled %ymm0, %ymm1, %k0 @@ -3526,15 +3526,15 @@ ; CHECK-LABEL: trunc_i32_to_i1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-4, %ax # sched: [1:0.25] -; 
CHECK-NEXT: kmovd %eax, %k0 -; CHECK-NEXT: kshiftrw $1, %k0, %k0 -; CHECK-NEXT: kshiftlw $1, %k0, %k0 +; CHECK-NEXT: kmovd %eax, %k0 # sched: [1:1.00] +; CHECK-NEXT: kshiftrw $1, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kshiftlw $1, %k0, %k0 # sched: [3:1.00] ; CHECK-NEXT: andl $1, %edi # sched: [1:0.25] -; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: korw %k1, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovw %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: trunc_i32_to_i1: ; SKX: # BB#0: ; SKX-NEXT: movw $-4, %ax @@ -3556,10 +3556,10 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; CHECK-LABEL: sext_8i1_8i16: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 +; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_8i1_8i16: ; SKX: # BB#0: ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 @@ -3574,9 +3574,9 @@ define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind { ; CHECK-LABEL: sext_16i1_16i32: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 +; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: sext_16i1_16i32: ; SKX: # BB#0: ; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 @@ -3590,9 +3590,9 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { ; CHECK-LABEL: sext_8i1_8i64: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 +; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2q %k0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # 
sched: [7:1.00] ; SKX-LABEL: sext_8i1_8i64: ; SKX: # BB#0: ; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 @@ -3606,10 +3606,10 @@ define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { ; CHECK-LABEL: extload_v8i64: ; CHECK: # BB#0: -; CHECK-NEXT: vpmovsxbq (%rdi), %zmm0 -; CHECK-NEXT: vmovdqa64 %zmm0, (%rsi) +; CHECK-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00] +; CHECK-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: extload_v8i64: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 @@ -3625,12 +3625,12 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; CHECK-LABEL: test21: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %zmm2, %zmm2 -; CHECK-NEXT: vpmovb2m %zmm2, %k1 +; CHECK-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: kshiftrq $32, %k1, %k1 +; CHECK-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00] ; CHECK-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test21: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 @@ -3647,7 +3647,7 @@ ; CHECK-LABEL: shuffle_zext_16x8_to_16x16: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> %2 = bitcast <32 x i8> %1 to <16 x i16> ret <16 x i16> %2 @@ -3656,10 +3656,10 @@ define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone { ; CHECK-LABEL: 
shuffle_zext_16x8_to_16x16_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm1, %k1 -; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: ; SKX: # BB#0: ; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 @@ -3676,7 +3676,7 @@ ; CHECK-LABEL: zext_32x8_to_16x16: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> %2 = bitcast <32 x i8> %1 to <16 x i16> ret <16 x i16> %2 @@ -3686,7 +3686,7 @@ ; CHECK-LABEL: zext_32x8_to_8x32: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> %2 = bitcast <32 x 
i8> %1 to <8 x i32> ret <8 x i32> %2 @@ -3696,7 +3696,7 @@ ; CHECK-LABEL: zext_32x8_to_4x64: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> %2 = bitcast <32 x i8> %1 to <4 x i64> ret <4 x i64> %2 @@ -3706,7 +3706,7 @@ ; CHECK-LABEL: zext_16x16_to_8x32: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> %2 = bitcast <16 x i16> %1 to <8 x i32> ret <8 x i32> %2 @@ -3716,7 +3716,7 @@ ; CHECK-LABEL: zext_16x16_to_4x64: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> %2 = bitcast <16 x i16> %1 to <4 x i64> ret <4 x i64> %2 @@ -3726,7 +3726,7 @@ ; CHECK-LABEL: zext_8x32_to_4x64: ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> %2 = bitcast <8 x i32> %1 to <4 x i64> ret <4 x i64> %2 @@ -3735,9 +3735,9 @@ define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 { ; CHECK-LABEL: zext_64xi1_to_64xi8: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 -; CHECK-NEXT: 
vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_64xi1_to_64xi8: ; SKX: # BB#0: ; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 @@ -3751,9 +3751,9 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { ; CHECK-LABEL: zext_32xi1_to_32xi16: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_32xi1_to_32xi16: ; SKX: # BB#0: ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 @@ -3767,9 +3767,9 @@ define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 { ; CHECK-LABEL: zext_16xi1_to_16xi16: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 -; CHECK-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_16xi1_to_16xi16: ; SKX: # BB#0: ; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %k1 @@ -3784,9 +3784,9 @@ define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 { ; CHECK-LABEL: zext_32xi1_to_32xi8: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_32xi1_to_32xi8: ; SKX: # BB#0: 
; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 @@ -3800,12 +3800,12 @@ define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 { ; CHECK-LABEL: zext_4xi1_to_4x32: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [1:0.50] -; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 -; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] sched: [6:0.50] +; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_4xi1_to_4x32: ; SKX: # BB#0: ; SKX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] @@ -3822,12 +3822,12 @@ define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 { ; CHECK-LABEL: zext_2xi1_to_2xi64: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [1:0.50] -; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 -; CHECK-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] sched: [6:0.50] +; CHECK-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_2xi1_to_2xi64: ; SKX: # BB#0: ; SKX-NEXT: 
vmovdqa {{.*#+}} xmm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] @@ -3844,9 +3844,9 @@ define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_fmadd_ps_z: ; CHECK: # BB#0: -; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vaddps %zmm2, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fadd <16 x float> %x, %a2 ret <16 x float> %res @@ -3855,9 +3855,9 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_fmsub_ps_z: ; CHECK: # BB#0: -; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vsubps %zmm2, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %x, %a2 ret <16 x float> %res @@ -3866,9 +3866,9 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_fnmadd_ps_z: ; CHECK: # BB#0: -; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vsubps %zmm0, %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %a2, %x ret <16 x float> %res @@ -3877,10 +3877,10 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) { ; CHECK-LABEL: test_x86_fnmsub_ps_z: ; CHECK: # BB#0: -; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vsubps %zmm2, %zmm0, %zmm0 -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %y = fsub <16 x float> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_x86_fmadd_pd_z: ; CHECK: # BB#0: -; CHECK-NEXT: vmulpd %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vaddpd %zmm2, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = fmul <8 x double> %a0, %a1 %res = fadd <8 x double> %x, %a2 ret <8 x double> %res @@ -3904,9 +3904,9 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) { ; CHECK-LABEL: test_x86_fmsub_pd_z: ; CHECK: # BB#0: -; CHECK-NEXT: vmulpd %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vsubpd %zmm2, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = fmul <8 x double> %a0, %a1 %res = fsub <8 x double> %x, %a2 ret <8 x double> %res @@ -3915,9 +3915,9 @@ define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) { ; CHECK-LABEL: test_x86_fmsub_213: ; CHECK: # BB#0: -; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %x = fmul double %a0, %a1 %res = fsub double %x, %a2 ret double %res @@ -3926,9 +3926,9 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { ; 
CHECK-LABEL: test_x86_fmsub_213_m: ; CHECK: # BB#0: -; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a1 %res = fsub double %x, %a2 @@ -3938,9 +3938,9 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { ; CHECK-LABEL: test_x86_fmsub_231_m: ; CHECK: # BB#0: -; CHECK-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; CHECK-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a2 %res = fsub double %x, %a1 @@ -3950,9 +3950,9 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; CHECK-LABEL: test231_br: ; CHECK: # BB#0: -; CHECK-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %b1 = fmul <16 x float> %a1, %b2 = fadd <16 x float> %b1, %a2 ret <16 x float> %b2 @@ -3961,9 +3961,9 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; CHECK-LABEL: test213_br: ; CHECK: # BB#0: -; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: retq # 
sched: [7:1.00] %b1 = fmul <16 x float> %a1, %a2 %b2 = fadd <16 x float> %b1, ret <16 x float> %b2 @@ -3973,11 +3973,11 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; CHECK-LABEL: test_x86_fmadd132_ps: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm2, %k1 -; CHECK-NEXT: vmulps (%rdi), %zmm0, %zmm2 -; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50] +; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_x86_fmadd132_ps: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 @@ -3995,12 +3995,12 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; CHECK-LABEL: test_x86_fmadd231_ps: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm2, %k1 -; CHECK-NEXT: vmulps (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.33] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_x86_fmadd231_ps: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 @@ -4019,12 +4019,12 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) { ; CHECK-LABEL: test_x86_fmadd213_ps: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm2, %xmm2 # 
sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm2, %k1 -; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_x86_fmadd213_ps: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 @@ -4042,9 +4042,9 @@ define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpandd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. %a2 = add <16 x i32> %a, @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpandnd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vpandnq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. 
%a2 = add <16 x i32> %a, @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpord: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. %a2 = add <16 x i32> %a, @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpxord: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. %a2 = add <16 x i32> %a, @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpandq: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. 
%a2 = add <8 x i64> %a, @@ -4113,9 +4113,9 @@ define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpandnq: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: vpandnq %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. %a2 = add <8 x i64> %a, @@ -4127,9 +4127,9 @@ define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vporq: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. %a2 = add <8 x i64> %a, @@ -4140,9 +4140,9 @@ define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp { ; CHECK-LABEL: vpxorq: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. 
%a2 = add <8 x i64> %a, @@ -4153,8 +4153,8 @@ define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK-LABEL: and_v64i8: ; CHECK: # BB#0: -; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: and_v64i8: ; SKX: ## BB#0: ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 @@ -4166,8 +4166,8 @@ define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK-LABEL: andn_v64i8: ; CHECK: # BB#0: -; CHECK-NEXT: vandnps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: andn_v64i8: ; SKX: ## BB#0: ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 @@ -4183,8 +4183,8 @@ define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK-LABEL: or_v64i8: ; CHECK: # BB#0: -; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: or_v64i8: ; SKX: ## BB#0: ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 @@ -4196,8 +4196,8 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { ; CHECK-LABEL: xor_v64i8: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: xor_v64i8: ; SKX: ## BB#0: ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 @@ -4209,8 +4209,8 @@ define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK-LABEL: and_v32i16: ; CHECK: # BB#0: -; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: and_v32i16: ; SKX: ## BB#0: ; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 @@ -4222,8 +4222,8 @@ define <32 x i16> @andn_v32i16(<32 x i16> %a, 
<32 x i16> %b) { ; CHECK-LABEL: andn_v32i16: ; CHECK: # BB#0: -; CHECK-NEXT: vandnps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: andn_v32i16: ; SKX: ## BB#0: ; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 @@ -4237,8 +4237,8 @@ define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK-LABEL: or_v32i16: ; CHECK: # BB#0: -; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: or_v32i16: ; SKX: ## BB#0: ; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 @@ -4250,8 +4250,8 @@ define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { ; CHECK-LABEL: xor_v32i16: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: xor_v32i16: ; SKX: ## BB#0: ; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 @@ -4263,10 +4263,10 @@ define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { ; CHECK-LABEL: masked_and_v16f32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] +; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: masked_and_v16f32: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k1 @@ -4287,10 +4287,10 @@ define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { ; CHECK-LABEL: masked_or_v16f32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandps %zmm1, 
%zmm0, %zmm2 {%k1} -; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] +; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: masked_or_v16f32: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k1 @@ -4311,10 +4311,10 @@ define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) { ; CHECK-LABEL: masked_xor_v16f32: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] +; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: masked_xor_v16f32: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k1 @@ -4335,10 +4335,10 @@ define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { ; CHECK-LABEL: masked_and_v8f64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vaddpd %zmm2, %zmm3, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] +; CHECK-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: masked_and_v8f64: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k1 @@ -4359,10 +4359,10 @@ define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { ; CHECK-LABEL: masked_or_v8f64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vaddpd %zmm2, %zmm3, 
%zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] +; CHECK-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: masked_or_v8f64: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k1 @@ -4383,10 +4383,10 @@ define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) { ; CHECK-LABEL: masked_xor_v8f64: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vaddpd %zmm2, %zmm3, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] +; CHECK-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: masked_xor_v8f64: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k1 @@ -4407,9 +4407,9 @@ define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { ; CHECK-LABEL: test_mm512_mask_and_epi32: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_and_epi32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4428,9 +4428,9 @@ define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { ; CHECK-LABEL: test_mm512_mask_or_epi32: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # 
sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_or_epi32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4449,9 +4449,9 @@ define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) { ; CHECK-LABEL: test_mm512_mask_xor_epi32: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_xor_epi32: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4470,9 +4470,9 @@ define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; CHECK-LABEL: test_mm512_mask_xor_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_xor_pd: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4491,9 +4491,9 @@ define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; CHECK-LABEL: test_mm512_maskz_xor_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_maskz_xor_pd: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4512,9 +4512,9 @@ define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; CHECK-LABEL: test_mm512_mask_xor_ps: ; CHECK: # 
BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_xor_ps: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4533,9 +4533,9 @@ define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; CHECK-LABEL: test_mm512_maskz_xor_ps: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_maskz_xor_ps: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4554,9 +4554,9 @@ define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; CHECK-LABEL: test_mm512_mask_or_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_or_pd: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4575,9 +4575,9 @@ define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; CHECK-LABEL: test_mm512_maskz_or_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: 
test_mm512_maskz_or_pd: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4596,9 +4596,9 @@ define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; CHECK-LABEL: test_mm512_mask_or_ps: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_or_ps: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4617,9 +4617,9 @@ define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; CHECK-LABEL: test_mm512_maskz_or_ps: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_maskz_or_ps: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4638,9 +4638,9 @@ define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; CHECK-LABEL: test_mm512_mask_and_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_and_pd: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4659,9 +4659,9 @@ define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; CHECK-LABEL: test_mm512_maskz_and_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; 
CHECK-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_maskz_and_pd: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4680,9 +4680,9 @@ define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; CHECK-LABEL: test_mm512_mask_and_ps: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_and_ps: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4701,9 +4701,9 @@ define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; CHECK-LABEL: test_mm512_maskz_and_ps: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_maskz_and_ps: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4722,9 +4722,9 @@ define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; CHECK-LABEL: test_mm512_mask_andnot_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_andnot_pd: ; SKX: 
## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4744,9 +4744,9 @@ define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; CHECK-LABEL: test_mm512_maskz_andnot_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_maskz_andnot_pd: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4766,9 +4766,9 @@ define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; CHECK-LABEL: test_mm512_mask_andnot_ps: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_mask_andnot_ps: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4788,9 +4788,9 @@ define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { ; CHECK-LABEL: test_mm512_maskz_andnot_ps: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k1 # sched: [1:1.00] +; CHECK-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_mm512_maskz_andnot_ps: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kmovd %edi, %k1 @@ -4811,7 +4811,7 @@ ; CHECK-LABEL: mov_test1: ; CHECK: # BB#0: ; CHECK-NEXT: vmovd %xmm0, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = bitcast float %x to i32 ret i32 
%res } @@ -4820,7 +4820,7 @@ ; CHECK-LABEL: mov_test2: ; CHECK: # BB#0: ; CHECK-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = insertelement <4 x i32>undef, i32 %x, i32 0 ret <4 x i32>%res } @@ -4829,7 +4829,7 @@ ; CHECK-LABEL: mov_test3: ; CHECK: # BB#0: ; CHECK-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = insertelement <2 x i64>undef, i64 %x, i32 0 ret <2 x i64>%res } @@ -4837,8 +4837,8 @@ define <4 x i32> @mov_test4(i32* %x) { ; CHECK-LABEL: mov_test4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %y = load i32, i32* %x %res = insertelement <4 x i32>undef, i32 %y, i32 0 ret <4 x i32>%res @@ -4848,7 +4848,7 @@ ; CHECK-LABEL: mov_test5: ; CHECK: # BB#0: ; CHECK-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] store float %x, float* %y, align 4 ret void } @@ -4857,7 +4857,7 @@ ; CHECK-LABEL: mov_test6: ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] store double %x, double* %y, align 8 ret void } @@ -4865,8 +4865,8 @@ define float @mov_test7(i32* %x) { ; CHECK-LABEL: mov_test7: ; CHECK: # BB#0: -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %y = load i32, i32* %x %res = bitcast i32 %y to float ret float %res @@ -4876,7 +4876,7 @@ ; CHECK-LABEL: mov_test8: ; CHECK: # BB#0: ; CHECK-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: retq # sched: [7:1.00] %res = extractelement <4 x i32> %x, i32 0 ret i32 %res } @@ -4885,7 +4885,7 @@ ; CHECK-LABEL: mov_test9: ; CHECK: # BB#0: ; CHECK-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = extractelement <2 x i64> %x, i32 0 ret i64 %res } @@ -4893,8 +4893,8 @@ define <4 x i32> @mov_test10(i32* %x) { ; CHECK-LABEL: mov_test10: ; CHECK: # BB#0: -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %y = load i32, i32* %x, align 4 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 ret <4 x i32>%res @@ -4903,8 +4903,8 @@ define <4 x float> @mov_test11(float* %x) { ; CHECK-LABEL: mov_test11: ; CHECK: # BB#0: -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %y = load float, float* %x, align 4 %res = insertelement <4 x float>zeroinitializer, float %y, i32 0 ret <4 x float>%res @@ -4913,8 +4913,8 @@ define <2 x double> @mov_test12(double* %x) { ; CHECK-LABEL: mov_test12: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %y = load double, double* %x, align 8 %res = insertelement <2 x double>zeroinitializer, double %y, i32 0 ret <2 x double>%res @@ -4924,7 +4924,7 @@ ; CHECK-LABEL: mov_test13: ; CHECK: # BB#0: ; CHECK-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0 ret <2 x i64>%res } @@ -4933,7 +4933,7 @@ ; CHECK-LABEL: 
mov_test14: ; CHECK: # BB#0: ; CHECK-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0 ret <4 x i32>%res } @@ -4941,8 +4941,8 @@ define <4 x i32> @mov_test15(i32* %x) { ; CHECK-LABEL: mov_test15: ; CHECK: # BB#0: -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %y = load i32, i32* %x, align 4 %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 ret <4 x i32>%res @@ -4951,8 +4951,8 @@ define <16 x i32> @mov_test16(i8 * %addr) { ; CHECK-LABEL: mov_test16: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x i32>* %res = load <16 x i32>, <16 x i32>* %vaddr, align 1 ret <16 x i32>%res @@ -4961,8 +4961,8 @@ define <16 x i32> @mov_test17(i8 * %addr) { ; CHECK-LABEL: mov_test17: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x i32>* %res = load <16 x i32>, <16 x i32>* %vaddr, align 64 ret <16 x i32>%res @@ -4971,9 +4971,9 @@ define void @mov_test18(i8 * %addr, <8 x i64> %data) { ; CHECK-LABEL: mov_test18: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps %zmm0, (%rdi) +; CHECK-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x i64>* store <8 x i64>%data, <8 x i64>* %vaddr, align 64 ret void @@ -4982,9 +4982,9 @@ define void @mov_test19(i8 * %addr, <16 x 
i32> %data) { ; CHECK-LABEL: mov_test19: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups %zmm0, (%rdi) +; CHECK-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x i32>* store <16 x i32>%data, <16 x i32>* %vaddr, align 1 ret void @@ -4993,9 +4993,9 @@ define void @mov_test20(i8 * %addr, <16 x i32> %data) { ; CHECK-LABEL: mov_test20: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps %zmm0, (%rdi) +; CHECK-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x i32>* store <16 x i32>%data, <16 x i32>* %vaddr, align 64 ret void @@ -5004,8 +5004,8 @@ define <8 x i64> @mov_test21(i8 * %addr) { ; CHECK-LABEL: mov_test21: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x i64>* %res = load <8 x i64>, <8 x i64>* %vaddr, align 64 ret <8 x i64>%res @@ -5014,9 +5014,9 @@ define void @mov_test22(i8 * %addr, <8 x i64> %data) { ; CHECK-LABEL: mov_test22: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups %zmm0, (%rdi) +; CHECK-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x i64>* store <8 x i64>%data, <8 x i64>* %vaddr, align 1 ret void @@ -5025,8 +5025,8 @@ define <8 x i64> @mov_test23(i8 * %addr) { ; CHECK-LABEL: mov_test23: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x i64>* %res = load <8 x 
i64>, <8 x i64>* %vaddr, align 1 ret <8 x i64>%res @@ -5035,9 +5035,9 @@ define void @mov_test24(i8 * %addr, <8 x double> %data) { ; CHECK-LABEL: mov_test24: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps %zmm0, (%rdi) +; CHECK-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x double>* store <8 x double>%data, <8 x double>* %vaddr, align 64 ret void @@ -5046,8 +5046,8 @@ define <8 x double> @mov_test25(i8 * %addr) { ; CHECK-LABEL: mov_test25: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x double>* %res = load <8 x double>, <8 x double>* %vaddr, align 64 ret <8 x double>%res @@ -5056,9 +5056,9 @@ define void @mov_test26(i8 * %addr, <16 x float> %data) { ; CHECK-LABEL: mov_test26: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps %zmm0, (%rdi) +; CHECK-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x float>* store <16 x float>%data, <16 x float>* %vaddr, align 64 ret void @@ -5067,8 +5067,8 @@ define <16 x float> @mov_test27(i8 * %addr) { ; CHECK-LABEL: mov_test27: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x float>* %res = load <16 x float>, <16 x float>* %vaddr, align 64 ret <16 x float>%res @@ -5077,9 +5077,9 @@ define void @mov_test28(i8 * %addr, <8 x double> %data) { ; CHECK-LABEL: mov_test28: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups %zmm0, (%rdi) +; CHECK-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] ; 
CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x double>* store <8 x double>%data, <8 x double>* %vaddr, align 1 ret void @@ -5088,8 +5088,8 @@ define <8 x double> @mov_test29(i8 * %addr) { ; CHECK-LABEL: mov_test29: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <8 x double>* %res = load <8 x double>, <8 x double>* %vaddr, align 1 ret <8 x double>%res @@ -5098,9 +5098,9 @@ define void @mov_test30(i8 * %addr, <16 x float> %data) { ; CHECK-LABEL: mov_test30: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups %zmm0, (%rdi) +; CHECK-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x float>* store <16 x float>%data, <16 x float>* %vaddr, align 1 ret void @@ -5109,8 +5109,8 @@ define <16 x float> @mov_test31(i8 * %addr) { ; CHECK-LABEL: mov_test31: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %vaddr = bitcast i8* %addr to <16 x float>* %res = load <16 x float>, <16 x float>* %vaddr, align 1 ret <16 x float>%res @@ -5119,10 +5119,10 @@ define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; CHECK-LABEL: mov_test32: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: 
vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 @@ -5133,10 +5133,10 @@ define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) { ; CHECK-LABEL: mov_test33: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 @@ -5147,10 +5147,10 @@ define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) { ; CHECK-LABEL: mov_test34: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 64 @@ -5161,10 +5161,10 @@ define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) { ; CHECK-LABEL: mov_test35: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x i32>* %r = load <16 x i32>, <16 x i32>* %vaddr, align 1 @@ -5175,10 +5175,10 @@ define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; CHECK-LABEL: mov_test36: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 @@ -5189,10 +5189,10 @@ define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) { ; CHECK-LABEL: mov_test37: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 -; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 @@ -5203,10 +5203,10 @@ define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) { ; CHECK-LABEL: mov_test38: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq 
%zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 64 @@ -5217,10 +5217,10 @@ define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) { ; CHECK-LABEL: mov_test39: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 -; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x i64>* %r = load <8 x i64>, <8 x i64>* %vaddr, align 1 @@ -5231,11 +5231,11 @@ define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { ; CHECK-LABEL: mov_test40: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 -; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} -; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x 
float>, <16 x float>* %vaddr, align 64 @@ -5246,11 +5246,11 @@ define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) { ; CHECK-LABEL: mov_test41: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 -; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} -; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vcmpordps %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpneqps %zmm2, %zmm1, %k1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 1 @@ -5261,11 +5261,11 @@ define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) { ; CHECK-LABEL: mov_test42: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 -; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 64 @@ -5276,11 +5276,11 @@ define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) { ; CHECK-LABEL: mov_test43: ; CHECK: # BB#0: -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 -; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 
{%k1} -; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpordps %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpneqps %zmm1, %zmm0, %k1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = fcmp one <16 x float> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <16 x float>* %r = load <16 x float>, <16 x float>* %vaddr, align 1 @@ -5291,11 +5291,11 @@ define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { ; CHECK-LABEL: mov_test44: ; CHECK: # BB#0: -; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} -; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 64 @@ -5306,11 +5306,11 @@ define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) { ; CHECK-LABEL: mov_test45: ; CHECK: # BB#0: -; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} -; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vcmpordpd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpneqpd %zmm2, %zmm1, %k1 {%k1} # sched: [3:1.00] +; 
CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 1 @@ -5321,11 +5321,11 @@ define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) { ; CHECK-LABEL: mov_test46: ; CHECK: # BB#0: -; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 64 @@ -5336,11 +5336,11 @@ define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) { ; CHECK-LABEL: mov_test47: ; CHECK: # BB#0: -; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} -; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vcmpordpd %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = fcmp one <8 x double> %mask1, zeroinitializer %vaddr = bitcast i8* %addr to <8 x double>* %r = load <8 x double>, <8 x double>* %vaddr, align 1 @@ -5351,11 +5351,11 @@ define i16 @mask16(i16 %x) { ; CHECK-LABEL: mask16: ; CHECK: 
# BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: mask16: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -5372,10 +5372,10 @@ define i32 @mask16_zext(i16 %x) { ; CHECK-LABEL: mask16_zext: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovw %k0, %eax -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovw %k0, %eax # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: mask16_zext: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -5392,11 +5392,11 @@ define i8 @mask8(i8 %x) { ; CHECK-LABEL: mask8: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: knotb %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: knotb %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AL %AL %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: mask8: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -5413,10 +5413,10 @@ define i32 @mask8_zext(i8 %x) { ; CHECK-LABEL: mask8_zext: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: knotb %k0, %k0 -; CHECK-NEXT: kmovb %k0, %eax -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: knotb %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, %eax # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: mask8_zext: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -5433,10 +5433,10 @@ define void @mask16_mem(i16* %ptr) { ; CHECK-LABEL: mask16_mem: ; CHECK: # BB#0: 
-; CHECK-NEXT: kmovw (%rdi), %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovw %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] +; CHECK-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %x = load i16, i16* %ptr, align 4 %m0 = bitcast i16 %x to <16 x i1> %m1 = xor <16 x i1> %m0, @@ -5448,10 +5448,10 @@ define void @mask8_mem(i8* %ptr) { ; CHECK-LABEL: mask8_mem: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb (%rdi), %k0 -; CHECK-NEXT: knotb %k0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] +; CHECK-NEXT: knotb %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: mask8_mem: ; SKX: ## BB#0: ; SKX-NEXT: kmovb (%rdi), %k0 @@ -5474,7 +5474,7 @@ ; CHECK-NEXT: andl %esi, %edi # sched: [1:0.25] ; CHECK-NEXT: orl %eax, %edi # sched: [1:0.25] ; CHECK-NEXT: movl %edi, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %ma = bitcast i16 %x to <16 x i1> %mb = bitcast i16 %y to <16 x i1> %mc = and <16 x i1> %ma, %mb @@ -5487,14 +5487,14 @@ define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) { ; CHECK-LABEL: mand16_mem: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw (%rdi), %k0 -; CHECK-NEXT: kmovw (%rsi), %k1 -; CHECK-NEXT: kandw %k1, %k0, %k2 -; CHECK-NEXT: kxorw %k1, %k0, %k0 -; CHECK-NEXT: korw %k0, %k2, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] +; CHECK-NEXT: kmovw (%rsi), %k1 # sched: [7:1.00] +; CHECK-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00] +; CHECK-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; 
SKX-LABEL: mand16_mem: ; SKX: ## BB#0: ; SKX-NEXT: kmovw (%rdi), %k0 @@ -5517,11 +5517,11 @@ define i8 @shuf_test1(i16 %v) nounwind { ; CHECK-LABEL: shuf_test1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kshiftrw $8, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AL %AL %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: shuf_test1: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -5538,13 +5538,13 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: zext_test1: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 -; CHECK-NEXT: kshiftlw $10, %k0, %k0 -; CHECK-NEXT: kshiftrw $15, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kshiftlw $10, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: andl $1, %eax # sched: [1:0.25] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_test1: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 @@ -5563,14 +5563,14 @@ define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: zext_test2: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 -; CHECK-NEXT: kshiftlw $10, %k0, %k0 -; CHECK-NEXT: kshiftrw $15, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kshiftlw $10, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: andl $1, %eax # sched: [1:0.25] ; CHECK-NEXT: # kill: %AX %AX %EAX ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # 
sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_test2: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 @@ -5590,14 +5590,14 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: zext_test3: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 -; CHECK-NEXT: kshiftlw $10, %k0, %k0 -; CHECK-NEXT: kshiftrw $15, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kshiftlw $10, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kshiftrw $15, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: andb $1, %al # sched: [1:0.25] ; CHECK-NEXT: # kill: %AL %AL %EAX ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: zext_test3: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 @@ -5617,11 +5617,11 @@ define i8 @conv1(<8 x i1>* %R) { ; CHECK-LABEL: conv1: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: kxnorw %k0, %k0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) +; CHECK-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; CHECK-NEXT: movb $-2, %al # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: conv1: ; SKX: ## BB#0: ## %entry ; SKX-NEXT: kxnorw %k0, %k0, %k0 @@ -5642,12 +5642,12 @@ define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) { ; CHECK-LABEL: test4: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 -; CHECK-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 -; CHECK-NEXT: kandnw %k0, %k1, %k0 +; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: kandnw %k0, %k1, %k0 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2d %k0, %xmm0 ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: 
[2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test4: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 @@ -5666,11 +5666,11 @@ define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) { ; CHECK-LABEL: vcmp_test5: ; CHECK: # BB#0: -; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 -; CHECK-NEXT: vpcmpgtq %xmm3, %xmm2, %k1 -; CHECK-NEXT: kandnw %k1, %k0, %k0 +; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00] +; CHECK-NEXT: vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: kandnw %k1, %k0, %k0 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2q %k0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vcmp_test5: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 @@ -5699,13 +5699,13 @@ define void @vcmp_test7(<8 x i1> %mask) { ; CHECK-LABEL: vcmp_test7: ; CHECK: # BB#0: # %allocas -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k0 +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] ; CHECK-NEXT: movb $85, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: korb %k1, %k0, %k0 -; CHECK-NEXT: ktestb %k0, %k0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: ktestb %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vcmp_test7: ; SKX: ## BB#0: ## %allocas ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -5730,19 +5730,19 @@ define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) { ; CHECK-LABEL: vcmp_test8: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] ; CHECK-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; CHECK-NEXT: jg .LBB386_1 # sched: [1:1.00] +; CHECK-NEXT: jg .LBB386_1 # sched: [1:0.50] ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: vpcmpltud 
%zmm2, %zmm1, %k0 +; CHECK-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2b %k0, %xmm0 ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; CHECK-NEXT: .LBB386_1: -; CHECK-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 +; CHECK-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2b %k0, %xmm0 ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vcmp_test8: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi @@ -5769,16 +5769,16 @@ ; CHECK-LABEL: vpmov_test9: ; CHECK: # BB#0: ; CHECK-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; CHECK-NEXT: jg .LBB387_1 # sched: [1:1.00] +; CHECK-NEXT: jg .LBB387_1 # sched: [1:0.50] ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: jmp .LBB387_3 # sched: [1:1.00] +; CHECK-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: jmp .LBB387_3 # sched: [1:0.50] ; CHECK-NEXT: .LBB387_1: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] ; CHECK-NEXT: .LBB387_3: -; CHECK-NEXT: vpmovb2m %xmm0, %k0 +; CHECK-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2b %k0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vpmov_test9: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi @@ -5805,16 +5805,16 @@ ; CHECK-LABEL: vmov_test11: ; CHECK: # BB#0: ; CHECK-NEXT: cmpl %esi, %edi # sched: [1:0.25] -; CHECK-NEXT: jg .LBB389_1 # sched: [1:1.00] +; CHECK-NEXT: jg .LBB389_1 # sched: [1:0.50] ; CHECK-NEXT: # BB#2: -; CHECK-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: jmp .LBB389_3 # sched: [1:1.00] +; CHECK-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: jmp .LBB389_3 # sched: [1:0.50] ; CHECK-NEXT: .LBB389_1: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] +; CHECK-NEXT: vpslld 
$31, %xmm0, %xmm0 # sched: [1:0.50] ; CHECK-NEXT: .LBB389_3: -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vmov_test11: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi @@ -5837,7 +5837,7 @@ ; CHECK-LABEL: vmov_test12: ; CHECK: # BB#0: ; CHECK-NEXT: movl %edi, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 0 %c = select i1 %b, i32 %x, i32 %y @@ -5848,7 +5848,7 @@ ; CHECK-LABEL: vmov_test13: ; CHECK: # BB#0: ; CHECK-NEXT: movl %esi, %eax # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = bitcast i16 21845 to <16 x i1> %b = extractelement <16 x i1> %a, i32 3 %c = select i1 %b, i32 %x, i32 %y @@ -5867,10 +5867,10 @@ ; CHECK-NEXT: movw $21845, %ax # imm = 0x5555 ; CHECK-NEXT: # sched: [1:0.25] ; CHECK-NEXT: movw $1, %cx # sched: [1:0.25] -; CHECK-NEXT: cmovgw %ax, %cx # sched: [1:1.00] -; CHECK-NEXT: kmovd %ecx, %k0 +; CHECK-NEXT: cmovgw %ax, %cx # sched: [1:0.50] +; CHECK-NEXT: kmovd %ecx, %k0 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2b %k0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vmov_test15: ; SKX: ## BB#0: ; SKX-NEXT: cmpl %esi, %edi @@ -5891,19 +5891,19 @@ ; ; CHECK-LABEL: vmov_test16: ; CHECK: # BB#0: -; CHECK-NEXT: kmovq %rdi, %k0 +; CHECK-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2b %k1, %zmm0 -; CHECK-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00] +; CHECK-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50] ; CHECK-NEXT: vpmovm2b %k0, %zmm1 ; CHECK-NEXT: movl $32, %eax # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; 
CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; CHECK-NEXT: vpmovb2m %zmm0, %k0 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00] +; CHECK-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2b %k0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vmov_test16: ; SKX: ## BB#0: ; SKX-NEXT: kmovq %rdi, %k0 @@ -5929,20 +5929,20 @@ ; ; CHECK-LABEL: vmov_test17: ; CHECK: # BB#0: -; CHECK-NEXT: kmovq %rdi, %k0 +; CHECK-NEXT: kmovq %rdi, %k0 # sched: [1:1.00] ; CHECK-NEXT: cmpl %edx, %esi # sched: [1:0.25] -; CHECK-NEXT: setg %al # sched: [1:1.00] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: setg %al # sched: [1:0.50] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2b %k1, %zmm0 -; CHECK-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00] +; CHECK-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:0.50] ; CHECK-NEXT: vpmovm2b %k0, %zmm1 ; CHECK-NEXT: movl $32, %eax # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; CHECK-NEXT: vpmovb2m %zmm0, %k0 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [3:1.00] +; CHECK-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2b %k0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vmov_test17: ; SKX: ## BB#0: ; SKX-NEXT: kmovq %rdi, %k0 @@ -5969,28 +5969,28 @@ define <8 x i1> @vmov_test18(i8 %a, i16 %y) { ; CHECK-LABEL: vmov_test18: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: 
kshiftlw $7, %k1, %k2 -; CHECK-NEXT: kshiftrw $15, %k2, %k2 -; CHECK-NEXT: kmovd %k2, %eax -; CHECK-NEXT: kshiftlw $6, %k1, %k1 -; CHECK-NEXT: kshiftrw $15, %k1, %k1 -; CHECK-NEXT: kmovd %k1, %ecx +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; CHECK-NEXT: kshiftlw $7, %k1, %k2 # sched: [3:1.00] +; CHECK-NEXT: kshiftrw $15, %k2, %k2 # sched: [3:1.00] +; CHECK-NEXT: kmovd %k2, %eax # sched: [3:1.00] +; CHECK-NEXT: kshiftlw $6, %k1, %k1 # sched: [3:1.00] +; CHECK-NEXT: kshiftrw $15, %k1, %k1 # sched: [3:1.00] +; CHECK-NEXT: kmovd %k1, %ecx # sched: [3:1.00] ; CHECK-NEXT: vpmovm2q %k0, %zmm0 -; CHECK-NEXT: kmovd %ecx, %k0 +; CHECK-NEXT: kmovd %ecx, %k0 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2q %k0, %zmm1 -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [5:0.50] -; CHECK-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 -; CHECK-NEXT: vpmovq2m %zmm2, %k0 -; CHECK-NEXT: kshiftlb $1, %k0, %k0 -; CHECK-NEXT: kshiftrb $1, %k0, %k0 -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: kshiftlb $7, %k1, %k1 -; CHECK-NEXT: korb %k1, %k0, %k0 +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [8:0.50] +; CHECK-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [3:1.00] +; CHECK-NEXT: vpmovq2m %zmm2, %k0 # sched: [1:1.00] +; CHECK-NEXT: kshiftlb $1, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kshiftrb $1, %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: kshiftlb $7, %k1, %k1 # sched: [3:1.00] +; CHECK-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vmov_test18: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -6026,10 +6026,10 @@ define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone { ; CHECK-LABEL: vmov_test21: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] -; CHECK-NEXT: 
vpmovb2m %ymm1, %k1 +; CHECK-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vmov_test21: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 @@ -6043,10 +6043,10 @@ define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) { ; CHECK-LABEL: vmov_test22: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vmov_test22: ; SKX: ## BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -6060,10 +6060,10 @@ define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) { ; CHECK-LABEL: vmov_test23: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: vmov_test23: ; SKX: ## BB#0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -6077,11 +6077,11 @@ define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) { ; CHECK-LABEL: store_v1i1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kxnorw %k0, %k0, %k1 -; CHECK-NEXT: kxorw %k1, %k0, %k0 -; CHECK-NEXT: kmovb %k0, (%rsi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00] +; CHECK-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, (%rsi) # sched: 
[1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_v1i1: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -6097,11 +6097,11 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; CHECK-LABEL: store_v2i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_v2i1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 @@ -6117,11 +6117,11 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; CHECK-LABEL: store_v4i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_v4i1: ; SKX: ## BB#0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 @@ -6137,11 +6137,11 @@ define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { ; CHECK-LABEL: store_v8i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: knotb %k0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: knotb %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; 
SKX-LABEL: store_v8i1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -6157,11 +6157,11 @@ define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { ; CHECK-LABEL: store_v16i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k0 -; CHECK-NEXT: knotw %k0, %k0 -; CHECK-NEXT: kmovw %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: knotw %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_v16i1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -6190,7 +6190,7 @@ define void @f1(i32 %c) { ; CHECK-LABEL: f1: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movzbl {{.*}}(%rip), %edi # sched: [1:0.50] +; CHECK-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] ; CHECK-NEXT: xorl $1, %edi # sched: [1:0.25] ; CHECK-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] ; CHECK-NEXT: jmp f2 # TAILCALL @@ -6210,7 +6210,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: andl $1, %edi # sched: [1:0.25] ; CHECK-NEXT: movb %dil, (%rsi) # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %c = trunc i16 %x to i1 store i1 %c, i1* %y ret void @@ -6221,7 +6221,7 @@ ; CHECK: # BB#0: ; CHECK-NEXT: andl $1, %edi # sched: [1:0.25] ; CHECK-NEXT: movb %dil, (%rsi) # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %c = trunc i8 %x to i1 store i1 %c, i1* %y ret void @@ -6232,9 +6232,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1497715861, %eax # imm = 0x59455495 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_build_vec_v32i1: ; SKX: ## BB#0: ; SKX-NEXT: movl 
$1497715861, %eax ## imm = 0x59455495 @@ -6250,9 +6250,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $6432645796886517060, %rax # imm = 0x5945594549549544 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_build_vec_v64i1: ; SKX: ## BB#0: ; SKX-NEXT: movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544 @@ -6266,20 +6266,20 @@ define void @ktest_1(<8 x double> %in, double * %base) { ; CHECK-LABEL: ktest_1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovupd (%rdi), %zmm1 # sched: [5:0.50] -; CHECK-NEXT: vcmpltpd %zmm0, %zmm1, %k1 -; CHECK-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: ktestb %k0, %k0 -; CHECK-NEXT: je .LBB410_2 # sched: [1:1.00] +; CHECK-NEXT: vmovupd (%rdi), %zmm1 # sched: [8:0.50] +; CHECK-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] +; CHECK-NEXT: ktestb %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: je .LBB410_2 # sched: [1:0.50] ; CHECK-NEXT: # BB#1: # %L1 -; CHECK-NEXT: vmovapd %zmm0, (%rdi) +; CHECK-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; CHECK-NEXT: .LBB410_2: # %L2 -; CHECK-NEXT: vmovapd %zmm0, 8(%rdi) +; CHECK-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: ktest_1: ; SKX: ## BB#0: ; SKX-NEXT: vmovupd (%rdi), %zmm1 @@ -6327,29 +6327,29 @@ ; ; CHECK-LABEL: ktest_2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovups (%rdi), %zmm2 # sched: [5:0.50] -; CHECK-NEXT: vmovups 64(%rdi), %zmm3 # sched: [5:0.50] 
-; CHECK-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; CHECK-NEXT: vcmpltps %zmm1, %zmm3, %k2 -; CHECK-NEXT: kunpckwd %k1, %k2, %k0 -; CHECK-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [5:0.50] -; CHECK-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [5:0.50] -; CHECK-NEXT: vcmpltps %zmm3, %zmm0, %k1 -; CHECK-NEXT: vcmpltps %zmm2, %zmm1, %k2 -; CHECK-NEXT: kunpckwd %k1, %k2, %k1 -; CHECK-NEXT: kord %k1, %k0, %k0 -; CHECK-NEXT: ktestd %k0, %k0 -; CHECK-NEXT: je .LBB411_2 # sched: [1:1.00] +; CHECK-NEXT: vmovups (%rdi), %zmm2 # sched: [8:0.50] +; CHECK-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50] +; CHECK-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] +; CHECK-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50] +; CHECK-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00] +; CHECK-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] +; CHECK-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00] +; CHECK-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: ktestd %k0, %k0 # sched: [3:1.00] +; CHECK-NEXT: je .LBB411_2 # sched: [1:0.50] ; CHECK-NEXT: # BB#1: # %L1 -; CHECK-NEXT: vmovaps %zmm0, (%rdi) -; CHECK-NEXT: vmovaps %zmm1, 64(%rdi) +; CHECK-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; CHECK-NEXT: .LBB411_2: # %L2 -; CHECK-NEXT: vmovaps %zmm0, 4(%rdi) -; CHECK-NEXT: vmovaps %zmm1, 68(%rdi) +; CHECK-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: ktest_2: ; SKX: ## BB#0: ; SKX-NEXT: vmovups (%rdi), %zmm2 @@ -6405,9 +6405,9 
@@ define <8 x i64> @load_8i1(<8 x i1>* %a) { ; CHECK-LABEL: load_8i1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb (%rdi), %k0 +; CHECK-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] ; CHECK-NEXT: vpmovm2q %k0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: load_8i1: ; SKX: ## BB#0: ; SKX-NEXT: kmovb (%rdi), %k0 @@ -6421,9 +6421,9 @@ define <16 x i32> @load_16i1(<16 x i1>* %a) { ; CHECK-LABEL: load_16i1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovw (%rdi), %k0 +; CHECK-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00] ; CHECK-NEXT: vpmovm2d %k0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: load_16i1: ; SKX: ## BB#0: ; SKX-NEXT: kmovw (%rdi), %k0 @@ -6437,9 +6437,9 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) { ; CHECK-LABEL: load_2i1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb (%rdi), %k0 +; CHECK-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] ; CHECK-NEXT: vpmovm2q %k0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: load_2i1: ; SKX: ## BB#0: ; SKX-NEXT: kmovb (%rdi), %k0 @@ -6453,9 +6453,9 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) { ; CHECK-LABEL: load_4i1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovb (%rdi), %k0 +; CHECK-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00] ; CHECK-NEXT: vpmovm2d %k0, %xmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: load_4i1: ; SKX: ## BB#0: ; SKX-NEXT: kmovb (%rdi), %k0 @@ -6469,9 +6469,9 @@ define <32 x i16> @load_32i1(<32 x i1>* %a) { ; CHECK-LABEL: load_32i1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd (%rdi), %k0 +; CHECK-NEXT: kmovd (%rdi), %k0 # sched: [7:1.00] ; CHECK-NEXT: vpmovm2w %k0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: load_32i1: ; SKX: ## BB#0: ; SKX-NEXT: kmovd (%rdi), %k0 @@ -6485,9 +6485,9 @@ define <64 x i8> @load_64i1(<64 x i1>* %a) { ; CHECK-LABEL: load_64i1: ; CHECK: # BB#0: -; CHECK-NEXT: kmovq (%rdi), %k0 +; CHECK-NEXT: 
kmovq (%rdi), %k0 # sched: [7:1.00] ; CHECK-NEXT: vpmovm2b %k0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: load_64i1: ; SKX: ## BB#0: ; SKX-NEXT: kmovq (%rdi), %k0 @@ -6501,10 +6501,10 @@ define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { ; CHECK-LABEL: store_8i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_8i1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -6518,10 +6518,10 @@ define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { ; CHECK-LABEL: store_8i1_1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: kmovb %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_8i1_1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 @@ -6536,10 +6536,10 @@ define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { ; CHECK-LABEL: store_16i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %xmm0, %k0 -; CHECK-NEXT: kmovw %k0, (%rdi) -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_16i1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -6553,11 +6553,11 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { ; CHECK-LABEL: 
store_32i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: vpmovb2m %ymm0, %k0 -; CHECK-NEXT: kmovd %k0, (%rdi) +; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_32i1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 @@ -6572,11 +6572,11 @@ define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; CHECK-LABEL: store_32i1_1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $15, %zmm0, %zmm0 -; CHECK-NEXT: vpmovw2m %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, (%rdi) +; CHECK-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_32i1_1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 @@ -6594,11 +6594,11 @@ ; ; CHECK-LABEL: store_64i1: ; CHECK: # BB#0: -; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0 -; CHECK-NEXT: vpmovb2m %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, (%rdi) +; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:0.50] +; CHECK-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: store_64i1: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 @@ -6613,12 +6613,12 @@ define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) { ; CHECK-LABEL: test_bitcast_v8i1_zext: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpeqd %zmm1, 
%zmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovb %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: addl %eax, %eax # sched: [1:0.25] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_bitcast_v8i1_zext: ; SKX: ## BB#0: ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -6638,12 +6638,12 @@ define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) { ; CHECK-LABEL: test_bitcast_v16i1_zext: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k0, %eax +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00] +; CHECK-NEXT: kmovw %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: addl %eax, %eax # sched: [1:0.25] ; CHECK-NEXT: vzeroupper # sched: [4:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %v1 = icmp eq <16 x i32> %a, zeroinitializer %mask1 = bitcast <16 x i1> %v1 to i16 %val = zext i16 %mask1 to i32 @@ -6654,12 +6654,12 @@ define i16 @test_v16i1_add(i16 %x, i16 %y) { ; CHECK-LABEL: test_v16i1_add: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: kxorw %k1, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; CHECK-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_v16i1_add: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -6678,12 +6678,12 @@ define i16 @test_v16i1_sub(i16 %x, i16 %y) { ; CHECK-LABEL: test_v16i1_sub: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: kxorw %k1, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %esi, 
%k1 # sched: [1:1.00] +; CHECK-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_v16i1_sub: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -6702,12 +6702,12 @@ define i16 @test_v16i1_mul(i16 %x, i16 %y) { ; CHECK-LABEL: test_v16i1_mul: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: kandw %k1, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; CHECK-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AX %AX %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_v16i1_mul: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -6726,12 +6726,12 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) { ; CHECK-LABEL: test_v8i1_add: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: kxorb %k1, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; CHECK-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AL %AL %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_v8i1_add: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -6750,12 +6750,12 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) { ; CHECK-LABEL: test_v8i1_sub: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: kxorb %k1, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; CHECK-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] 
; CHECK-NEXT: # kill: %AL %AL %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_v8i1_sub: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -6774,12 +6774,12 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) { ; CHECK-LABEL: test_v8i1_mul: ; CHECK: # BB#0: -; CHECK-NEXT: kmovd %edi, %k0 -; CHECK-NEXT: kmovd %esi, %k1 -; CHECK-NEXT: kandb %k1, %k0, %k0 -; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: kmovd %edi, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %esi, %k1 # sched: [1:1.00] +; CHECK-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00] +; CHECK-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; CHECK-NEXT: # kill: %AL %AL %EAX -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] ; SKX-LABEL: test_v8i1_mul: ; SKX: ## BB#0: ; SKX-NEXT: kmovd %edi, %k0 @@ -6798,8 +6798,8 @@ define <16 x i32> @_inreg16xi32(i32 %a) { ; CHECK-LABEL: _inreg16xi32: ; CHECK: # BB#0: -; CHECK-NEXT: vpbroadcastd %edi, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = insertelement <16 x i32> undef, i32 %a, i32 0 %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer ret <16 x i32> %c @@ -6808,8 +6808,8 @@ define <8 x i64> @_inreg8xi64(i64 %a) { ; CHECK-LABEL: _inreg8xi64: ; CHECK: # BB#0: -; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = insertelement <8 x i64> undef, i64 %a, i32 0 %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer ret <8 x i64> %c @@ -6818,8 +6818,8 @@ define <16 x float> @_ss16xfloat_v4(<4 x float> %a) { ; CHECK-LABEL: _ss16xfloat_v4: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = shufflevector <4 x float> %a, <4 
x float> undef, <16 x i32> zeroinitializer ret <16 x float> %b } @@ -6827,8 +6827,8 @@ define <16 x float> @_inreg16xfloat(float %a) { ; CHECK-LABEL: _inreg16xfloat: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = insertelement <16 x float> undef, float %a, i32 0 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer ret <16 x float> %c @@ -6837,11 +6837,11 @@ define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) { ; CHECK-LABEL: _ss16xfloat_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 -; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %b = insertelement <16 x float> undef, float %a, i32 0 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer @@ -6852,10 +6852,10 @@ define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) { ; CHECK-LABEL: _ss16xfloat_maskz: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %b = insertelement 
<16 x float> undef, float %a, i32 0 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer @@ -6866,8 +6866,8 @@ define <16 x float> @_ss16xfloat_load(float* %a.ptr) { ; CHECK-LABEL: _ss16xfloat_load: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load float, float* %a.ptr %b = insertelement <16 x float> undef, float %a, i32 0 %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer @@ -6877,10 +6877,10 @@ define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) { ; CHECK-LABEL: _ss16xfloat_mask_load: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 -; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load float, float* %a.ptr %mask = icmp ne <16 x i32> %mask1, zeroinitializer %b = insertelement <16 x float> undef, float %a, i32 0 @@ -6892,10 +6892,10 @@ define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) { ; CHECK-LABEL: _ss16xfloat_maskz_load: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 -; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load float, float* %a.ptr %mask = icmp ne <16 x i32> %mask1, zeroinitializer %b = insertelement 
<16 x float> undef, float %a, i32 0 @@ -6907,8 +6907,8 @@ define <8 x double> @_inreg8xdouble(double %a) { ; CHECK-LABEL: _inreg8xdouble: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = insertelement <8 x double> undef, double %a, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer ret <8 x double> %c @@ -6917,11 +6917,11 @@ define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) { ; CHECK-LABEL: _sd8xdouble_mask: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %ymm3, %ymm2, %k1 -; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} +; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %ymm3, %ymm2, %k1 # sched: [3:1.00] +; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer @@ -6932,10 +6932,10 @@ define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) { ; CHECK-LABEL: _sd8xdouble_maskz: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 -; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 %c = shufflevector <8 x double> %b, <8 x double> 
undef, <8 x i32> zeroinitializer @@ -6946,8 +6946,8 @@ define <8 x double> @_sd8xdouble_load(double* %a.ptr) { ; CHECK-LABEL: _sd8xdouble_load: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load double, double* %a.ptr %b = insertelement <8 x double> undef, double %a, i32 0 %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer @@ -6957,10 +6957,10 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) { ; CHECK-LABEL: _sd8xdouble_mask_load: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 -; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 # sched: [3:1.00] +; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load double, double* %a.ptr %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 @@ -6972,10 +6972,10 @@ define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) { ; CHECK-LABEL: _sd8xdouble_maskz_load: ; CHECK: # BB#0: -; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.50] -; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 -; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] +; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 # sched: [3:1.00] +; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] +; CHECK-NEXT: retq # sched: [7:1.00] %a = load double, double* %a.ptr %mask = icmp ne <8 x i32> %mask1, zeroinitializer %b = insertelement <8 x double> undef, double %a, i32 0 @@ -6987,8 +6987,8 @@ define <16 x i32> 
@_xmm16xi32(<16 x i32> %a) { ; CHECK-LABEL: _xmm16xi32: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer ret <16 x i32> %b } @@ -6996,8 +6996,8 @@ define <16 x float> @_xmm16xfloat(<16 x float> %a) { ; CHECK-LABEL: _xmm16xfloat: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer ret <16 x float> %b } @@ -7005,12 +7005,12 @@ define <16 x i32> @test_vbroadcast() { ; CHECK-LABEL: test_vbroadcast: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; CHECK-NEXT: vcmpunordps %zmm0, %zmm0, %k0 +; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] +; CHECK-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] ; CHECK-NEXT: vpmovm2d %k0, %zmm0 -; CHECK-NEXT: knotw %k0, %k1 +; CHECK-NEXT: knotw %k0, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %0 = sext <16 x i1> zeroinitializer to <16 x i32> %1 = fcmp uno <16 x float> undef, zeroinitializer @@ -7024,8 +7024,8 @@ define <8 x double> @test_set1_pd(double %d) #2 { ; CHECK-LABEL: test_set1_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %vecinit.i = insertelement <8 x double> undef, double %d, i32 0 %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1 @@ -7041,8 +7041,8 @@ define <8 x i64> @test_set1_epi64(i64 %d) #2 { ; CHECK-LABEL: test_set1_epi64: ; CHECK: # 
BB#0: # %entry -; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0 %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1 @@ -7058,8 +7058,8 @@ define <16 x float> @test_set1_ps(float %f) #2 { ; CHECK-LABEL: test_set1_ps: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %vecinit.i = insertelement <16 x float> undef, float %f, i32 0 %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1 @@ -7083,8 +7083,8 @@ define <16 x i32> @test_set1_epi32(i32 %f) #2 { ; CHECK-LABEL: test_set1_epi32: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vpbroadcastd %edi, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0 %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1 @@ -7110,8 +7110,8 @@ define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) { ; CHECK-LABEL: test_mm512_broadcastsd_pd: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] entry: %0 = extractelement <2 x double> %a, i32 0 %vecinit.i = insertelement <8 x double> undef, double %0, i32 0 @@ -7128,8 +7128,8 @@ define <16 x float> @suff_test1(<8 x float>%a) { ; CHECK-LABEL: suff_test1: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> 
zeroinitializer ret <16 x float>%res } @@ -7137,8 +7137,8 @@ define <8 x double> @suff_test2(<4 x double>%a) { ; CHECK-LABEL: suff_test2: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer ret <8 x double>%res } @@ -7146,8 +7146,8 @@ define <64 x i8> @_invec32xi8(<32 x i8>%a) { ; CHECK-LABEL: _invec32xi8: ; CHECK: # BB#0: -; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer ret <64 x i8>%res } @@ -7155,8 +7155,8 @@ define <32 x i16> @_invec16xi16(<16 x i16>%a) { ; CHECK-LABEL: _invec16xi16: ; CHECK: # BB#0: -; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer ret <32 x i16>%res } @@ -7164,8 +7164,8 @@ define <16 x i32> @_invec8xi32(<8 x i32>%a) { ; CHECK-LABEL: _invec8xi32: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer ret <16 x i32>%res } @@ -7173,8 +7173,8 @@ define <8 x i64> @_invec4xi64(<4 x i64>%a) { ; CHECK-LABEL: _invec4xi64: ; CHECK: # BB#0: -; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer ret <8 x i64>%res } @@ 
-7186,13 +7186,14 @@ ; CHECK-NEXT: subq $24, %rsp # sched: [1:0.25] ; CHECK-NEXT: .Lcfi0: ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; CHECK-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; CHECK-NEXT: # sched: [1:1.00] ; CHECK-NEXT: callq func_f32 -; CHECK-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:?] +; CHECK-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] +; CHECK-NEXT: # sched: [8:0.50] ; CHECK-NEXT: addq $24, %rsp # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = fadd float %x, %x call void @func_f32(float %a) %b = insertelement <16 x float> undef, float %a, i32 0 @@ -7207,13 +7208,14 @@ ; CHECK-NEXT: subq $24, %rsp # sched: [1:0.25] ; CHECK-NEXT: .Lcfi1: ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; CHECK-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; CHECK-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; CHECK-NEXT: # sched: [1:1.00] ; CHECK-NEXT: callq func_f64 -; CHECK-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:?] 
+; CHECK-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] +; CHECK-NEXT: # sched: [8:0.50] ; CHECK-NEXT: addq $24, %rsp # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %a = fadd double %x, %x call void @func_f64(double %a) %b = insertelement <8 x double> undef, double %a, i32 0 Index: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -5,22 +5,22 @@ define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) { ; CHECK-LABEL: test_16xi16_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [1:0.50] -; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] +; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %vec2) { ; CHECK-LABEL: test_masked_16xi16_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] ; CHECK-NEXT: movw $-10197, %ax # imm = 0xD82B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm1 {%k1} # sched: [6:2.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x 
i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -29,12 +29,12 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec) { ; CHECK-LABEL: test_masked_z_16xi16_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] ; CHECK-NEXT: movw $-10197, %ax # imm = 0xD82B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -42,13 +42,13 @@ define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %vec2) { ; CHECK-LABEL: test_masked_16xi16_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] ; CHECK-NEXT: movw $-15864, %ax # imm = 0xC208 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm1 {%k1} # sched: [6:2.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -57,12 +57,12 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec) { ; CHECK-LABEL: test_masked_z_16xi16_perm_mask1: 
; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] ; CHECK-NEXT: movw $-15864, %ax # imm = 0xC208 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -70,13 +70,13 @@ define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %vec2) { ; CHECK-LABEL: test_masked_16xi16_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] ; CHECK-NEXT: movw $27562, %ax # imm = 0x6BAA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm1 {%k1} # sched: [6:2.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -85,12 +85,12 @@ define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec) { ; CHECK-LABEL: test_masked_z_16xi16_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] ; CHECK-NEXT: 
movw $27562, %ax # imm = 0x6BAA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -98,22 +98,22 @@ define <16 x i16> @test_16xi16_perm_mask3(<16 x i16> %vec) { ; CHECK-LABEL: test_16xi16_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [1:0.50] -; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] +; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %vec2) { ; CHECK-LABEL: test_masked_16xi16_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] ; CHECK-NEXT: movw $16968, %ax # imm = 0x4248 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm1 {%k1} # sched: [6:2.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -122,12 +122,12 @@ 
define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec) { ; CHECK-LABEL: test_masked_z_16xi16_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] ; CHECK-NEXT: movw $16968, %ax # imm = 0x4248 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -135,9 +135,9 @@ define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) { ; CHECK-LABEL: test_16xi16_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [1:0.50] -; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] +; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -145,12 +145,12 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2) { ; CHECK-LABEL: test_masked_16xi16_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] ; CHECK-NEXT: movw $-27811, %ax # imm = 0x935D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw 
(%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -160,12 +160,12 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50] ; CHECK-NEXT: movw $-27811, %ax # imm = 0x935D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -175,12 +175,12 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2) { ; CHECK-LABEL: test_masked_16xi16_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] ; CHECK-NEXT: movw $19027, %ax # imm = 0x4A53 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} # sched: [13:2.00] +; CHECK-NEXT: retq 
# sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -190,12 +190,12 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50] ; CHECK-NEXT: movw $19027, %ax # imm = 0x4A53 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -205,12 +205,12 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2) { ; CHECK-LABEL: test_masked_16xi16_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] ; CHECK-NEXT: movw $12412, %ax # imm = 0x307C ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -220,12 +220,12 @@ 
define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50] ; CHECK-NEXT: movw $12412, %ax # imm = 0x307C ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -235,9 +235,9 @@ define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) { ; CHECK-LABEL: test_16xi16_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [1:0.50] -; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] +; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -245,12 +245,12 @@ define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2) { ; CHECK-LABEL: test_masked_16xi16_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] ; CHECK-NEXT: movw $12238, %ax # imm = 0x2FCE ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: 
vpermw (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -260,12 +260,12 @@ define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50] ; CHECK-NEXT: movw $12238, %ax # imm = 0x2FCE ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -275,22 +275,22 @@ define <32 x i16> @test_32xi16_perm_mask0(<32 x i16> %vec) { ; CHECK-LABEL: test_32xi16_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [5:0.50] -; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] +; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } define <32 x i16> 
@test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] ; CHECK-NEXT: movl $948454498, %eax # imm = 0x38884462 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm1 {%k1} # sched: [6:2.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -299,12 +299,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec) { ; CHECK-LABEL: test_masked_z_32xi16_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50] ; CHECK-NEXT: movl $948454498, %eax # imm = 0x38884462 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -312,13 +312,13 @@ define <32 x i16> 
@test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50] ; CHECK-NEXT: movl $-1516442487, %eax # imm = 0xA59CEC89 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm1 {%k1} # sched: [6:2.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -327,12 +327,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec) { ; CHECK-LABEL: test_masked_z_32xi16_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50] ; CHECK-NEXT: movl $-1516442487, %eax # imm = 0xA59CEC89 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -340,13 +340,13 @@ define <32 x i16> 
@test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50] ; CHECK-NEXT: movl $1504501134, %eax # imm = 0x59ACDD8E ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm1 {%k1} # sched: [6:2.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -355,12 +355,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec) { ; CHECK-LABEL: test_masked_z_32xi16_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50] ; CHECK-NEXT: movl $1504501134, %eax # imm = 0x59ACDD8E ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -368,22 +368,22 @@ define <32 x i16> 
@test_32xi16_perm_mask3(<32 x i16> %vec) { ; CHECK-LABEL: test_32xi16_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [5:0.50] -; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] +; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] ; CHECK-NEXT: movl $774459490, %eax # imm = 0x2E295062 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm1 {%k1} # sched: [6:2.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -392,12 +392,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec) { ; CHECK-LABEL: test_masked_z_32xi16_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50] ; CHECK-NEXT: movl $774459490, %eax # imm = 0x2E295062 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [6:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -405,9 +405,9 @@ define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) { ; CHECK-LABEL: test_32xi16_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [5:0.50] -; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] +; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -415,12 +415,12 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] ; CHECK-NEXT: movl $1431978123, %eax # imm = 0x555A408B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %zmm1, 
%zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -430,12 +430,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50] ; CHECK-NEXT: movl $1431978123, %eax # imm = 0x555A408B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -445,12 +445,12 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50] ; CHECK-NEXT: movl $-903561653, %eax # imm = 0xCA24BE4B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), 
%zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -460,12 +460,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50] ; CHECK-NEXT: movl $-903561653, %eax # imm = 0xCA24BE4B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -475,12 +475,12 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50] ; CHECK-NEXT: movl $-1209035774, %eax # imm = 0xB7EF9402 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw 
(%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -490,12 +490,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50] ; CHECK-NEXT: movl $-1209035774, %eax # imm = 0xB7EF9402 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -505,9 +505,9 @@ define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) { ; CHECK-LABEL: test_32xi16_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [5:0.50] -; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] +; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00] +; CHECK-NEXT: retq # 
sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -515,12 +515,12 @@ define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] ; CHECK-NEXT: movl $1452798329, %eax # imm = 0x5697F179 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -530,12 +530,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50] ; CHECK-NEXT: movl $1452798329, %eax # imm = 0x5697F179 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [13:2.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x 
i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -545,21 +545,21 @@ define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) { ; CHECK-LABEL: test_8xi32_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50] ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2) { ; CHECK-LABEL: test_masked_8xi32_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50] ; CHECK-NEXT: movb $-53, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res @@ -568,11 +568,11 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50] ; CHECK-NEXT: movb $-53, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %ymm0, 
%ymm1, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -580,12 +580,12 @@ define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2) { ; CHECK-LABEL: test_masked_8xi32_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50] ; CHECK-NEXT: movb $-89, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res @@ -594,11 +594,11 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,1,2,6,0,0,3] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,1,2,6,0,0,3] sched: [7:0.50] ; CHECK-NEXT: movb $-89, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -606,12 +606,12 @@ define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2) { ; CHECK-LABEL: test_masked_8xi32_perm_mask2: ; CHECK: # BB#0: -; 
CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50] ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res @@ -620,11 +620,11 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [3,6,5,5,1,7,3,4] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [3,6,5,5,1,7,3,4] sched: [7:0.50] ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -632,21 +632,21 @@ define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) { ; CHECK-LABEL: test_8xi32_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50] ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x 
i32> %vec2) { ; CHECK-LABEL: test_masked_8xi32_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50] ; CHECK-NEXT: movb $47, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res @@ -655,11 +655,11 @@ define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50] ; CHECK-NEXT: movb $47, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -667,9 +667,9 @@ define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) { ; CHECK-LABEL: test_8xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [1:0.50] -; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] +; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] 
%vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res @@ -677,11 +677,11 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2) { ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50] ; CHECK-NEXT: movb $-116, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 @@ -691,11 +691,11 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50] ; CHECK-NEXT: movb $-116, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -705,11 +705,11 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2) { ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [1:0.50] +; CHECK-NEXT: vmovdqa 
{{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50] ; CHECK-NEXT: movb $89, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 @@ -719,11 +719,11 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [4,6,1,7,6,7,6,5] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [4,6,1,7,6,7,6,5] sched: [7:0.50] ; CHECK-NEXT: movb $89, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -733,11 +733,11 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2) { ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50] ; CHECK-NEXT: movb $98, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 
x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 @@ -747,11 +747,11 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [6,4,6,1,6,3,6,3] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [6,4,6,1,6,3,6,3] sched: [7:0.50] ; CHECK-NEXT: movb $98, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -761,9 +761,9 @@ define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) { ; CHECK-LABEL: test_8xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [1:0.50] -; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] +; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res @@ -771,11 +771,11 @@ define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2) { ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50] ; CHECK-NEXT: movb $-58, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 @@ -785,11 +785,11 @@ define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [1:0.50] +; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50] ; CHECK-NEXT: movb $-58, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -799,22 +799,22 @@ define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) { ; CHECK-LABEL: test_16xi32_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [5:0.50] -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2) { ; CHECK-LABEL: test_masked_16xi32_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: 
[5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] ; CHECK-NEXT: movw $-28063, %ax # imm = 0x9261 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res @@ -823,12 +823,12 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50] ; CHECK-NEXT: movw $-28063, %ax # imm = 0x9261 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -836,13 +836,13 @@ define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2) { ; CHECK-LABEL: test_masked_16xi32_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50] ; CHECK-NEXT: movw $14154, %ax # imm = 0x374A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; 
CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res @@ -851,12 +851,12 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50] ; CHECK-NEXT: movw $14154, %ax # imm = 0x374A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -864,13 +864,13 @@ define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2) { ; CHECK-LABEL: test_masked_16xi32_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50] ; CHECK-NEXT: movw $6126, %ax # imm = 0x17EE ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] 
+; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res @@ -879,12 +879,12 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50] ; CHECK-NEXT: movw $6126, %ax # imm = 0x17EE ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -892,22 +892,22 @@ define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) { ; CHECK-LABEL: test_16xi32_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [5:0.50] -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2) { ; CHECK-LABEL: test_masked_16xi32_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] 
; CHECK-NEXT: movw $-11837, %ax # imm = 0xD1C3 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res @@ -916,12 +916,12 @@ define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50] ; CHECK-NEXT: movw $-11837, %ax # imm = 0xD1C3 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -929,9 +929,9 @@ define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; CHECK-LABEL: test_16xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [5:0.50] -; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] +; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 
x i32> undef, <16 x i32> ret <16 x i32> %res @@ -939,12 +939,12 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2) { ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] ; CHECK-NEXT: movw $19075, %ax # imm = 0x4A83 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 @@ -954,12 +954,12 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50] ; CHECK-NEXT: movw $19075, %ax # imm = 0x4A83 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -969,12 +969,12 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2) { ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask1: ; CHECK: # 
BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50] ; CHECK-NEXT: movw $27511, %ax # imm = 0x6B77 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 @@ -984,12 +984,12 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm0 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm0 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50] ; CHECK-NEXT: movw $27511, %ax # imm = 0x6B77 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -999,12 +999,12 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2) { ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50] ; 
CHECK-NEXT: movw $3032, %ax # imm = 0xBD8 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 @@ -1014,12 +1014,12 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm0 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm0 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50] ; CHECK-NEXT: movw $3032, %ax # imm = 0xBD8 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -1029,9 +1029,9 @@ define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; CHECK-LABEL: test_16xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [5:0.50] -; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] +; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x 
i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res @@ -1039,12 +1039,12 @@ define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2) { ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] ; CHECK-NEXT: movw $8666, %ax # imm = 0x21DA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 @@ -1054,12 +1054,12 @@ define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [5:0.50] +; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50] ; CHECK-NEXT: movw $8666, %ax # imm = 0x21DA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -1070,7 +1070,7 @@ ; CHECK-LABEL: test_4xi64_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [3:1.00] -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> ret <4 x i64> %res } @@ -1078,10 +1078,10 @@ ; CHECK-LABEL: test_masked_4xi64_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [3:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec2 ret <4 x i64> %res @@ -1091,9 +1091,9 @@ ; CHECK-LABEL: test_masked_z_4xi64_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer ret <4 x i64> %res @@ -1102,10 +1102,10 @@ ; CHECK-LABEL: test_masked_4xi64_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [3:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec2 ret <4 x i64> %res @@ -1115,9 +1115,9 @@ ; 
CHECK-LABEL: test_masked_z_4xi64_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer ret <4 x i64> %res @@ -1126,10 +1126,10 @@ ; CHECK-LABEL: test_masked_4xi64_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [3:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec2 ret <4 x i64> %res @@ -1139,9 +1139,9 @@ ; CHECK-LABEL: test_masked_z_4xi64_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer ret <4 x i64> %res @@ -1150,7 +1150,7 @@ ; CHECK-LABEL: test_4xi64_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x 
i64> %vec, <4 x i64> undef, <4 x i32> ret <4 x i64> %res } @@ -1158,10 +1158,10 @@ ; CHECK-LABEL: test_masked_4xi64_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [3:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec2 ret <4 x i64> %res @@ -1171,9 +1171,9 @@ ; CHECK-LABEL: test_masked_z_4xi64_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer ret <4 x i64> %res @@ -1181,8 +1181,8 @@ define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) { ; CHECK-LABEL: test_4xi64_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> ret <4 x i64> %res @@ -1191,9 +1191,9 @@ ; CHECK-LABEL: test_masked_4xi64_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec2 @@ -1204,9 +1204,9 @@ ; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -1217,9 +1217,9 @@ ; CHECK-LABEL: test_masked_4xi64_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec2 @@ -1230,9 +1230,9 @@ ; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x 
i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -1243,9 +1243,9 @@ ; CHECK-LABEL: test_masked_4xi64_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec2 @@ -1256,9 +1256,9 @@ ; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -1268,8 +1268,8 @@ define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) { ; CHECK-LABEL: test_4xi64_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> ret <4 x i64> %res @@ -1278,9 +1278,9 @@ ; CHECK-LABEL: test_masked_4xi64_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec2 @@ -1291,9 +1291,9 @@ ; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i64>, <4 x i64>* %vp %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -1303,21 +1303,21 @@ define <8 x i64> @test_8xi64_perm_mask0(<8 x i64> %vec) { ; CHECK-LABEL: test_8xi64_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [5:0.50] -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [8:0.50] +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res } define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2) { ; CHECK-LABEL: test_masked_8xi64_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [8:0.50] ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq 
%zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 ret <8 x i64> %res @@ -1326,11 +1326,11 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec) { ; CHECK-LABEL: test_masked_z_8xi64_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [8:0.50] ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -1339,10 +1339,10 @@ ; CHECK-LABEL: test_masked_8xi64_perm_imm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-122, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 ret <8 x i64> %res @@ -1352,9 +1352,9 @@ ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-122, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd 
%eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -1362,12 +1362,12 @@ define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2) { ; CHECK-LABEL: test_masked_8xi64_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [8:0.50] ; CHECK-NEXT: movb $17, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 ret <8 x i64> %res @@ -1376,11 +1376,11 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec) { ; CHECK-LABEL: test_masked_z_8xi64_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,3,7,3,3,5,4,1] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,3,7,3,3,5,4,1] sched: [8:0.50] ; CHECK-NEXT: movb $17, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -1388,8 +1388,8 @@ define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) { ; CHECK-LABEL: 
test_8xi64_perm_imm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res } @@ -1397,10 +1397,10 @@ ; CHECK-LABEL: test_masked_8xi64_perm_imm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 ret <8 x i64> %res @@ -1410,9 +1410,9 @@ ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -1420,12 +1420,12 @@ define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2) { ; CHECK-LABEL: test_masked_8xi64_perm_mask4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [8:0.50] ; CHECK-NEXT: movb $-81, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq %zmm0, %zmm2, 
%zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 ret <8 x i64> %res @@ -1434,11 +1434,11 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec) { ; CHECK-LABEL: test_masked_z_8xi64_perm_mask4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [6,3,1,1,7,4,0,3] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [6,3,1,1,7,4,0,3] sched: [8:0.50] ; CHECK-NEXT: movb $-81, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -1447,10 +1447,10 @@ ; CHECK-LABEL: test_masked_8xi64_perm_imm_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-67, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 ret <8 x i64> %res @@ -1460,9 +1460,9 @@ ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-67, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0,0,0,0,4,4,4,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -1470,21 +1470,21 @@ define <8 x i64> @test_8xi64_perm_mask6(<8 x i64> %vec) { ; CHECK-LABEL: test_8xi64_perm_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [5:0.50] -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [8:0.50] +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res } define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2) { ; CHECK-LABEL: test_masked_8xi64_perm_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [8:0.50] ; CHECK-NEXT: movb $-86, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 ret <8 x i64> %res @@ -1493,11 +1493,11 @@ define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec) { ; CHECK-LABEL: test_masked_z_8xi64_perm_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = 
[5,1,4,4,5,4,2,7] sched: [8:0.50] ; CHECK-NEXT: movb $-86, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -1506,10 +1506,10 @@ ; CHECK-LABEL: test_masked_8xi64_perm_imm_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 ret <8 x i64> %res @@ -1519,9 +1519,9 @@ ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -1529,9 +1529,9 @@ define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) { ; CHECK-LABEL: test_8xi64_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [5:0.50] -; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [8:0.50] +; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res @@ -1539,11 +1539,11 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %vec2) { ; CHECK-LABEL: test_masked_8xi64_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [8:0.50] ; CHECK-NEXT: movb $-108, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 @@ -1553,11 +1553,11 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [8:0.50] ; CHECK-NEXT: movb $-108, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1568,9 +1568,9 @@ ; CHECK-LABEL: 
test_masked_8xi64_perm_imm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $125, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 @@ -1581,9 +1581,9 @@ ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $125, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1593,11 +1593,11 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %vec2) { ; CHECK-LABEL: test_masked_8xi64_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [8:0.50] ; CHECK-NEXT: movb $-77, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 @@ 
-1607,11 +1607,11 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,2,1,4,1,1,5,5] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,2,1,4,1,1,5,5] sched: [8:0.50] ; CHECK-NEXT: movb $-77, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1621,8 +1621,8 @@ define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) { ; CHECK-LABEL: test_8xi64_perm_imm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res @@ -1631,9 +1631,9 @@ ; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $55, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 @@ -1644,9 +1644,9 @@ ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3: ; CHECK: # BB#0: ; 
CHECK-NEXT: movb $55, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1656,11 +1656,11 @@ define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %vec2) { ; CHECK-LABEL: test_masked_8xi64_perm_mem_mask4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [8:0.50] ; CHECK-NEXT: movb $68, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 @@ -1670,11 +1670,11 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [5,0,7,0,3,5,0,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [5,0,7,0,3,5,0,6] sched: [8:0.50] ; CHECK-NEXT: movb $68, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x 
i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1685,9 +1685,9 @@ ; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 @@ -1698,9 +1698,9 @@ ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1710,9 +1710,9 @@ define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) { ; CHECK-LABEL: test_8xi64_perm_mem_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [5:0.50] -; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [8:0.50] +; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> ret <8 x i64> %res @@ -1720,11 +1720,11 @@ define <8 x 
i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %vec2) { ; CHECK-LABEL: test_masked_8xi64_perm_mem_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [8:0.50] ; CHECK-NEXT: movb $42, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 @@ -1734,11 +1734,11 @@ define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [8:0.50] ; CHECK-NEXT: movb $42, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1749,9 +1749,9 @@ ; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00] +; CHECK-NEXT: 
retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec2 @@ -1762,9 +1762,9 @@ ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i64>, <8 x i64>* %vp %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -1774,21 +1774,21 @@ define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) { ; CHECK-LABEL: test_8xfloat_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50] ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> ret <8 x float> %res } define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2) { ; CHECK-LABEL: test_masked_8xfloat_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50] ; CHECK-NEXT: movb $33, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf 
= shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec2 ret <8 x float> %res @@ -1797,11 +1797,11 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50] ; CHECK-NEXT: movb $33, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -1809,12 +1809,12 @@ define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %vec2) { ; CHECK-LABEL: test_masked_8xfloat_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50] ; CHECK-NEXT: movb $-34, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec2 ret <8 x float> %res @@ -1823,11 +1823,11 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = 
[4,2,1,0,6,0,5,1] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,1,0,6,0,5,1] sched: [7:0.50] ; CHECK-NEXT: movb $-34, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -1835,12 +1835,12 @@ define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2) { ; CHECK-LABEL: test_masked_8xfloat_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50] ; CHECK-NEXT: movb $-18, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec2 ret <8 x float> %res @@ -1849,11 +1849,11 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [2,5,5,5,4,6,0,5] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [2,5,5,5,4,6,0,5] sched: [7:0.50] ; CHECK-NEXT: movb $-18, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps 
%ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -1861,21 +1861,21 @@ define <8 x float> @test_8xfloat_perm_mask3(<8 x float> %vec) { ; CHECK-LABEL: test_8xfloat_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50] ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> ret <8 x float> %res } define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %vec2) { ; CHECK-LABEL: test_masked_8xfloat_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50] ; CHECK-NEXT: movb $82, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec2 ret <8 x float> %res @@ -1884,11 +1884,11 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50] ; CHECK-NEXT: movb $82, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps 
%ymm0, %ymm1, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -1896,9 +1896,9 @@ define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) { ; CHECK-LABEL: test_8xfloat_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [1:0.50] -; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] +; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> ret <8 x float> %res @@ -1906,11 +1906,11 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %vec2) { ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50] ; CHECK-NEXT: movb $61, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec2 @@ -1920,11 +1920,11 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0: ; 
CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50] ; CHECK-NEXT: movb $61, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -1934,11 +1934,11 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2) { ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50] ; CHECK-NEXT: movb $-124, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec2 @@ -1948,11 +1948,11 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [1,3,7,4,0,6,6,6] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [1,3,7,4,0,6,6,6] sched: [7:0.50] ; CHECK-NEXT: movb $-124, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -1962,11 +1962,11 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %vec2) { ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50] ; CHECK-NEXT: movb $-84, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec2 @@ -1976,11 +1976,11 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [4,5,1,5,6,6,2,4] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [4,5,1,5,6,6,2,4] sched: [7:0.50] ; CHECK-NEXT: movb $-84, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ 
-1990,9 +1990,9 @@ define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp) { ; CHECK-LABEL: test_8xfloat_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [1:0.50] -; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] +; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> ret <8 x float> %res @@ -2000,11 +2000,11 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2) { ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50] ; CHECK-NEXT: movb $60, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec2 @@ -2014,11 +2014,11 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [1:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50] ; CHECK-NEXT: movb $60, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; 
CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -2028,22 +2028,22 @@ define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) { ; CHECK-LABEL: test_16xfloat_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [5:0.50] -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> ret <16 x float> %res } define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2) { ; CHECK-LABEL: test_masked_16xfloat_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] ; CHECK-NEXT: movw $14423, %ax # imm = 0x3857 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec2 ret <16 x float> %res @@ -2052,12 +2052,12 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec) { ; CHECK-LABEL: 
test_masked_z_16xfloat_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50] ; CHECK-NEXT: movw $14423, %ax # imm = 0x3857 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -2065,13 +2065,13 @@ define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %vec2) { ; CHECK-LABEL: test_masked_16xfloat_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50] ; CHECK-NEXT: movw $-22757, %ax # imm = 0xA71B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec2 ret <16 x float> %res @@ -2080,12 +2080,12 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: 
[5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50] ; CHECK-NEXT: movw $-22757, %ax # imm = 0xA71B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -2093,13 +2093,13 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2) { ; CHECK-LABEL: test_masked_16xfloat_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50] ; CHECK-NEXT: movw $-22227, %ax # imm = 0xA92D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec2 ret <16 x float> %res @@ -2108,12 +2108,12 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50] ; CHECK-NEXT: movw $-22227, %ax # imm = 
0xA92D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -2121,22 +2121,22 @@ define <16 x float> @test_16xfloat_perm_mask3(<16 x float> %vec) { ; CHECK-LABEL: test_16xfloat_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [5:0.50] -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> ret <16 x float> %res } define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %vec2) { ; CHECK-LABEL: test_masked_16xfloat_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] ; CHECK-NEXT: movw $32420, %ax # imm = 0x7EA4 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x 
float> %shuf, <16 x float> %vec2 ret <16 x float> %res @@ -2145,12 +2145,12 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50] ; CHECK-NEXT: movw $32420, %ax # imm = 0x7EA4 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -2158,9 +2158,9 @@ define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) { ; CHECK-LABEL: test_16xfloat_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [5:0.50] -; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] +; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> ret <16 x float> %res @@ -2168,12 +2168,12 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2) { ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] ; CHECK-NEXT: movw 
$1441, %ax # imm = 0x5A1 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec2 @@ -2183,12 +2183,12 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50] ; CHECK-NEXT: movw $1441, %ax # imm = 0x5A1 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -2198,12 +2198,12 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2) { ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50] ; CHECK-NEXT: movw $-12684, %ax # imm = 0xCE74 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: 
[2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec2 @@ -2213,12 +2213,12 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50] ; CHECK-NEXT: movw $-12684, %ax # imm = 0xCE74 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -2228,12 +2228,12 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2) { ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50] ; CHECK-NEXT: movw $11066, %ax # imm = 0x2B3A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec 
= load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec2 @@ -2243,12 +2243,12 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50] ; CHECK-NEXT: movw $11066, %ax # imm = 0x2B3A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -2258,9 +2258,9 @@ define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) { ; CHECK-LABEL: test_16xfloat_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [5:0.50] -; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] +; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> ret <16 x float> %res @@ -2268,12 +2268,12 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2) { ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = 
[15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] ; CHECK-NEXT: movw $-13916, %ax # imm = 0xC9A4 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec2 @@ -2283,12 +2283,12 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [5:0.50] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50] ; CHECK-NEXT: movw $-13916, %ax # imm = 0xC9A4 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x float>, <16 x float>* %vp %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -2299,7 +2299,7 @@ ; CHECK-LABEL: test_4xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> ret <4 x double> %res } @@ -2307,10 +2307,10 @@ ; CHECK-LABEL: 
test_masked_4xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [3:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec2 ret <4 x double> %res @@ -2320,9 +2320,9 @@ ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -2331,10 +2331,10 @@ ; CHECK-LABEL: test_masked_4xdouble_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [3:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec2 ret <4 x double> %res @@ -2344,9 +2344,9 @@ ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; 
CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -2355,10 +2355,10 @@ ; CHECK-LABEL: test_masked_4xdouble_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [3:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec2 ret <4 x double> %res @@ -2368,9 +2368,9 @@ ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -2379,7 +2379,7 @@ ; CHECK-LABEL: test_4xdouble_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> ret <4 x double> %res } @@ -2387,10 
+2387,10 @@ ; CHECK-LABEL: test_masked_4xdouble_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [3:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec2 ret <4 x double> %res @@ -2400,9 +2400,9 @@ ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -2410,8 +2410,8 @@ define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) { ; CHECK-LABEL: test_4xdouble_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> ret <4 x double> %res @@ -2420,9 +2420,9 @@ ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec2 @@ -2433,9 +2433,9 @@ ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -2446,9 +2446,9 @@ ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec2 @@ -2459,9 +2459,9 @@ ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [10:1.00] +; CHECK-NEXT: retq # sched: 
[7:1.00] %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -2472,9 +2472,9 @@ ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec2 @@ -2485,9 +2485,9 @@ ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -2497,8 +2497,8 @@ define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) { ; CHECK-LABEL: test_4xdouble_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> ret <4 x double> %res @@ -2507,9 +2507,9 @@ ; CHECK-LABEL: 
test_masked_4xdouble_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec2 @@ -2520,9 +2520,9 @@ ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -2532,21 +2532,21 @@ define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) { ; CHECK-LABEL: test_8xdouble_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [5:0.50] -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [8:0.50] +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res } define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2) { ; CHECK-LABEL: test_masked_8xdouble_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [5:0.50] +; 
CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [8:0.50] ; CHECK-NEXT: movb $-115, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 ret <8 x double> %res @@ -2555,11 +2555,11 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [8:0.50] ; CHECK-NEXT: movb $-115, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -2568,10 +2568,10 @@ ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> 
%vec2 ret <8 x double> %res @@ -2581,9 +2581,9 @@ ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -2591,12 +2591,12 @@ define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2) { ; CHECK-LABEL: test_masked_8xdouble_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [8:0.50] ; CHECK-NEXT: movb $49, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 ret <8 x double> %res @@ -2605,11 +2605,11 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [7,5,5,5,3,5,1,7] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [7,5,5,5,3,5,1,7] sched: [8:0.50] ; CHECK-NEXT: movb $49, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 
# sched: [1:1.00] +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -2617,8 +2617,8 @@ define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) { ; CHECK-LABEL: test_8xdouble_perm_imm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res } @@ -2626,10 +2626,10 @@ ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-57, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 ret <8 x double> %res @@ -2639,9 +2639,9 @@ ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-57, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret 
<8 x double> %res @@ -2649,12 +2649,12 @@ define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %vec2) { ; CHECK-LABEL: test_masked_8xdouble_perm_mask4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [8:0.50] ; CHECK-NEXT: movb $-54, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 ret <8 x double> %res @@ -2663,11 +2663,11 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [3,5,3,4,6,5,7,1] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [3,5,3,4,6,5,7,1] sched: [8:0.50] ; CHECK-NEXT: movb $-54, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -2676,10 +2676,10 @@ ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-41, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = 
zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 ret <8 x double> %res @@ -2689,9 +2689,9 @@ ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-41, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -2699,21 +2699,21 @@ define <8 x double> @test_8xdouble_perm_mask6(<8 x double> %vec) { ; CHECK-LABEL: test_8xdouble_perm_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [5:0.50] -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [8:0.50] +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res } define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %vec2) { ; CHECK-LABEL: test_masked_8xdouble_perm_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [8:0.50] ; CHECK-NEXT: movb $-65, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm1 {%k1} +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd 
%zmm0, %zmm2, %zmm1 {%k1} # sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 ret <8 x double> %res @@ -2722,11 +2722,11 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [8:0.50] ; CHECK-NEXT: movb $-65, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -2735,10 +2735,10 @@ ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $40, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 ret <8 x double> %res @@ -2748,9 +2748,9 @@ ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $40, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -2758,9 +2758,9 @@ define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) { ; CHECK-LABEL: test_8xdouble_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [5:0.50] -; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [8:0.50] +; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res @@ -2768,11 +2768,11 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2) { ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [8:0.50] ; CHECK-NEXT: movb $99, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 @@ -2782,11 +2782,11 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; CHECK: 
# BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [8:0.50] ; CHECK-NEXT: movb $99, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -2797,9 +2797,9 @@ ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-32, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 @@ -2810,9 +2810,9 @@ ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-32, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -2822,11 +2822,11 @@ define <8 x double> 
@test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2) { ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [8:0.50] ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 @@ -2836,11 +2836,11 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [6,7,2,7,7,6,2,5] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [6,7,2,7,7,6,2,5] sched: [8:0.50] ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -2850,8 +2850,8 @@ define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) { ; CHECK-LABEL: test_8xdouble_perm_imm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: 
[7:1.00] %vec = load <8 x double>, <8 x double>* %vp %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res @@ -2860,9 +2860,9 @@ ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $119, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 @@ -2873,9 +2873,9 @@ ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $119, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -2885,11 +2885,11 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %vec2) { ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [8:0.50] ; CHECK-NEXT: movb $-45, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd (%rdi), 
%zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 @@ -2899,11 +2899,11 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask4: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [1,1,3,5,6,0,6,0] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [1,1,3,5,6,0,6,0] sched: [8:0.50] ; CHECK-NEXT: movb $-45, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -2914,9 +2914,9 @@ ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $33, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 @@ -2927,9 +2927,9 @@ ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $33, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] -; CHECK-NEXT: retq # sched: 
[2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -2939,9 +2939,9 @@ define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) { ; CHECK-LABEL: test_8xdouble_perm_mem_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [5:0.50] -; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [8:0.50] +; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> ret <8 x double> %res @@ -2949,11 +2949,11 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %vec2) { ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [8:0.50] ; CHECK-NEXT: movb $-75, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 @@ -2963,11 +2963,11 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask6: ; CHECK: # 
BB#0: -; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [5:0.50] +; CHECK-NEXT: vmovapd {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [8:0.50] ; CHECK-NEXT: movb $-75, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -2978,9 +2978,9 @@ ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $84, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec2 @@ -2991,9 +2991,9 @@ ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $84, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x double>, <8 x double>* %vp %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -3003,8 +3003,8 @@ define <16 x i8> 
@test_16xi8_perm_mask0(<16 x i8> %vec) { ; CHECK-LABEL: test_16xi8_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> ret <16 x i8> %res } @@ -3013,10 +3013,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-10197, %ax # imm = 0xD82B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> %vec2 ret <16 x i8> %res @@ -3027,9 +3027,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-10197, %ax # imm = 0xD82B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> zeroinitializer ret <16 x i8> %res @@ -3039,10 +3039,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-15864, %ax # imm = 0xC208 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] 
+; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> %vec2 ret <16 x i8> %res @@ -3053,9 +3053,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-15864, %ax # imm = 0xC208 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> zeroinitializer ret <16 x i8> %res @@ -3065,10 +3065,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $27562, %ax # imm = 0x6BAA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> %vec2 ret <16 x i8> %res @@ -3079,9 +3079,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $27562, %ax # imm = 0x6BAA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] -; CHECK-NEXT: retq # sched: 
[2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> zeroinitializer ret <16 x i8> %res @@ -3089,8 +3089,8 @@ define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) { ; CHECK-LABEL: test_16xi8_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> ret <16 x i8> %res } @@ -3099,10 +3099,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $16968, %ax # imm = 0x4248 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> %vec2 ret <16 x i8> %res @@ -3113,9 +3113,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $16968, %ax # imm = 0x4248 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = 
shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> zeroinitializer ret <16 x i8> %res @@ -3123,9 +3123,9 @@ define <16 x i8> @test_16xi8_perm_mem_mask0(<16 x i8>* %vp) { ; CHECK-LABEL: test_16xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> ret <16 x i8> %res @@ -3133,12 +3133,12 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %vec2) { ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; CHECK-NEXT: movw $-27811, %ax # imm = 0x935D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> %vec2 @@ -3148,12 +3148,12 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] ; CHECK-NEXT: movw $-27811, %ax # imm 
= 0x935D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> zeroinitializer @@ -3163,12 +3163,12 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %vec2) { ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; CHECK-NEXT: movw $19027, %ax # imm = 0x4A53 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> %vec2 @@ -3178,12 +3178,12 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] ; CHECK-NEXT: movw $19027, %ax # imm = 0x4A53 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd 
%eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> zeroinitializer @@ -3193,12 +3193,12 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %vec2) { ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; CHECK-NEXT: movw $12412, %ax # imm = 0x307C ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> %vec2 @@ -3208,12 +3208,12 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] ; CHECK-NEXT: movw $12412, %ax # imm = 0x307C ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector 
<16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> zeroinitializer @@ -3223,9 +3223,9 @@ define <16 x i8> @test_16xi8_perm_mem_mask3(<16 x i8>* %vp) { ; CHECK-LABEL: test_16xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> ret <16 x i8> %res @@ -3233,12 +3233,12 @@ define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %vec2) { ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm1 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50] ; CHECK-NEXT: movw $12238, %ax # imm = 0x2FCE ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> %vec2 @@ -3248,12 +3248,12 @@ define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] ; CHECK-NEXT: movw $12238, %ax # imm = 0x2FCE ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: 
kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i8>, <16 x i8>* %vp %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> %res = select <16 x i1> , <16 x i8> %shuf, <16 x i8> zeroinitializer @@ -3263,8 +3263,8 @@ define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) { ; CHECK-LABEL: test_32xi8_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> ret <32 x i8> %res } @@ -3273,10 +3273,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $948454498, %eax # imm = 0x38884462 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> %vec2 ret <32 x i8> %res @@ -3287,9 +3287,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $948454498, %eax # imm = 0x38884462 ; CHECK-NEXT: # sched: 
[1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> zeroinitializer ret <32 x i8> %res @@ -3299,10 +3299,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1516442487, %eax # imm = 0xA59CEC89 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> %vec2 ret <32 x i8> %res @@ -3313,9 +3313,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1516442487, %eax # imm = 0xA59CEC89 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x 
i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> zeroinitializer ret <32 x i8> %res @@ -3325,10 +3325,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1504501134, %eax # imm = 0x59ACDD8E ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> %vec2 ret <32 x i8> %res @@ -3339,9 +3339,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1504501134, %eax # imm = 0x59ACDD8E ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> zeroinitializer ret <32 x i8> %res @@ -3349,8 +3349,8 @@ define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) { ; CHECK-LABEL: test_32xi8_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} 
ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> ret <32 x i8> %res } @@ -3359,10 +3359,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $774459490, %eax # imm = 0x2E295062 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> %vec2 ret <32 x i8> %res @@ -3373,9 +3373,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $774459490, %eax # imm = 0x2E295062 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> zeroinitializer ret <32 x i8> %res @@ -3383,9 +3383,9 @@ define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) { ; CHECK-LABEL: test_32xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [1:0.50] -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> ret <32 x i8> %res @@ -3393,12 +3393,12 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %vec2) { ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; CHECK-NEXT: movl $1431978123, %eax # imm = 0x555A408B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> %vec2 @@ -3408,12 +3408,12 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] ; CHECK-NEXT: movl $1431978123, %eax # imm = 0x555A408B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] 
-; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> zeroinitializer @@ -3423,12 +3423,12 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %vec2) { ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; CHECK-NEXT: movl $-903561653, %eax # imm = 0xCA24BE4B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> %vec2 @@ -3438,12 +3438,12 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] ; CHECK-NEXT: movl $-903561653, %eax # imm = 0xCA24BE4B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] -; CHECK-NEXT: retq # sched: 
[2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> zeroinitializer @@ -3453,12 +3453,12 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %vec2) { ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; CHECK-NEXT: movl $-1209035774, %eax # imm = 0xB7EF9402 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> %vec2 @@ -3468,12 +3468,12 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] ; CHECK-NEXT: movl $-1209035774, %eax # imm = 0xB7EF9402 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd 
%eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> zeroinitializer @@ -3483,9 +3483,9 @@ define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) { ; CHECK-LABEL: test_32xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [1:0.50] -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> ret <32 x i8> %res @@ -3493,12 +3493,12 @@ define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %vec2) { ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm1 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50] ; CHECK-NEXT: movl $1452798329, %eax # imm = 0x5697F179 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %shuf 
= shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> %vec2 @@ -3508,12 +3508,12 @@ define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [1:0.50] +; CHECK-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50] ; CHECK-NEXT: movl $1452798329, %eax # imm = 0x5697F179 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i8>, <32 x i8>* %vp %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> %res = select <32 x i1> , <32 x i8> %shuf, <32 x i8> zeroinitializer @@ -3523,8 +3523,8 @@ define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) { ; CHECK-LABEL: test_64xi8_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> ret <64 x i8> %res } @@ -3533,10 +3533,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $3680399704764602881, %rax # imm = 0x3313680829F25A01 ; 
CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> %vec2 ret <64 x i8> %res @@ -3547,9 +3547,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $3680399704764602881, %rax # imm = 0x3313680829F25A01 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> zeroinitializer ret <64 x i8> %res @@ -3559,10 +3559,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $3029806472256067585, %rax # imm = 0x2A0C08EF15009801 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb 
{{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> %vec2 ret <64 x i8> %res @@ -3573,9 +3573,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $3029806472256067585, %rax # imm = 0x2A0C08EF15009801 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> zeroinitializer ret <64 x i8> %res @@ -3585,10 +3585,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $1110016799796225, %rax # imm = 0x3F18DED0BEC01 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = 
zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> %vec2 ret <64 x i8> %res @@ -3599,9 +3599,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $1110016799796225, %rax # imm = 0x3F18DED0BEC01 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> zeroinitializer ret <64 x i8> %res @@ -3609,8 +3609,8 @@ define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) { ; CHECK-LABEL: test_64xi8_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = 
zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> ret <64 x i8> %res } @@ -3619,10 +3619,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $839183534234450945, %rax # imm = 0xBA560FA6B66BC01 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> %vec2 ret <64 x i8> %res @@ -3633,9 +3633,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movabsq $839183534234450945, %rax # imm = 0xBA560FA6B66BC01 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] -; CHECK-NEXT: retq # sched: 
[2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> zeroinitializer ret <64 x i8> %res @@ -3643,9 +3643,9 @@ define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) { ; CHECK-LABEL: test_64xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> ret <64 x i8> %res @@ -3653,12 +3653,12 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %vec2) { ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] ; CHECK-NEXT: movabsq $3164984076108002305, %rax # imm = 0x2BEC483F982F7401 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = 
zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> %vec2 @@ -3668,12 +3668,12 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] ; CHECK-NEXT: movabsq $3164984076108002305, %rax # imm = 0x2BEC483F982F7401 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> zeroinitializer @@ -3683,12 +3683,12 @@ define <64 x i8> 
@test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %vec2) { ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] ; CHECK-NEXT: movabsq $3421658227176024577, %rax # imm = 0x2F7C2C07659EAA01 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> %vec2 @@ -3698,12 +3698,12 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] ; CHECK-NEXT: movabsq $3421658227176024577, %rax # imm = 0x2F7C2C07659EAA01 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = 
zmm0[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> zeroinitializer @@ -3713,12 +3713,12 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %vec2) { ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] ; CHECK-NEXT: movabsq $3085252902658394625, %rax # imm = 0x2AD1052B29324A01 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> %vec2 @@ -3728,12 +3728,12 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] ; CHECK-NEXT: movabsq $3085252902658394625, %rax # imm = 0x2AD1052B29324A01 ; CHECK-NEXT: # sched: 
[1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> zeroinitializer @@ -3743,9 +3743,9 @@ define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) { ; CHECK-LABEL: test_64xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [5:0.50] -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> ret <64 x i8> %res @@ -3753,12 +3753,12 @@ define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %vec2) { ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [5:0.50] +; 
CHECK-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50] ; CHECK-NEXT: movabsq $29622951609754113, %rax # imm = 0x693DEAE3E5E201 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> %vec2 @@ -3768,12 +3768,12 @@ define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp) { ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [5:0.50] +; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50] ; CHECK-NEXT: movabsq $29622951609754113, %rax # imm = 0x693DEAE3E5E201 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovq %rax, %k1 -; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovq %rax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec 
= load <64 x i8>, <64 x i8>* %vp %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> %res = select <64 x i1> , <64 x i8> %shuf, <64 x i8> zeroinitializer @@ -3784,7 +3784,7 @@ ; CHECK-LABEL: test_8xi16_perm_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } @@ -3792,10 +3792,10 @@ ; CHECK-LABEL: test_masked_8xi16_perm_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-82, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res @@ -3805,9 +3805,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-82, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res @@ -3816,10 +3816,10 @@ ; CHECK-LABEL: test_masked_8xi16_perm_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $43, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] +; CHECK-NEXT: kmovd %eax, 
%k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res @@ -3829,9 +3829,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $43, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res @@ -3840,10 +3840,10 @@ ; CHECK-LABEL: test_masked_8xi16_perm_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $20, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res @@ -3853,9 +3853,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $20, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = 
xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res @@ -3864,7 +3864,7 @@ ; CHECK-LABEL: test_8xi16_perm_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } @@ -3872,10 +3872,10 @@ ; CHECK-LABEL: test_masked_8xi16_perm_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-20, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res @@ -3885,9 +3885,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-20, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res @@ -3896,10 +3896,10 @@ ; CHECK-LABEL: test_masked_8xi16_perm_high_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-104, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 
{%k1} = xmm0[0,1,2,3,5,5,7,6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res @@ -3909,9 +3909,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-104, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res @@ -3920,10 +3920,10 @@ ; CHECK-LABEL: test_masked_8xi16_perm_low_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res @@ -3933,9 +3933,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: 
[1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res @@ -3944,7 +3944,7 @@ ; CHECK-LABEL: test_8xi16_perm_high_mask6: ; CHECK: # BB#0: ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res } @@ -3952,10 +3952,10 @@ ; CHECK-LABEL: test_masked_8xi16_perm_high_mask6: ; CHECK: # BB#0: ; CHECK-NEXT: movb $117, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res @@ -3965,9 +3965,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask6: ; CHECK: # BB#0: ; CHECK-NEXT: movb $117, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res @@ -3976,10 +3976,10 @@ ; CHECK-LABEL: test_masked_8xi16_perm_low_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $39, %al # sched: [1:0.25] -; 
CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 ret <8 x i16> %res @@ -3989,9 +3989,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $39, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer ret <8 x i16> %res @@ -3999,8 +3999,8 @@ define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) { ; CHECK-LABEL: test_8xi16_perm_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res @@ -4009,9 +4009,9 @@ ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-83, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] 
sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 @@ -4022,9 +4022,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-83, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -4035,9 +4035,9 @@ ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-108, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 @@ -4048,9 +4048,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-108, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, 
<8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -4061,9 +4061,9 @@ ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-58, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 @@ -4074,9 +4074,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-58, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -4086,8 +4086,8 @@ define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) { ; CHECK-LABEL: test_8xi16_perm_low_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret <8 x i16> %res @@ -4096,9 +4096,9 @@ ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $74, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: 
vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 @@ -4109,9 +4109,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $74, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -4122,9 +4122,9 @@ ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-81, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 @@ -4135,9 +4135,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-81, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: 
vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -4148,9 +4148,9 @@ ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $53, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 @@ -4161,9 +4161,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $53, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -4173,8 +4173,8 @@ define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) { ; CHECK-LABEL: test_8xi16_perm_high_mem_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> ret 
<8 x i16> %res @@ -4183,9 +4183,9 @@ ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask6: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-121, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 @@ -4196,9 +4196,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask6: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-121, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -4209,9 +4209,9 @@ ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $87, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> %vec2 @@ -4222,9 +4222,9 @@ ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $87, %al # sched: [1:0.25] -; 
CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i16>, <8 x i16>* %vp %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> %res = select <8 x i1> , <8 x i16> %shuf, <8 x i16> zeroinitializer @@ -4235,7 +4235,7 @@ ; CHECK-LABEL: test_16xi16_perm_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } @@ -4244,10 +4244,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3495, %ax # imm = 0xF259 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -4258,9 +4258,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3495, %ax # imm = 0xF259 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -4270,10 +4270,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-11903, %ax # imm 
= 0xD181 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -4284,9 +4284,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-11903, %ax # imm = 0xD181 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -4296,10 +4296,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-14510, %ax # imm = 0xC752 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -4310,9 +4310,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-14510, %ax # imm = 0xC752 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> 
undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -4321,7 +4321,7 @@ ; CHECK-LABEL: test_16xi16_perm_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } @@ -4330,10 +4330,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-16563, %ax # imm = 0xBF4D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -4344,9 +4344,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-16563, %ax # imm = 0xBF4D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -4356,10 +4356,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $12298, %ax # imm = 0x300A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x 
i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -4370,9 +4370,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $12298, %ax # imm = 0x300A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -4382,10 +4382,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-29565, %ax # imm = 0x8C83 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -4396,9 +4396,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-29565, %ax # imm = 0x8C83 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -4407,7 +4407,7 @@ ; CHECK-LABEL: test_16xi16_perm_high_mask6: ; CHECK: # BB#0: ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] 
%res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res } @@ -4416,10 +4416,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $27779, %ax # imm = 0x6C83 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -4430,9 +4430,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $27779, %ax # imm = 0x6C83 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -4442,10 +4442,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3292, %ax # imm = 0xF324 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 ret <16 x i16> %res @@ -4456,9 +4456,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3292, %ax # imm = 0xF324 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = 
ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer ret <16 x i16> %res @@ -4466,8 +4466,8 @@ define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) { ; CHECK-LABEL: test_16xi16_perm_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -4477,9 +4477,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12838, %ax # imm = 0xCDDA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -4491,9 +4491,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12838, %ax # imm = 0xCDDA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -4505,9 +4505,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $14962, %ax # imm = 0x3A72 
; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -4519,9 +4519,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $14962, %ax # imm = 0x3A72 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -4533,9 +4533,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $1029, %ax # imm = 0x405 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -4547,9 +4547,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $1029, %ax # imm = 0x405 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = 
select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -4559,8 +4559,8 @@ define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) { ; CHECK-LABEL: test_16xi16_perm_low_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -4570,9 +4570,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30862, %ax # imm = 0x8772 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -4584,9 +4584,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30862, %ax # imm = 0x8772 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -4598,9 +4598,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3845, %ax # imm = 0xF0FB ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] -; CHECK-NEXT: 
retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -4612,9 +4612,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3845, %ax # imm = 0xF0FB ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -4626,9 +4626,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-20955, %ax # imm = 0xAE25 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -4640,9 +4640,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-20955, %ax # imm = 0xAE25 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -4652,8 +4652,8 @@ define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) { ; CHECK-LABEL: test_16xi16_perm_high_mem_mask6: ; 
CHECK: # BB#0: -; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> ret <16 x i16> %res @@ -4663,9 +4663,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-24190, %ax # imm = 0xA182 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> %vec2 @@ -4677,9 +4677,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-24190, %ax # imm = 0xA182 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -4691,9 +4691,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-24392, %ax # imm = 0xA0B8 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> 
%shuf, <16 x i16> %vec2 @@ -4705,9 +4705,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-24392, %ax # imm = 0xA0B8 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i16>, <16 x i16>* %vp %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> %res = select <16 x i1> , <16 x i16> %shuf, <16 x i16> zeroinitializer @@ -4717,8 +4717,8 @@ define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) { ; CHECK-LABEL: test_32xi16_perm_high_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } @@ -4727,10 +4727,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1671867126, %eax # imm = 0x63A6AAF6 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -4741,9 +4741,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1671867126, 
%eax # imm = 0x63A6AAF6 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -4753,10 +4753,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-514766311, %eax # imm = 0xE1514A19 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -4767,9 +4767,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-514766311, %eax # imm = 0xE1514A19 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00] +; CHECK-NEXT: retq # 
sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -4779,10 +4779,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $165000787, %eax # imm = 0x9D5B653 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -4793,9 +4793,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $165000787, %eax # imm = 0x9D5B653 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -4803,8 +4803,8 @@ define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) { ; CHECK-LABEL: test_32xi16_perm_low_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } @@ -4813,10 +4813,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1998504075, %eax # imm = 0x771EC08B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -4827,9 +4827,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1998504075, %eax # imm = 0x771EC08B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -4839,10 +4839,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-730778639, %eax # imm = 0xD47133F1 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = 
zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -4853,9 +4853,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-730778639, %eax # imm = 0xD47133F1 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -4865,10 +4865,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $544659762, %eax # imm = 0x2076D932 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x 
i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -4879,9 +4879,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $544659762, %eax # imm = 0x2076D932 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -4889,8 +4889,8 @@ define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) { ; CHECK-LABEL: test_32xi16_perm_high_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res } @@ -4899,10 +4899,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1243446456, %eax # imm = 0xB5E28348 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: 
[7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -4913,9 +4913,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1243446456, %eax # imm = 0xB5E28348 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -4925,10 +4925,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1409246810, %eax # imm = 0x53FF665A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 ret <32 x i16> %res @@ -4939,9 +4939,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1409246810, %eax # imm = 0x53FF665A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] -; CHECK-NEXT: retq # sched: 
[2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer ret <32 x i16> %res @@ -4949,8 +4949,8 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) { ; CHECK-LABEL: test_32xi16_perm_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -4960,9 +4960,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1911488810, %eax # imm = 0x8E10FED6 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -4974,9 +4974,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1911488810, %eax # imm = 0x8E10FED6 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} 
{z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -4988,9 +4988,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1098876619, %eax # imm = 0xBE807935 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -5002,9 +5002,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1098876619, %eax # imm = 0xBE807935 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = 
select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -5016,9 +5016,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1583892148, %eax # imm = 0xA197B94C ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -5030,9 +5030,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1583892148, %eax # imm = 0xA197B94C ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -5042,8 +5042,8 @@ define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) { ; CHECK-LABEL: test_32xi16_perm_low_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = 
mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -5053,9 +5053,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-216128444, %eax # imm = 0xF31E2444 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -5067,9 +5067,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-216128444, %eax # imm = 0xF31E2444 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -5081,9 +5081,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1480468153, %eax # imm = 0x583E26B9 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = 
mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -5095,9 +5095,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $1480468153, %eax # imm = 0x583E26B9 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -5107,12 +5107,12 @@ define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2) { ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask5: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] +; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] ; CHECK-NEXT: movl $-1778617447, %eax # imm = 0x95FC7399 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp 
%shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -5122,12 +5122,12 @@ define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp) { ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask5: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00] ; CHECK-NEXT: movl $-1778617447, %eax # imm = 0x95FC7399 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -5137,8 +5137,8 @@ define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) { ; CHECK-LABEL: test_32xi16_perm_high_mem_mask6: ; CHECK: # BB#0: -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> ret <32 x i16> %res @@ -5148,9 +5148,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $355619267, %eax # imm = 0x153251C3 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw 
{{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -5162,9 +5162,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $355619267, %eax # imm = 0x153251C3 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -5176,9 +5176,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1890659259, %eax # imm = 0x8F4ED445 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> %vec2 @@ -5190,9 +5190,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movl $-1890659259, %eax # imm = 0x8F4ED445 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; 
CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <32 x i16>, <32 x i16>* %vp %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> %res = select <32 x i1> , <32 x i16> %shuf, <32 x i16> zeroinitializer @@ -5203,7 +5203,7 @@ ; CHECK-LABEL: test_4xi32_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res } @@ -5211,10 +5211,10 @@ ; CHECK-LABEL: test_masked_4xi32_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> %vec2 ret <4 x i32> %res @@ -5224,9 +5224,9 @@ ; CHECK-LABEL: test_masked_z_4xi32_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i32> %vec, <4 x i32> 
undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> zeroinitializer ret <4 x i32> %res @@ -5235,10 +5235,10 @@ ; CHECK-LABEL: test_masked_4xi32_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> %vec2 ret <4 x i32> %res @@ -5248,9 +5248,9 @@ ; CHECK-LABEL: test_masked_z_4xi32_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> zeroinitializer ret <4 x i32> %res @@ -5259,10 +5259,10 @@ ; CHECK-LABEL: test_masked_4xi32_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> %vec2 ret <4 x i32> %res @@ -5272,9 +5272,9 @@ ; CHECK-LABEL: test_masked_z_4xi32_perm_mask2: ; CHECK: # 
BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> zeroinitializer ret <4 x i32> %res @@ -5283,7 +5283,7 @@ ; CHECK-LABEL: test_4xi32_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res } @@ -5291,10 +5291,10 @@ ; CHECK-LABEL: test_masked_4xi32_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> %vec2 ret <4 x i32> %res @@ -5304,9 +5304,9 @@ ; CHECK-LABEL: test_masked_z_4xi32_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> zeroinitializer ret <4 x 
i32> %res @@ -5314,8 +5314,8 @@ define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) { ; CHECK-LABEL: test_4xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res @@ -5324,9 +5324,9 @@ ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> %vec2 @@ -5337,9 +5337,9 @@ ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> zeroinitializer @@ -5350,9 +5350,9 @@ ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: 
vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> %vec2 @@ -5363,9 +5363,9 @@ ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> zeroinitializer @@ -5376,9 +5376,9 @@ ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> %vec2 @@ -5389,9 +5389,9 @@ ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x 
i1> , <4 x i32> %shuf, <4 x i32> zeroinitializer @@ -5401,8 +5401,8 @@ define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) { ; CHECK-LABEL: test_4xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> ret <4 x i32> %res @@ -5411,9 +5411,9 @@ ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> %vec2 @@ -5424,9 +5424,9 @@ ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <4 x i32>, <4 x i32>* %vp %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %res = select <4 x i1> , <4 x i32> %shuf, <4 x i32> zeroinitializer @@ -5437,7 +5437,7 @@ ; CHECK-LABEL: test2_8xi32_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i32> %vec, <8 x i32> undef, 
<8 x i32> ret <8 x i32> %res } @@ -5445,10 +5445,10 @@ ; CHECK-LABEL: test2_masked_8xi32_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-99, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res @@ -5458,9 +5458,9 @@ ; CHECK-LABEL: test2_masked_z_8xi32_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-99, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -5469,10 +5469,10 @@ ; CHECK-LABEL: test2_masked_8xi32_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-90, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res @@ -5482,9 +5482,9 @@ ; CHECK-LABEL: test2_masked_z_8xi32_perm_mask1: ; CHECK: # BB#0: ; 
CHECK-NEXT: movb $-90, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -5493,10 +5493,10 @@ ; CHECK-LABEL: test2_masked_8xi32_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res @@ -5506,9 +5506,9 @@ ; CHECK-LABEL: test2_masked_z_8xi32_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -5517,7 +5517,7 @@ ; CHECK-LABEL: test2_8xi32_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x 
i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res } @@ -5525,10 +5525,10 @@ ; CHECK-LABEL: test2_masked_8xi32_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $116, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.25] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 ret <8 x i32> %res @@ -5538,9 +5538,9 @@ ; CHECK-LABEL: test2_masked_z_8xi32_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $116, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -5548,8 +5548,8 @@ define <8 x i32> @test2_8xi32_perm_mem_mask0(<8 x i32>* %vp) { ; CHECK-LABEL: test2_8xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res @@ -5558,9 +5558,9 @@ ; CHECK-LABEL: test2_masked_8xi32_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-25, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 
{%k1} = mem[1,0,2,0,5,4,6,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 @@ -5571,9 +5571,9 @@ ; CHECK-LABEL: test2_masked_z_8xi32_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-25, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -5584,9 +5584,9 @@ ; CHECK-LABEL: test2_masked_8xi32_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-97, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 @@ -5597,9 +5597,9 @@ ; CHECK-LABEL: test2_masked_z_8xi32_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-97, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = 
mem[0,3,2,0,4,7,6,4] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -5610,9 +5610,9 @@ ; CHECK-LABEL: test2_masked_8xi32_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $73, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 @@ -5623,9 +5623,9 @@ ; CHECK-LABEL: test2_masked_z_8xi32_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $73, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -5635,8 +5635,8 @@ define <8 x i32> @test2_8xi32_perm_mem_mask3(<8 x i32>* %vp) { ; CHECK-LABEL: test2_8xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> ret <8 x i32> %res @@ -5645,9 +5645,9 @@ ; 
CHECK-LABEL: test2_masked_8xi32_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec2 @@ -5658,9 +5658,9 @@ ; CHECK-LABEL: test2_masked_z_8xi32_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <8 x i32>, <8 x i32>* %vp %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -5670,8 +5670,8 @@ define <16 x i32> @test2_16xi32_perm_mask0(<16 x i32> %vec) { ; CHECK-LABEL: test2_16xi32_perm_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } @@ -5680,10 +5680,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $18453, %ax # imm = 0x4815 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd 
{{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res @@ -5694,9 +5694,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $18453, %ax # imm = 0x4815 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -5706,10 +5706,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $11142, %ax # imm = 0x2B86 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res @@ -5720,9 +5720,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $11142, %ax # imm = 0x2B86 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 
{%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -5732,10 +5732,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $15610, %ax # imm = 0x3CFA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res @@ -5746,9 +5746,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $15610, %ax # imm = 0x3CFA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -5756,8 +5756,8 @@ define <16 x i32> @test2_16xi32_perm_mask3(<16 x i32> %vec) { ; CHECK-LABEL: test2_16xi32_perm_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i32> 
%vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res } @@ -5766,10 +5766,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $14814, %ax # imm = 0x39DE ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 ret <16 x i32> %res @@ -5780,9 +5780,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $14814, %ax # imm = 0x39DE ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -5790,8 +5790,8 @@ define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) { ; CHECK-LABEL: test2_16xi32_perm_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res @@ -5801,9 +5801,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $7334, %ax # imm = 0x1CA6 ; CHECK-NEXT: # sched: [1:0.25] 
-; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 @@ -5815,9 +5815,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $7334, %ax # imm = 0x1CA6 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -5829,9 +5829,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-25463, %ax # imm = 0x9C89 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 @@ -5843,9 +5843,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-25463, %ax # imm = 0x9C89 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = 
mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -5857,9 +5857,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-14529, %ax # imm = 0xC73F ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 @@ -5871,9 +5871,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-14529, %ax # imm = 0xC73F ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -5883,8 +5883,8 @@ define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) { ; CHECK-LABEL: test2_16xi32_perm_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] -; CHECK-NEXT: retq # 
sched: [2:1.00] +; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> ret <16 x i32> %res @@ -5894,9 +5894,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-21392, %ax # imm = 0xAC70 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec2 @@ -5908,9 +5908,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-21392, %ax # imm = 0xAC70 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec = load <16 x i32>, <16 x i32>* %vp %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -5921,7 +5921,7 @@ ; CHECK-LABEL: test2_8xfloat_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } @@ -5930,9 +5930,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: 
[3:1.00] ; CHECK-NEXT: movb $-41, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -5943,9 +5943,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $-41, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -5955,9 +5955,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $-63, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -5968,9 +5968,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $-63, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; 
CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -5980,9 +5980,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $107, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -5993,9 +5993,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $107, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -6004,7 +6004,7 @@ ; CHECK-LABEL: test2_8xfloat_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } @@ -6013,9 +6013,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $66, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmps %ymm0, 
%ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -6026,9 +6026,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $66, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -6036,8 +6036,8 @@ define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -6045,11 +6045,11 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $-24, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -6059,11 +6059,11 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $-24, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -6073,11 +6073,11 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $-6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, 
<8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -6087,11 +6087,11 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $-6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -6101,11 +6101,11 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $-50, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -6115,11 +6115,11 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; 
CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $-50, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -6129,8 +6129,8 @@ define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -6138,11 +6138,11 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3) { ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $-26, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> 
%vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -6152,11 +6152,11 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $-26, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -6166,8 +6166,8 @@ define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2) { ; CHECK-LABEL: test_16xfloat_shuff_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } @@ -6176,10 +6176,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-11480, %ax # imm = 0xD328 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # 
sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -6190,9 +6190,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-11480, %ax # imm = 0xD328 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -6202,10 +6202,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-21749, %ax # imm = 0xAB0B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -6216,9 +6216,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-21749, %ax # imm = 0xAB0B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -6227,10 +6227,10 @@ ; CHECK-LABEL: test_16xfloat_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $75, %ax # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -6240,9 +6240,9 @@ ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $75, %ax # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -6250,8 +6250,8 @@ define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; CHECK-LABEL: test_16xfloat_shuff_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: 
vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } @@ -6260,10 +6260,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $32347, %ax # imm = 0x7E5B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -6274,9 +6274,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $32347, %ax # imm = 0x7E5B ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -6284,8 +6284,8 @@ define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; CHECK-LABEL: test_16xfloat_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] sched: [10:1.00] +; 
CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -6295,10 +6295,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-19232, %ax # imm = 0xB4E0 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -6310,9 +6310,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-19232, %ax # imm = 0xB4E0 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -6324,10 +6324,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-29660, %ax # imm = 0x8C24 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: 
[1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -6339,9 +6339,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-29660, %ax # imm = 0x8C24 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -6353,10 +6353,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12160, %ax # imm = 0xD080 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -6368,9 +6368,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12160, %ax # imm = 0xD080 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; 
CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -6380,8 +6380,8 @@ define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; CHECK-LABEL: test_16xfloat_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -6391,10 +6391,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30129, %ax # imm = 0x8A4F ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -6406,9 +6406,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30129, %ax # imm = 0x8A4F ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, 
%k1 -; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -6419,7 +6419,7 @@ ; CHECK-LABEL: test_4xdouble_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } @@ -6428,9 +6428,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -6441,9 +6441,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> 
%res @@ -6453,9 +6453,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -6466,9 +6466,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -6478,9 +6478,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -6491,9 +6491,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm0, 
%ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -6502,7 +6502,7 @@ ; CHECK-LABEL: test_4xdouble_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } @@ -6511,9 +6511,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -6524,9 +6524,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -6534,8 +6534,8 @@ define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # 
sched: [2:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -6543,11 +6543,11 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -6557,11 +6557,11 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -6571,11 +6571,11 @@ define <4 x double> 
@test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -6585,11 +6585,11 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -6599,11 +6599,11 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: 
[10:1.00] ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -6613,11 +6613,11 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -6627,8 +6627,8 @@ define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -6636,11 +6636,11 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x 
double> %vec3) { ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -6650,11 +6650,11 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -6664,8 +6664,8 @@ define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; CHECK-LABEL: test_8xdouble_shuff_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec1, <8 x 
double> %vec2, <8 x i32> ret <8 x double> %res } @@ -6673,10 +6673,10 @@ ; CHECK-LABEL: test_8xdouble_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -6686,9 +6686,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -6697,10 +6697,10 @@ ; CHECK-LABEL: test_8xdouble_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-70, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ 
-6710,9 +6710,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-70, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -6721,10 +6721,10 @@ ; CHECK-LABEL: test_8xdouble_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $30, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -6734,9 +6734,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $30, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -6744,8 +6744,8 @@ define <8 x double> 
@test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; CHECK-LABEL: test_8xdouble_shuff_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } @@ -6753,10 +6753,10 @@ ; CHECK-LABEL: test_8xdouble_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $56, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -6766,9 +6766,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $56, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -6776,8 +6776,8 @@ define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; CHECK-LABEL: test_8xdouble_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -6786,10 +6786,10 @@ ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $95, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -6800,9 +6800,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $95, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -6813,10 +6813,10 @@ ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = 
zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -6827,9 +6827,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -6840,10 +6840,10 @@ ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -6854,9 +6854,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: 
kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -6866,8 +6866,8 @@ define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; CHECK-LABEL: test_8xdouble_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -6876,10 +6876,10 @@ ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -6890,9 +6890,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = 
zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -6903,7 +6903,7 @@ ; CHECK-LABEL: test_8xi32_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> ret <8 x i32> %res } @@ -6912,9 +6912,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $26, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec3 ret <8 x i32> %res @@ -6925,9 +6925,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $26, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -6937,9 +6937,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $-4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd 
%eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec3 ret <8 x i32> %res @@ -6950,9 +6950,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $-4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -6962,9 +6962,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $51, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec3 ret <8 x i32> %res @@ -6975,9 +6975,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $51, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -6986,7 +6986,7 @@ ; CHECK-LABEL: test_8xi32_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 
{{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> ret <8 x i32> %res } @@ -6995,9 +6995,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $92, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec3 ret <8 x i32> %res @@ -7008,9 +7008,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $92, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer ret <8 x i32> %res @@ -7018,8 +7018,8 @@ define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; CHECK-LABEL: test_8xi32_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> ret <8 x i32> %res @@ -7027,11 +7027,11 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3) { ; CHECK-LABEL: 
test_8xi32_masked_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $64, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec3 @@ -7041,11 +7041,11 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $64, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -7055,11 +7055,11 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3) { ; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $-104, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec3 @@ -7069,11 +7069,11 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $-104, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -7083,11 +7083,11 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3) { ; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $113, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec3 @@ -7097,11 +7097,11 @@ define <8 x i32> 
@test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $113, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -7111,8 +7111,8 @@ define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; CHECK-LABEL: test_8xi32_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> ret <8 x i32> %res @@ -7120,11 +7120,11 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3) { ; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $45, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x 
i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> %vec3 @@ -7134,11 +7134,11 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) { ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $45, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i32>, <8 x i32>* %vec2p %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i32> %shuf, <8 x i32> zeroinitializer @@ -7148,8 +7148,8 @@ define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) { ; CHECK-LABEL: test_16xi32_shuff_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> ret <16 x i32> %res } @@ -7158,10 +7158,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $2995, %ax # imm = 0xBB3 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec3 
ret <16 x i32> %res @@ -7172,9 +7172,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $2995, %ax # imm = 0xBB3 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -7184,10 +7184,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $18408, %ax # imm = 0x47E8 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec3 ret <16 x i32> %res @@ -7198,9 +7198,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $18408, %ax # imm = 0x47E8 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ 
-7210,10 +7210,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $15737, %ax # imm = 0x3D79 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec3 ret <16 x i32> %res @@ -7224,9 +7224,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $15737, %ax # imm = 0x3D79 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -7234,8 +7234,8 @@ define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) { ; CHECK-LABEL: test_16xi32_shuff_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> ret <16 x i32> %res } @@ -7244,10 +7244,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3073, %ax # imm = 0xF3FF ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = 
zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec3 ret <16 x i32> %res @@ -7258,9 +7258,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3073, %ax # imm = 0xF3FF ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer ret <16 x i32> %res @@ -7268,8 +7268,8 @@ define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) { ; CHECK-LABEL: test_16xi32_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> ret <16 x i32> %res @@ -7279,10 +7279,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8166, %ax # imm = 0xE01A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = 
zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec3 @@ -7294,9 +7294,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8166, %ax # imm = 0xE01A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -7308,10 +7308,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28302, %ax # imm = 0x9172 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec3 @@ -7323,9 +7323,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28302, %ax # imm = 0x9172 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -7337,10 +7337,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $27158, %ax # imm = 0x6A16 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec3 @@ -7352,9 +7352,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $27158, %ax # imm = 0x6A16 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -7364,8 +7364,8 @@ define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) { ; CHECK-LABEL: test_16xi32_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = 
zmm0[2,3,2,3],mem[2,3,6,7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> ret <16 x i32> %res @@ -7375,10 +7375,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $26363, %ax # imm = 0x66FB ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> %vec3 @@ -7390,9 +7390,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $26363, %ax # imm = 0x66FB ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x i32>, <16 x i32>* %vec2p %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %res = select <16 x i1> , <16 x i32> %shuf, <16 x i32> zeroinitializer @@ -7403,7 +7403,7 @@ ; CHECK-LABEL: test_4xi64_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> ret <4 x i64> 
%res } @@ -7412,9 +7412,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec3 ret <4 x i64> %res @@ -7425,9 +7425,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer ret <4 x i64> %res @@ -7437,9 +7437,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec3 ret <4 x i64> %res @@ -7450,9 +7450,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer ret <4 x i64> %res @@ -7462,9 +7462,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec3 ret <4 x i64> %res @@ -7475,9 +7475,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer ret <4 x i64> %res @@ -7486,7 +7486,7 @@ ; CHECK-LABEL: test_4xi64_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> ret <4 x i64> %res } @@ -7495,9 +7495,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: 
kmovd %eax, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec3 ret <4 x i64> %res @@ -7508,9 +7508,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00] ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer ret <4 x i64> %res @@ -7518,8 +7518,8 @@ define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; CHECK-LABEL: test_4xi64_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> ret <4 x i64> %res @@ -7527,11 +7527,11 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3) { ; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec3 @@ -7541,11 +7541,11 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -7555,11 +7555,11 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3) { ; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> 
, <4 x i64> %shuf, <4 x i64> %vec3 @@ -7569,11 +7569,11 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -7583,11 +7583,11 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3) { ; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec3 @@ -7597,11 +7597,11 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [3:1.00] +; CHECK-NEXT: 
vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -7611,8 +7611,8 @@ define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; CHECK-LABEL: test_4xi64_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> ret <4 x i64> %res @@ -7620,11 +7620,11 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3) { ; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> %vec3 @@ -7634,11 +7634,11 @@ define <4 x i64> 
@test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) { ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [3:1.00] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00] ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x i64>, <4 x i64>* %vec2p %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> %res = select <4 x i1> , <4 x i64> %shuf, <4 x i64> zeroinitializer @@ -7648,8 +7648,8 @@ define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) { ; CHECK-LABEL: test_8xi64_shuff_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> ret <8 x i64> %res } @@ -7657,10 +7657,10 @@ ; CHECK-LABEL: test_8xi64_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-15, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec3 ret <8 x i64> %res @@ -7670,9 +7670,9 @@ ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; 
CHECK-NEXT: movb $-15, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -7681,10 +7681,10 @@ ; CHECK-LABEL: test_8xi64_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-17, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec3 ret <8 x i64> %res @@ -7694,9 +7694,9 @@ ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-17, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -7705,10 +7705,10 @@ ; CHECK-LABEL: test_8xi64_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-24, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 
{%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec3 ret <8 x i64> %res @@ -7718,9 +7718,9 @@ ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-24, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -7728,8 +7728,8 @@ define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) { ; CHECK-LABEL: test_8xi64_shuff_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> ret <8 x i64> %res } @@ -7737,10 +7737,10 @@ ; CHECK-LABEL: test_8xi64_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: 
retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec3 ret <8 x i64> %res @@ -7750,9 +7750,9 @@ ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer ret <8 x i64> %res @@ -7760,8 +7760,8 @@ define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) { ; CHECK-LABEL: test_8xi64_shuff_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> ret <8 x i64> %res @@ -7770,10 +7770,10 @@ ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec3 @@ -7784,9 +7784,9 @@ ; 
CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -7797,10 +7797,10 @@ ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec3 @@ -7811,9 +7811,9 @@ ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -7824,10 +7824,10 @@ ; CHECK-LABEL: 
test_8xi64_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $42, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec3 @@ -7838,9 +7838,9 @@ ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $42, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -7850,8 +7850,8 @@ define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) { ; CHECK-LABEL: test_8xi64_shuff_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> ret <8 x i64> %res @@ -7860,10 +7860,10 @@ ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} 
zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> %vec3 @@ -7874,9 +7874,9 @@ ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x i64>, <8 x i64>* %vec2p %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> %res = select <8 x i1> , <8 x i64> %shuf, <8 x i64> zeroinitializer @@ -7887,7 +7887,7 @@ ; CHECK-LABEL: test_4xfloat_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res } @@ -7895,10 +7895,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x 
float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 ret <4 x float> %res @@ -7908,9 +7908,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer ret <4 x float> %res @@ -7919,10 +7919,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 ret <4 x float> %res @@ -7932,9 +7932,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> 
%res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer ret <4 x float> %res @@ -7943,10 +7943,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 ret <4 x float> %res @@ -7956,9 +7956,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer ret <4 x float> %res @@ -7967,7 +7967,7 @@ ; CHECK-LABEL: test_4xfloat_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res } @@ -7975,10 +7975,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 ret <4 x float> %res @@ -7988,9 +7988,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer ret <4 x float> %res @@ -7998,8 +7998,8 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) { ; CHECK-LABEL: test_4xfloat_unpack_low_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res @@ -8008,10 +8008,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps 
{{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 @@ -8022,9 +8022,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer @@ -8035,10 +8035,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 @@ -8049,9 +8049,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = 
xmm0[0],mem[0],xmm0[1],mem[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer @@ -8062,10 +8062,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 @@ -8076,9 +8076,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer @@ -8088,8 +8088,8 @@ define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) { ; CHECK-LABEL: test_4xfloat_unpack_low_mem_mask3: ; CHECK: # BB#0: -; 
CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res @@ -8098,10 +8098,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 @@ -8112,9 +8112,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer @@ -8125,7 +8125,7 @@ ; CHECK-LABEL: test_8xfloat_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } @@ -8133,10 +8133,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $122, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -8146,9 +8146,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $122, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -8157,10 +8157,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-107, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -8170,9 +8170,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-107, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -8181,10 +8181,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-25, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -8194,9 +8194,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-25, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; 
CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -8205,7 +8205,7 @@ ; CHECK-LABEL: test_8xfloat_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } @@ -8213,10 +8213,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-127, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -8226,9 +8226,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-127, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd 
%eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -8236,8 +8236,8 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_unpack_low_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -8246,10 +8246,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $72, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -8260,9 +8260,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $72, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = 
ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -8273,10 +8273,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-64, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -8287,9 +8287,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-64, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ 
-8300,10 +8300,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -8314,9 +8314,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -8326,8 +8326,8 @@ define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_unpack_low_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 
x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -8336,10 +8336,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $64, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -8350,9 +8350,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $64, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -8362,8 +8362,8 @@ define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) { ; CHECK-LABEL: test_16xfloat_unpack_low_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: retq # sched: 
[2:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } @@ -8372,10 +8372,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5916, %ax # imm = 0xE8E4 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -8386,9 +8386,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5916, %ax # imm = 0xE8E4 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> 
%shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -8398,10 +8398,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-1130, %ax # imm = 0xFB96 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -8412,9 +8412,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-1130, %ax # imm = 0xFB96 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -8424,10 +8424,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12439, %ax # imm = 0xCF69 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -8438,9 +8438,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12439, %ax # imm = 0xCF69 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -8448,8 +8448,8 @@ define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; CHECK-LABEL: test_16xfloat_unpack_low_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } @@ -8458,10 +8458,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6413, %ax # imm = 0xE6F3 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -8472,9 +8472,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6413, %ax # imm = 0xE6F3 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x 
float> %res @@ -8482,8 +8482,8 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; CHECK-LABEL: test_16xfloat_unpack_low_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -8493,10 +8493,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $20326, %ax # imm = 0x4F66 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -8508,9 +8508,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $20326, %ax # imm = 0x4F66 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] 
-; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -8522,10 +8522,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-17707, %ax # imm = 0xBAD5 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -8537,9 +8537,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-17707, %ax # imm = 0xBAD5 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -8551,10 +8551,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6631, %ax # imm = 0xE619 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -8566,9 +8566,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6631, %ax # imm = 0xE619 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x 
float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -8578,8 +8578,8 @@ define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; CHECK-LABEL: test_16xfloat_unpack_low_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -8589,10 +8589,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-20711, %ax # imm = 0xAF19 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -8604,9 +8604,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-20711, %ax # imm = 0xAF19 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -8617,7 +8617,7 @@ ; CHECK-LABEL: test_2xdouble_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> ret <2 x double> %res } @@ -8625,10 +8625,10 @@ ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> %vec3 ret <2 x double> %res @@ -8638,9 +8638,9 @@ ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = 
xmm0[0],xmm1[0] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> zeroinitializer ret <2 x double> %res @@ -8649,10 +8649,10 @@ ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> %vec3 ret <2 x double> %res @@ -8662,9 +8662,9 @@ ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> zeroinitializer ret <2 x double> %res @@ -8672,8 +8672,8 @@ define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { ; CHECK-LABEL: test_2xdouble_unpack_low_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %res = shufflevector <2 x double> 
%vec1, <2 x double> %vec2, <2 x i32> ret <2 x double> %res @@ -8682,10 +8682,10 @@ ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> %vec3 @@ -8696,9 +8696,9 @@ ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> zeroinitializer @@ -8709,10 +8709,10 @@ ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res 
= select <2 x i1> , <2 x double> %shuf, <2 x double> %vec3 @@ -8723,9 +8723,9 @@ ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> zeroinitializer @@ -8736,7 +8736,7 @@ ; CHECK-LABEL: test_4xdouble_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } @@ -8744,10 +8744,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -8757,9 +8757,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -8768,10 +8768,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -8781,9 +8781,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -8792,10 +8792,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; CHECK-NEXT: kmovd %eax, 
%k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -8805,9 +8805,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -8816,7 +8816,7 @@ ; CHECK-LABEL: test_4xdouble_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } @@ -8824,10 +8824,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res 
= select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -8837,9 +8837,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -8847,8 +8847,8 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_unpack_low_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -8857,10 +8857,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, 
<4 x double> %vec3 @@ -8871,9 +8871,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -8884,10 +8884,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -8898,9 +8898,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x 
i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -8911,10 +8911,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -8925,9 +8925,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -8937,8 +8937,8 @@ define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_unpack_low_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x 
double> %vec2, <4 x i32> ret <4 x double> %res @@ -8947,10 +8947,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -8961,9 +8961,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -8973,8 +8973,8 @@ define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; CHECK-LABEL: test_8xdouble_unpack_low_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> 
%res } @@ -8982,10 +8982,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-73, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -8995,9 +8995,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-73, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -9006,10 +9006,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $102, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: 
retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -9019,9 +9019,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $102, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -9030,10 +9030,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-46, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -9043,9 +9043,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-46, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: 
[1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -9053,8 +9053,8 @@ define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; CHECK-LABEL: test_8xdouble_unpack_low_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } @@ -9062,10 +9062,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-86, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -9075,9 +9075,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-86, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: 
kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -9085,8 +9085,8 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; CHECK-LABEL: test_8xdouble_unpack_low_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -9095,10 +9095,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -9109,9 +9109,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -9122,10 +9122,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -9136,9 +9136,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ 
-9149,10 +9149,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -9163,9 +9163,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -9175,8 +9175,8 @@ define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; CHECK-LABEL: test_8xdouble_unpack_low_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 
x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -9185,10 +9185,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -9199,9 +9199,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -9212,7 +9212,7 @@ ; CHECK-LABEL: test_4xfloat_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res } @@ -9220,10 +9220,10 @@ ; CHECK-LABEL: 
test_4xfloat_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 ret <4 x float> %res @@ -9233,9 +9233,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer ret <4 x float> %res @@ -9244,10 +9244,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 ret <4 x float> %res @@ -9257,9 +9257,9 @@ ; CHECK-LABEL: 
test_4xfloat_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer ret <4 x float> %res @@ -9268,10 +9268,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 ret <4 x float> %res @@ -9281,9 +9281,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer ret <4 x float> %res @@ -9292,7 +9292,7 @@ ; CHECK-LABEL: test_4xfloat_unpack_high_mask3: ; 
CHECK: # BB#0: ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res } @@ -9300,10 +9300,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 ret <4 x float> %res @@ -9313,9 +9313,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer ret <4 x float> %res @@ -9323,8 +9323,8 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) { ; CHECK-LABEL: test_4xfloat_unpack_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; 
CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res @@ -9333,10 +9333,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 @@ -9347,9 +9347,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer @@ -9360,10 +9360,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; 
CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 @@ -9374,9 +9374,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer @@ -9387,10 +9387,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 @@ -9401,9 +9401,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps 
{{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer @@ -9413,8 +9413,8 @@ define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) { ; CHECK-LABEL: test_4xfloat_unpack_high_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> ret <4 x float> %res @@ -9423,10 +9423,10 @@ ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> %vec3 @@ -9437,9 +9437,9 @@ ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] 
sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x float>, <4 x float>* %vec2p %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> %res = select <4 x i1> , <4 x float> %shuf, <4 x float> zeroinitializer @@ -9450,7 +9450,7 @@ ; CHECK-LABEL: test_8xfloat_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } @@ -9458,10 +9458,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $21, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -9471,9 +9471,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $21, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , 
<8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -9482,10 +9482,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $82, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -9495,9 +9495,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $82, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -9506,10 +9506,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-126, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; CHECK-NEXT: 
vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -9519,9 +9519,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-126, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -9530,7 +9530,7 @@ ; CHECK-LABEL: test_8xfloat_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res } @@ -9538,10 +9538,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-19, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> 
%vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 ret <8 x float> %res @@ -9551,9 +9551,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-19, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res @@ -9561,8 +9561,8 @@ define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_unpack_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -9571,10 +9571,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $28, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; 
CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -9585,9 +9585,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $28, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -9598,10 +9598,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-115, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -9612,9 +9612,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-115, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] 
-; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -9625,10 +9625,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -9639,9 +9639,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -9651,8 +9651,8 @@ define <8 x float> 
@test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) { ; CHECK-LABEL: test_8xfloat_unpack_high_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> ret <8 x float> %res @@ -9661,10 +9661,10 @@ ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> %vec3 @@ -9675,9 +9675,9 @@ ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x float>, <8 x float>* %vec2p %shuf = shufflevector <8 x float> %vec1, <8 x float> 
%vec2, <8 x i32> %res = select <8 x i1> , <8 x float> %shuf, <8 x float> zeroinitializer @@ -9687,8 +9687,8 @@ define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) { ; CHECK-LABEL: test_16xfloat_unpack_high_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } @@ -9697,10 +9697,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12160, %ax # imm = 0xD080 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -9711,9 +9711,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12160, %ax # imm = 0xD080 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -9723,10 +9723,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30129, %ax # imm = 0x8A4F ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -9737,9 +9737,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30129, %ax # imm = 0x8A4F ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} 
zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -9749,10 +9749,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-2371, %ax # imm = 0xF6BD ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -9763,9 +9763,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-2371, %ax # imm = 0xF6BD ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x 
float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -9773,8 +9773,8 @@ define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) { ; CHECK-LABEL: test_16xfloat_unpack_high_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res } @@ -9783,10 +9783,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26006, %ax # imm = 0x9A6A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: vmovaps %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 ret <16 x float> %res @@ -9797,9 +9797,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26006, %ax # imm = 0x9A6A ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = 
zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res @@ -9807,8 +9807,8 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) { ; CHECK-LABEL: test_16xfloat_unpack_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -9818,10 +9818,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = 
zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -9833,9 +9833,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -9847,10 +9847,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] 
%vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -9862,9 +9862,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -9876,10 +9876,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -9891,9 +9891,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: 
movw $-26458, %ax # imm = 0x98A6 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -9903,8 +9903,8 @@ define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) { ; CHECK-LABEL: test_16xfloat_unpack_high_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> ret <16 x float> %res @@ -9914,10 +9914,10 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $25225, %ax # imm = 0x6289 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: vmovaps %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: 
[1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> %vec3 @@ -9929,9 +9929,9 @@ ; CHECK: # BB#0: ; CHECK-NEXT: movw $25225, %ax # imm = 0x6289 ; CHECK-NEXT: # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <16 x float>, <16 x float>* %vec2p %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> %res = select <16 x i1> , <16 x float> %shuf, <16 x float> zeroinitializer @@ -9942,7 +9942,7 @@ ; CHECK-LABEL: test_2xdouble_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> ret <2 x double> %res } @@ -9950,10 +9950,10 @@ ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] 
sched: [1:1.00] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> %vec3 ret <2 x double> %res @@ -9963,9 +9963,9 @@ ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> zeroinitializer ret <2 x double> %res @@ -9974,10 +9974,10 @@ ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> %vec3 ret <2 x double> %res @@ -9987,9 +9987,9 @@ ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00] +; CHECK-NEXT: 
retq # sched: [7:1.00] %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> zeroinitializer ret <2 x double> %res @@ -9997,8 +9997,8 @@ define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) { ; CHECK-LABEL: test_2xdouble_unpack_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> ret <2 x double> %res @@ -10007,10 +10007,10 @@ ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> %vec3 @@ -10021,9 +10021,9 @@ ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> 
%res = select <2 x i1> , <2 x double> %shuf, <2 x double> zeroinitializer @@ -10034,10 +10034,10 @@ ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> %vec3 @@ -10048,9 +10048,9 @@ ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <2 x double>, <2 x double>* %vec2p %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> %res = select <2 x i1> , <2 x double> %shuf, <2 x double> zeroinitializer @@ -10061,7 +10061,7 @@ ; CHECK-LABEL: test_4xdouble_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } @@ -10069,10 +10069,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = 
ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -10082,9 +10082,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -10093,10 +10093,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -10106,9 +10106,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: 
vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -10117,10 +10117,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -10130,9 +10130,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -10141,7 +10141,7 @@ ; CHECK-LABEL: test_4xdouble_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] -; CHECK-NEXT: retq # 
sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res } @@ -10149,10 +10149,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 ret <4 x double> %res @@ -10162,9 +10162,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res @@ -10172,8 +10172,8 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_unpack_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector 
<4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -10182,10 +10182,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -10196,9 +10196,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -10209,10 +10209,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = 
load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -10223,9 +10223,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -10236,10 +10236,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -10250,9 +10250,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] 
sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -10262,8 +10262,8 @@ define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) { ; CHECK-LABEL: test_4xdouble_unpack_high_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> ret <4 x double> %res @@ -10272,10 +10272,10 @@ ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> %vec3 @@ -10286,9 +10286,9 @@ ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] +; 
CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <4 x double>, <4 x double>* %vec2p %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> %res = select <4 x i1> , <4 x double> %shuf, <4 x double> zeroinitializer @@ -10298,8 +10298,8 @@ define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) { ; CHECK-LABEL: test_8xdouble_unpack_high_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } @@ -10307,10 +10307,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-27, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -10320,9 +10320,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-27, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -10331,10 +10331,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-21, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -10344,9 +10344,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-21, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -10355,10 +10355,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-118, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = 
zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -10368,9 +10368,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-118, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -10378,8 +10378,8 @@ define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) { ; CHECK-LABEL: test_8xdouble_unpack_high_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res } @@ -10387,10 +10387,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $100, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: 
vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 ret <8 x double> %res @@ -10400,9 +10400,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $100, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res @@ -10410,8 +10410,8 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) { ; CHECK-LABEL: test_8xdouble_unpack_high_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -10420,10 +10420,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; 
CHECK-NEXT: movb $-76, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -10434,9 +10434,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -10447,10 +10447,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $71, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* 
%vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -10461,9 +10461,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $71, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -10474,10 +10474,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-49, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -10488,9 +10488,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-49, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 
# sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer @@ -10500,8 +10500,8 @@ define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) { ; CHECK-LABEL: test_8xdouble_unpack_high_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> ret <8 x double> %res @@ -10510,10 +10510,10 @@ ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-40, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> %vec3 @@ -10524,9 +10524,9 @@ ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-40, %al # sched: [1:0.25] -; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = 
zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] -; CHECK-NEXT: retq # sched: [2:1.00] +; CHECK-NEXT: kmovd %eax, %k1 # sched: [1:1.00] +; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00] +; CHECK-NEXT: retq # sched: [7:1.00] %vec2 = load <8 x double>, <8 x double>* %vec2p %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> %res = select <8 x i1> , <8 x double> %shuf, <8 x double> zeroinitializer Index: llvm/trunk/test/CodeGen/X86/fma-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fma-schedule.ll +++ llvm/trunk/test/CodeGen/X86/fma-schedule.ll @@ -41,9 +41,9 @@ ; ; SKX-LABEL: test_vfmadd213pd: ; SKX: # BB#0: -; SKX-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmadd213pd: ; ZNVER1: # BB#0: @@ -83,9 +83,9 @@ ; ; SKX-LABEL: test_vfmadd213pd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmadd213pd_ymm: ; ZNVER1: # BB#0: @@ -125,9 +125,9 @@ ; ; SKX-LABEL: test_vfmadd213ps: ; SKX: # BB#0: -; SKX-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq 
# sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmadd213ps: ; ZNVER1: # BB#0: @@ -167,9 +167,9 @@ ; ; SKX-LABEL: test_vfmadd213ps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmadd213ps_ymm: ; ZNVER1: # BB#0: @@ -209,9 +209,9 @@ ; ; SKX-LABEL: test_vfmadd213sd: ; SKX: # BB#0: -; SKX-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmadd213sd: ; ZNVER1: # BB#0: @@ -251,9 +251,9 @@ ; ; SKX-LABEL: test_vfmadd213ss: ; SKX: # BB#0: -; SKX-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmadd213ss: ; ZNVER1: # BB#0: @@ -305,9 +305,9 @@ ; ; SKX-LABEL: test_vfmaddsubpd: ; SKX: # BB#0: -; SKX-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmaddsubpd: ; ZNVER1: # BB#0: @@ -347,9 +347,9 @@ ; ; SKX-LABEL: test_vfmaddsubpd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfmaddsub213pd %ymm2, %ymm1, 
%ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmaddsubpd_ymm: ; ZNVER1: # BB#0: @@ -389,9 +389,9 @@ ; ; SKX-LABEL: test_vfmaddsubps: ; SKX: # BB#0: -; SKX-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmaddsubps: ; ZNVER1: # BB#0: @@ -431,9 +431,9 @@ ; ; SKX-LABEL: test_vfmaddsubps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmaddsubps_ymm: ; ZNVER1: # BB#0: @@ -485,9 +485,9 @@ ; ; SKX-LABEL: test_vfmsubaddpd: ; SKX: # BB#0: -; SKX-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsubaddpd: ; ZNVER1: # BB#0: @@ -527,9 +527,9 @@ ; ; SKX-LABEL: test_vfmsubaddpd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: 
vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsubaddpd_ymm: ; ZNVER1: # BB#0: @@ -569,9 +569,9 @@ ; ; SKX-LABEL: test_vfmsubaddps: ; SKX: # BB#0: -; SKX-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsubaddps: ; ZNVER1: # BB#0: @@ -611,9 +611,9 @@ ; ; SKX-LABEL: test_vfmsubaddps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsubaddps_ymm: ; ZNVER1: # BB#0: @@ -665,9 +665,9 @@ ; ; SKX-LABEL: test_vfmsub213pd: ; SKX: # BB#0: -; SKX-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsub213pd: ; ZNVER1: # BB#0: @@ -707,9 +707,9 @@ ; ; SKX-LABEL: test_vfmsub213pd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; 
ZNVER1-LABEL: test_vfmsub213pd_ymm: ; ZNVER1: # BB#0: @@ -749,9 +749,9 @@ ; ; SKX-LABEL: test_vfmsub213ps: ; SKX: # BB#0: -; SKX-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsub213ps: ; ZNVER1: # BB#0: @@ -791,9 +791,9 @@ ; ; SKX-LABEL: test_vfmsub213ps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsub213ps_ymm: ; ZNVER1: # BB#0: @@ -833,9 +833,9 @@ ; ; SKX-LABEL: test_vfmsub213sd: ; SKX: # BB#0: -; SKX-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsub213sd: ; ZNVER1: # BB#0: @@ -875,9 +875,9 @@ ; ; SKX-LABEL: test_vfmsub213ss: ; SKX: # BB#0: -; SKX-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfmsub213ss: ; ZNVER1: # BB#0: @@ -929,9 +929,9 @@ ; ; SKX-LABEL: test_vfnmadd213pd: ; SKX: # BB#0: -; SKX-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; 
SKX-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmadd213pd: ; ZNVER1: # BB#0: @@ -971,9 +971,9 @@ ; ; SKX-LABEL: test_vfnmadd213pd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmadd213pd_ymm: ; ZNVER1: # BB#0: @@ -1013,9 +1013,9 @@ ; ; SKX-LABEL: test_vfnmadd213ps: ; SKX: # BB#0: -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmadd213ps: ; ZNVER1: # BB#0: @@ -1055,9 +1055,9 @@ ; ; SKX-LABEL: test_vfnmadd213ps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmadd213ps_ymm: ; ZNVER1: # BB#0: @@ -1097,9 +1097,9 @@ ; ; SKX-LABEL: test_vfnmadd213sd: ; SKX: # BB#0: -; SKX-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: 
[4:0.33] +; SKX-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmadd213sd: ; ZNVER1: # BB#0: @@ -1139,9 +1139,9 @@ ; ; SKX-LABEL: test_vfnmadd213ss: ; SKX: # BB#0: -; SKX-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmadd213ss: ; ZNVER1: # BB#0: @@ -1193,9 +1193,9 @@ ; ; SKX-LABEL: test_vfnmsub213pd: ; SKX: # BB#0: -; SKX-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmsub213pd: ; ZNVER1: # BB#0: @@ -1235,9 +1235,9 @@ ; ; SKX-LABEL: test_vfnmsub213pd_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmsub213pd_ymm: ; ZNVER1: # BB#0: @@ -1277,9 +1277,9 @@ ; ; SKX-LABEL: test_vfnmsub213ps: ; SKX: # BB#0: -; SKX-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmsub213ps: ; ZNVER1: # BB#0: 
@@ -1319,9 +1319,9 @@ ; ; SKX-LABEL: test_vfnmsub213ps_ymm: ; SKX: # BB#0: -; SKX-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmsub213ps_ymm: ; ZNVER1: # BB#0: @@ -1361,9 +1361,9 @@ ; ; SKX-LABEL: test_vfnmsub213sd: ; SKX: # BB#0: -; SKX-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmsub213sd: ; ZNVER1: # BB#0: @@ -1403,9 +1403,9 @@ ; ; SKX-LABEL: test_vfnmsub213ss: ; SKX: # BB#0: -; SKX-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_vfnmsub213ss: ; ZNVER1: # BB#0: Index: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll +++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll @@ -69,9 +69,9 @@ ; ; SKX-LABEL: f32_no_estimate: ; SKX: # BB#0: -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [11:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -151,10 
+151,10 @@ ; ; SKX-LABEL: f32_one_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -268,14 +268,14 @@ ; ; SKX-LABEL: f32_two_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -332,9 +332,9 @@ ; ; SKX-LABEL: v4f32_no_estimate: ; SKX: # BB#0: -; SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [1:0.50] +; SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [11:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -416,10 +416,10 @@ ; ; SKX-LABEL: v4f32_one_step: ; SKX: # BB#0: 
-; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -533,14 +533,14 @@ ; ; SKX-LABEL: v4f32_two_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50] +; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -600,9 +600,9 @@ ; ; SKX-LABEL: v8f32_no_estimate: ; SKX: # BB#0: -; SKX-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [1:0.50] +; SKX-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [11:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -691,10 +691,10 @@ ; ; SKX-LABEL: v8f32_one_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 -; SKX-NEXT: vfnmadd213ps 
{{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -821,14 +821,14 @@ ; ; SKX-LABEL: v8f32_two_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 -; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50] +; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } Index: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll +++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll @@ -62,9 +62,9 @@ ; ; SKX-LABEL: f32_no_step_2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 -; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] +; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: 
[7:1.00] %div = fdiv fast float 1234.0, %x ret float %div } @@ -152,11 +152,11 @@ ; ; SKX-LABEL: f32_one_step_2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 3456.0, %x ret float %div } @@ -252,12 +252,12 @@ ; ; SKX-LABEL: f32_one_step_2_divs: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [4:0.50] -; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [9:0.50] +; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] +; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 3456.0, %x %div2 = fdiv fast float %div, %x ret float %div2 @@ -380,15 +380,15 @@ ; ; SKX-LABEL: f32_two_step_2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ss %xmm2, 
%xmm0, %xmm3 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 6789.0, %x ret float %div } @@ -478,11 +478,11 @@ ; ; SKX-LABEL: v4f32_one_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -580,12 +580,12 @@ ; ; SKX-LABEL: v4f32_one_step_2_divs: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [4:0.50] -; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 # sched: [10:0.50] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [4:0.33] +; 
SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50] +; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x %div2 = fdiv fast <4 x float> %div, %x ret <4 x float> %div2 @@ -708,15 +708,15 @@ ; ; SKX-LABEL: v4f32_two_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [1:0.50] +; SKX-NEXT: vrcp14ps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -814,11 +814,11 @@ ; ; SKX-LABEL: v8f32_one_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 -; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv 
fast <8 x float> , %x ret <8 x float> %div } @@ -925,12 +925,12 @@ ; ; SKX-LABEL: v8f32_one_step_2_divs: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 -; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [4:0.50] -; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 # sched: [11:0.50] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50] +; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x %div2 = fdiv fast <8 x float> %div, %x ret <8 x float> %div2 @@ -1067,15 +1067,15 @@ ; ; SKX-LABEL: v8f32_two_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm1 -; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [1:0.50] +; SKX-NEXT: vrcp14ps %ymm0, %ymm1 # sched: [4:1.00] +; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [4:0.50] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [4:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 
x float> %div } @@ -1124,8 +1124,8 @@ ; ; SKX-LABEL: v8f32_no_step: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -1183,9 +1183,9 @@ ; ; SKX-LABEL: v8f32_no_step2: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %ymm0, %ymm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %ymm0, %ymm0 # sched: [4:1.00] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] +; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } Index: llvm/trunk/test/CodeGen/X86/sse-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll @@ -49,9 +49,9 @@ ; ; SKX-LABEL: test_addps: ; SKX: # BB#0: -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addps: ; BTVER2: # BB#0: @@ -109,9 +109,9 @@ ; ; SKX-LABEL: test_addss: ; SKX: # BB#0: -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addss: ; BTVER2: # BB#0: @@ -173,9 +173,9 @@ ; ; SKX-LABEL: test_andps: ; SKX: # BB#0: -; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vandps 
%xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: @@ -241,9 +241,9 @@ ; ; SKX-LABEL: test_andnotps: ; SKX: # BB#0: -; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: @@ -313,11 +313,11 @@ ; ; SKX-LABEL: test_cmpps: ; SKX: # BB#0: -; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 -; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1 # sched: [9:1.00] +; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: @@ -382,7 +382,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cmpss: ; BTVER2: # BB#0: @@ -494,16 +494,16 @@ ; SKX-LABEL: test_comiss: ; SKX: # BB#0: ; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; SKX-NEXT: setnp %al # sched: [1:1.00] -; SKX-NEXT: sete %cl # sched: [1:1.00] +; SKX-NEXT: setnp %al # sched: [1:0.50] +; SKX-NEXT: sete %cl # sched: [1:0.50] ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] ; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] -; SKX-NEXT: setnp %al # sched: [1:1.00] -; SKX-NEXT: sete %dl # sched: [1:1.00] +; SKX-NEXT: setnp %al # sched: [1:0.50] +; SKX-NEXT: sete %dl # sched: [1:0.50] ; SKX-NEXT: andb %al, %dl # sched: [1:0.25] ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] ; 
SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_comiss: ; BTVER2: # BB#0: @@ -587,8 +587,8 @@ ; SKX: # BB#0: ; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtsi2ss: ; BTVER2: # BB#0: @@ -657,8 +657,8 @@ ; SKX: # BB#0: ; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtsi2ssq: ; BTVER2: # BB#0: @@ -726,9 +726,9 @@ ; SKX-LABEL: test_cvtss2si: ; SKX: # BB#0: ; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [6:1.00] +; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00] ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtss2si: ; BTVER2: # BB#0: @@ -799,9 +799,9 @@ ; SKX-LABEL: test_cvtss2siq: ; SKX: # BB#0: ; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] -; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [6:1.00] +; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00] ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtss2siq: ; BTVER2: # BB#0: @@ -872,9 +872,9 @@ ; SKX-LABEL: test_cvttss2si: ; SKX: # BB#0: ; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] -; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [6:1.00] +; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00] ; SKX-NEXT: addl %ecx, 
%eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvttss2si: ; BTVER2: # BB#0: @@ -942,9 +942,9 @@ ; SKX-LABEL: test_cvttss2siq: ; SKX: # BB#0: ; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] -; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [6:1.00] +; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00] ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvttss2siq: ; BTVER2: # BB#0: @@ -1006,8 +1006,8 @@ ; SKX-LABEL: test_divps: ; SKX: # BB#0: ; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] -; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_divps: ; BTVER2: # BB#0: @@ -1066,8 +1066,8 @@ ; SKX-LABEL: test_divss: ; SKX: # BB#0: ; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00] -; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_divss: ; BTVER2: # BB#0: @@ -1126,8 +1126,8 @@ ; SKX-LABEL: test_ldmxcsr: ; SKX: # BB#0: ; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ldmxcsr: ; BTVER2: # BB#0: @@ -1189,7 +1189,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: @@ -1249,8 +1249,8 @@ ; SKX-LABEL: test_maxss: ; SKX: # BB#0: ; SKX-NEXT: vmaxss %xmm1, %xmm0, 
%xmm0 # sched: [4:0.33] -; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maxss: ; BTVER2: # BB#0: @@ -1311,7 +1311,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: @@ -1371,8 +1371,8 @@ ; SKX-LABEL: test_minss: ; SKX: # BB#0: ; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_minss: ; BTVER2: # BB#0: @@ -1437,10 +1437,10 @@ ; ; SKX-LABEL: test_movaps: ; SKX: # BB#0: -; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movaps: ; BTVER2: # BB#0: @@ -1503,7 +1503,7 @@ ; SKX-LABEL: test_movhlps: ; SKX: # BB#0: ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movhlps: ; BTVER2: # BB#0: @@ -1567,10 +1567,10 @@ ; ; SKX-LABEL: test_movhps: ; SKX: # BB#0: -; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, 
%xmm0 # sched: [4:0.33] +; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movhps: ; BTVER2: # BB#0: @@ -1637,8 +1637,8 @@ ; SKX-LABEL: test_movlhps: ; SKX: # BB#0: ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movlhps: ; BTVER2: # BB#0: @@ -1701,10 +1701,10 @@ ; ; SKX-LABEL: test_movlps: ; SKX: # BB#0: -; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movlps: ; BTVER2: # BB#0: @@ -1765,7 +1765,7 @@ ; SKX-LABEL: test_movmskps: ; SKX: # BB#0: ; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: @@ -1821,7 +1821,7 @@ ; SKX-LABEL: test_movntps: ; SKX: # BB#0: ; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: @@ -1881,10 +1881,10 @@ ; ; SKX-LABEL: test_movss_mem: ; SKX: # BB#0: -; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:0.50] -; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movss_mem: 
; BTVER2: # BB#0: @@ -1945,7 +1945,7 @@ ; SKX-LABEL: test_movss_reg: ; SKX: # BB#0: ; SKX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movss_reg: ; BTVER2: # BB#0: @@ -2005,10 +2005,10 @@ ; ; SKX-LABEL: test_movups: ; SKX: # BB#0: -; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movups: ; BTVER2: # BB#0: @@ -2068,9 +2068,9 @@ ; ; SKX-LABEL: test_mulps: ; SKX: # BB#0: -; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mulps: ; BTVER2: # BB#0: @@ -2128,9 +2128,9 @@ ; ; SKX-LABEL: test_mulss: ; SKX: # BB#0: -; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mulss: ; BTVER2: # BB#0: @@ -2192,9 +2192,9 @@ ; ; SKX-LABEL: test_orps: ; SKX: # BB#0: -; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: @@ 
-2256,8 +2256,8 @@ ; ; SKX-LABEL: test_prefetchnta: ; SKX: # BB#0: -; SKX-NEXT: prefetchnta (%rdi) # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_prefetchnta: ; BTVER2: # BB#0: @@ -2320,10 +2320,10 @@ ; ; SKX-LABEL: test_rcpps: ; SKX: # BB#0: -; SKX-NEXT: vrcp14ps %xmm0, %xmm0 -; SKX-NEXT: vrcp14ps (%rdi), %xmm1 -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrcp14ps %xmm0, %xmm0 # sched: [4:1.00] +; SKX-NEXT: vrcp14ps (%rdi), %xmm1 # sched: [10:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # BB#0: @@ -2400,10 +2400,10 @@ ; SKX-LABEL: test_rcpss: ; SKX: # BB#0: ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rcpss: ; BTVER2: # BB#0: @@ -2477,10 +2477,10 @@ ; ; SKX-LABEL: test_rsqrtps: ; SKX: # BB#0: -; SKX-NEXT: vrsqrt14ps %xmm0, %xmm0 -; SKX-NEXT: vrsqrt14ps (%rdi), %xmm1 -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vrsqrt14ps %xmm0, %xmm0 # sched: [4:1.00] +; SKX-NEXT: vrsqrt14ps (%rdi), %xmm1 # sched: [10:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # BB#0: @@ -2557,10 +2557,10 @@ ; SKX-LABEL: test_rsqrtss: ; SKX: # BB#0: ; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm1 = 
mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rsqrtss: ; BTVER2: # BB#0: @@ -2626,8 +2626,8 @@ ; ; SKX-LABEL: test_sfence: ; SKX: # BB#0: -; SKX-NEXT: sfence # sched: [1:0.33] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: sfence # sched: [2:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sfence: ; BTVER2: # BB#0: @@ -2687,8 +2687,8 @@ ; SKX-LABEL: test_shufps: ; SKX: # BB#0: ; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: @@ -2754,9 +2754,9 @@ ; SKX-LABEL: test_sqrtps: ; SKX: # BB#0: ; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00] -; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [12:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # BB#0: @@ -2833,10 +2833,10 @@ ; SKX-LABEL: test_sqrtss: ; SKX: # BB#0: ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00] -; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [1:0.50] +; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] ; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: 
[7:1.00] ; ; BTVER2-LABEL: test_sqrtss: ; BTVER2: # BB#0: @@ -2900,9 +2900,9 @@ ; ; SKX-LABEL: test_stmxcsr: ; SKX: # BB#0: -; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_stmxcsr: ; BTVER2: # BB#0: @@ -2962,9 +2962,9 @@ ; ; SKX-LABEL: test_subps: ; SKX: # BB#0: -; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_subps: ; BTVER2: # BB#0: @@ -3022,9 +3022,9 @@ ; ; SKX-LABEL: test_subss: ; SKX: # BB#0: -; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_subss: ; BTVER2: # BB#0: @@ -3131,16 +3131,16 @@ ; SKX-LABEL: test_ucomiss: ; SKX: # BB#0: ; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; SKX-NEXT: setnp %al # sched: [1:1.00] -; SKX-NEXT: sete %cl # sched: [1:1.00] +; SKX-NEXT: setnp %al # sched: [1:0.50] +; SKX-NEXT: sete %cl # sched: [1:0.50] ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] ; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] -; SKX-NEXT: setnp %al # sched: [1:1.00] -; SKX-NEXT: sete %dl # sched: [1:1.00] +; SKX-NEXT: setnp %al # sched: [1:0.50] +; SKX-NEXT: sete %dl # sched: [1:0.50] ; SKX-NEXT: andb %al, %dl # sched: [1:0.25] ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; 
SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ucomiss: ; BTVER2: # BB#0: @@ -3221,8 +3221,8 @@ ; SKX-LABEL: test_unpckhps: ; SKX: # BB#0: ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # BB#0: @@ -3285,8 +3285,8 @@ ; SKX-LABEL: test_unpcklps: ; SKX: # BB#0: ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # BB#0: @@ -3348,9 +3348,9 @@ ; ; SKX-LABEL: test_xorps: ; SKX: # BB#0: -; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll @@ -49,9 +49,9 @@ ; ; SKX-LABEL: test_addpd: ; SKX: # BB#0: -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # 
sched: [7:1.00] ; ; BTVER2-LABEL: test_addpd: ; BTVER2: # BB#0: @@ -109,9 +109,9 @@ ; ; SKX-LABEL: test_addsd: ; SKX: # BB#0: -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addsd: ; BTVER2: # BB#0: @@ -175,10 +175,10 @@ ; ; SKX-LABEL: test_andpd: ; SKX: # BB#0: -; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: @@ -249,10 +249,10 @@ ; ; SKX-LABEL: test_andnotpd: ; SKX: # BB#0: -; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: @@ -325,11 +325,11 @@ ; ; SKX-LABEL: test_cmppd: ; SKX: # BB#0: -; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 -; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %k1 # sched: [9:1.00] +; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2q %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: 
[7:1.00] ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: @@ -394,7 +394,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cmpsd: ; BTVER2: # BB#0: @@ -506,16 +506,16 @@ ; SKX-LABEL: test_comisd: ; SKX: # BB#0: ; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; SKX-NEXT: setnp %al # sched: [1:1.00] -; SKX-NEXT: sete %cl # sched: [1:1.00] +; SKX-NEXT: setnp %al # sched: [1:0.50] +; SKX-NEXT: sete %cl # sched: [1:0.50] ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] ; SKX-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; SKX-NEXT: setnp %al # sched: [1:1.00] -; SKX-NEXT: sete %dl # sched: [1:1.00] +; SKX-NEXT: setnp %al # sched: [1:0.50] +; SKX-NEXT: sete %dl # sched: [1:0.50] ; SKX-NEXT: andb %al, %dl # sched: [1:0.25] ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_comisd: ; BTVER2: # BB#0: @@ -598,9 +598,9 @@ ; SKX-LABEL: test_cvtdq2pd: ; SKX: # BB#0: ; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: @@ -671,9 +671,9 @@ ; SKX-LABEL: test_cvtdq2ps: ; SKX: # BB#0: ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [4:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: 
[7:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: @@ -743,8 +743,8 @@ ; SKX: # BB#0: ; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: @@ -815,8 +815,8 @@ ; SKX: # BB#0: ; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: @@ -886,9 +886,9 @@ ; SKX-LABEL: test_cvtps2dq: ; SKX: # BB#0: ; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [4:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # BB#0: @@ -958,9 +958,9 @@ ; SKX-LABEL: test_cvtps2pd: ; SKX: # BB#0: ; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [4:0.50] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtps2pd: ; BTVER2: # BB#0: @@ -1030,9 +1030,9 @@ ; SKX-LABEL: test_cvtsd2si: ; SKX: # BB#0: ; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [6:1.00] +; SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00] ; SKX-NEXT: addl 
%ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtsd2si: ; BTVER2: # BB#0: @@ -1103,9 +1103,9 @@ ; SKX-LABEL: test_cvtsd2siq: ; SKX: # BB#0: ; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [6:1.00] +; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00] ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtsd2siq: ; BTVER2: # BB#0: @@ -1183,10 +1183,10 @@ ; SKX-LABEL: test_cvtsd2ss: ; SKX: # BB#0: ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] +; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtsd2ss: ; BTVER2: # BB#0: @@ -1257,8 +1257,8 @@ ; SKX: # BB#0: ; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtsi2sd: ; BTVER2: # BB#0: @@ -1327,8 +1327,8 @@ ; SKX: # BB#0: ; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtsi2sdq: ; BTVER2: # BB#0: @@ -1405,10 +1405,10 @@ ; SKX-LABEL: test_cvtss2sd: ; SKX: # BB#0: ; SKX-NEXT: vcvtss2sd %xmm0, 
%xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:0.50] +; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtss2sd: ; BTVER2: # BB#0: @@ -1480,8 +1480,8 @@ ; SKX: # BB#0: ; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvttpd2dq: ; BTVER2: # BB#0: @@ -1552,9 +1552,9 @@ ; SKX-LABEL: test_cvttps2dq: ; SKX: # BB#0: ; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [4:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvttps2dq: ; BTVER2: # BB#0: @@ -1622,9 +1622,9 @@ ; SKX-LABEL: test_cvttsd2si: ; SKX: # BB#0: ; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvttsd2si (%rdi), %eax # sched: [6:1.00] +; SKX-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00] ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvttsd2si: ; BTVER2: # BB#0: @@ -1692,9 +1692,9 @@ ; SKX-LABEL: test_cvttsd2siq: ; SKX: # BB#0: ; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-NEXT: vcvttsd2si (%rdi), %rax # sched: [6:1.00] +; SKX-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00] ; SKX-NEXT: addq %rcx, %rax # 
sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvttsd2siq: ; BTVER2: # BB#0: @@ -1756,8 +1756,8 @@ ; SKX-LABEL: test_divpd: ; SKX: # BB#0: ; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] -; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_divpd: ; BTVER2: # BB#0: @@ -1816,8 +1816,8 @@ ; SKX-LABEL: test_divsd: ; SKX: # BB#0: ; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] -; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_divsd: ; BTVER2: # BB#0: @@ -1876,7 +1876,7 @@ ; SKX-LABEL: test_lfence: ; SKX: # BB#0: ; SKX-NEXT: lfence # sched: [2:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lfence: ; BTVER2: # BB#0: @@ -1931,8 +1931,8 @@ ; ; SKX-LABEL: test_mfence: ; SKX: # BB#0: -; SKX-NEXT: mfence # sched: [2:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: mfence # sched: [3:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mfence: ; BTVER2: # BB#0: @@ -1985,8 +1985,8 @@ ; ; SKX-LABEL: test_maskmovdqu: ; SKX: # BB#0: -; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maskmovdqu: ; BTVER2: # BB#0: @@ -2043,7 +2043,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: @@ -2103,8 +2103,8 @@ ; SKX-LABEL: test_maxsd: ; SKX: # BB#0: ; SKX-NEXT: 
vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_maxsd: ; BTVER2: # BB#0: @@ -2165,7 +2165,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: @@ -2225,8 +2225,8 @@ ; SKX-LABEL: test_minsd: ; SKX: # BB#0: ; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_minsd: ; BTVER2: # BB#0: @@ -2291,10 +2291,10 @@ ; ; SKX-LABEL: test_movapd: ; SKX: # BB#0: -; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movapd: ; BTVER2: # BB#0: @@ -2360,10 +2360,10 @@ ; ; SKX-LABEL: test_movdqa: ; SKX: # BB#0: -; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movdqa: ; BTVER2: # BB#0: @@ -2429,10 +2429,10 @@ ; ; SKX-LABEL: test_movdqu: ; SKX: # BB#0: -; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # 
sched: [1:0.50] +; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movdqu: ; BTVER2: # BB#0: @@ -2516,13 +2516,13 @@ ; ; SKX-LABEL: test_movd: ; SKX: # BB#0: -; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [1:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-NEXT: vmovd %edi, %xmm2 # sched: [1:1.00] +; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm2 # sched: [1:0.33] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovd %xmm2, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movd: ; BTVER2: # BB#0: @@ -2617,13 +2617,13 @@ ; ; SKX-LABEL: test_movd_64: ; SKX: # BB#0: -; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKX-NEXT: vmovq %rdi, %xmm2 # sched: [1:1.00] +; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm2 # sched: [1:0.33] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovq %xmm2, (%rsi) # sched: [1:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movd_64: ; BTVER2: # BB#0: @@ -2700,10 +2700,10 @@ ; ; SKX-LABEL: test_movhpd: ; SKX: # BB#0: -; SKX-NEXT: vmovhpd 
{{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movhpd: ; BTVER2: # BB#0: @@ -2772,10 +2772,10 @@ ; ; SKX-LABEL: test_movlpd: ; SKX: # BB#0: -; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movlpd: ; BTVER2: # BB#0: @@ -2835,7 +2835,7 @@ ; SKX-LABEL: test_movmskpd: ; SKX: # BB#0: ; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: @@ -2892,9 +2892,9 @@ ; ; SKX-LABEL: test_movntdqa: ; SKX: # BB#0: -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: @@ -2951,9 +2951,9 @@ ; ; SKX-LABEL: test_movntpd: ; SKX: # BB#0: -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: @@ -3016,10 +3016,10 @@ ; ; SKX-LABEL: test_movq_mem: ; SKX: # BB#0: -; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [1:0.50] -; SKX-NEXT: 
vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movq_mem: ; BTVER2: # BB#0: @@ -3083,9 +3083,9 @@ ; ; SKX-LABEL: test_movq_reg: ; SKX: # BB#0: -; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movq_reg: ; BTVER2: # BB#0: @@ -3148,10 +3148,10 @@ ; ; SKX-LABEL: test_movsd_mem: ; SKX: # BB#0: -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [1:0.50] -; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movsd_mem: ; BTVER2: # BB#0: @@ -3213,7 +3213,7 @@ ; SKX-LABEL: test_movsd_reg: ; SKX: # BB#0: ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movsd_reg: ; BTVER2: # BB#0: @@ -3273,10 +3273,10 @@ ; ; SKX-LABEL: test_movupd: ; SKX: # BB#0: -; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: @@ -3336,9 
+3336,9 @@ ; ; SKX-LABEL: test_mulpd: ; SKX: # BB#0: -; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: @@ -3396,9 +3396,9 @@ ; ; SKX-LABEL: test_mulsd: ; SKX: # BB#0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mulsd: ; BTVER2: # BB#0: @@ -3462,10 +3462,10 @@ ; ; SKX-LABEL: test_orpd: ; SKX: # BB#0: -; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_orpd: ; BTVER2: # BB#0: @@ -3535,8 +3535,8 @@ ; SKX-LABEL: test_packssdw: ; SKX: # BB#0: ; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_packssdw: ; BTVER2: # BB#0: @@ -3601,8 +3601,8 @@ ; SKX-LABEL: test_packsswb: ; SKX: # BB#0: ; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # 
sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_packsswb: ; BTVER2: # BB#0: @@ -3667,8 +3667,8 @@ ; SKX-LABEL: test_packuswb: ; SKX: # BB#0: ; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_packuswb: ; BTVER2: # BB#0: @@ -3732,9 +3732,9 @@ ; ; SKX-LABEL: test_paddb: ; SKX: # BB#0: -; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_paddb: ; BTVER2: # BB#0: @@ -3796,9 +3796,9 @@ ; ; SKX-LABEL: test_paddd: ; SKX: # BB#0: -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_paddd: ; BTVER2: # BB#0: @@ -3856,9 +3856,9 @@ ; ; SKX-LABEL: test_paddq: ; SKX: # BB#0: -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_paddq: ; BTVER2: # BB#0: @@ -3920,9 +3920,9 @@ ; ; SKX-LABEL: test_paddsb: ; SKX: # BB#0: -; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: 
[1:0.50] +; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_paddsb: ; BTVER2: # BB#0: @@ -3985,9 +3985,9 @@ ; ; SKX-LABEL: test_paddsw: ; SKX: # BB#0: -; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_paddsw: ; BTVER2: # BB#0: @@ -4050,9 +4050,9 @@ ; ; SKX-LABEL: test_paddusb: ; SKX: # BB#0: -; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_paddusb: ; BTVER2: # BB#0: @@ -4115,9 +4115,9 @@ ; ; SKX-LABEL: test_paddusw: ; SKX: # BB#0: -; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_paddusw: ; BTVER2: # BB#0: @@ -4180,9 +4180,9 @@ ; ; SKX-LABEL: test_paddw: ; SKX: # BB#0: -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_paddw: ; BTVER2: # BB#0: @@ -4246,10 +4246,10 @@ ; ; SKX-LABEL: test_pand: ; SKX: # BB#0: -; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpand 
(%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pand: ; BTVER2: # BB#0: @@ -4322,10 +4322,10 @@ ; ; SKX-LABEL: test_pandn: ; SKX: # BB#0: -; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pandn: ; BTVER2: # BB#0: @@ -4392,9 +4392,9 @@ ; ; SKX-LABEL: test_pavgb: ; SKX: # BB#0: -; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pavgb: ; BTVER2: # BB#0: @@ -4466,9 +4466,9 @@ ; ; SKX-LABEL: test_pavgw: ; SKX: # BB#0: -; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pavgw: ; BTVER2: # BB#0: @@ -4544,11 +4544,11 @@ ; ; SKX-LABEL: test_pcmpeqb: ; SKX: # BB#0: -; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # sched: [3:1.00] 
+; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %k1 # sched: [9:1.00] +; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2b %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpeqb: ; BTVER2: # BB#0: @@ -4618,11 +4618,11 @@ ; ; SKX-LABEL: test_pcmpeqd: ; SKX: # BB#0: -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00] +; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpeqd: ; BTVER2: # BB#0: @@ -4692,11 +4692,11 @@ ; ; SKX-LABEL: test_pcmpeqw: ; SKX: # BB#0: -; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 -; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 -; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 # sched: [9:1.00] +; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2w %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpeqw: ; BTVER2: # BB#0: @@ -4767,11 +4767,11 @@ ; ; SKX-LABEL: test_pcmpgtb: ; SKX: # BB#0: -; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 -; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 # sched: [9:1.00] +; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2b %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpgtb: ; BTVER2: # BB#0: @@ -4842,11 +4842,11 @@ ; ; SKX-LABEL: test_pcmpgtd: ; SKX: # BB#0: -; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 -; SKX-NEXT: korw %k1, %k0, %k0 +; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # sched: 
[3:1.00] +; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00] +; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpgtd: ; BTVER2: # BB#0: @@ -4917,11 +4917,11 @@ ; ; SKX-LABEL: test_pcmpgtw: ; SKX: # BB#0: -; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 -; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 -; SKX-NEXT: korb %k1, %k0, %k0 +; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 # sched: [9:1.00] +; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2w %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpgtw: ; BTVER2: # BB#0: @@ -4985,7 +4985,7 @@ ; SKX: # BB#0: ; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: # kill: %AX %AX %EAX -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # BB#0: @@ -5046,8 +5046,8 @@ ; SKX-LABEL: test_pinsrw: ; SKX: # BB#0: ; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pinsrw: ; BTVER2: # BB#0: @@ -5114,8 +5114,8 @@ ; SKX-LABEL: test_pmaddwd: ; SKX: # BB#0: ; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmaddwd: ; BTVER2: # BB#0: @@ -5179,9 +5179,9 @@ ; ; SKX-LABEL: test_pmaxsw: ; SKX: # BB#0: -; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: 
vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmaxsw: ; BTVER2: # BB#0: @@ -5244,9 +5244,9 @@ ; ; SKX-LABEL: test_pmaxub: ; SKX: # BB#0: -; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmaxub: ; BTVER2: # BB#0: @@ -5309,9 +5309,9 @@ ; ; SKX-LABEL: test_pminsw: ; SKX: # BB#0: -; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pminsw: ; BTVER2: # BB#0: @@ -5374,9 +5374,9 @@ ; ; SKX-LABEL: test_pminub: ; SKX: # BB#0: -; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pminub: ; BTVER2: # BB#0: @@ -5432,7 +5432,7 @@ ; SKX-LABEL: test_pmovmskb: ; SKX: # BB#0: ; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovmskb: ; BTVER2: # BB#0: @@ -5488,8 +5488,8 @@ ; SKX-LABEL: test_pmulhuw: ; SKX: # BB#0: ; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: 
retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmulhuw: ; BTVER2: # BB#0: @@ -5549,8 +5549,8 @@ ; SKX-LABEL: test_pmulhw: ; SKX: # BB#0: ; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmulhw: ; BTVER2: # BB#0: @@ -5610,8 +5610,8 @@ ; SKX-LABEL: test_pmullw: ; SKX: # BB#0: ; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmullw: ; BTVER2: # BB#0: @@ -5678,8 +5678,8 @@ ; SKX-LABEL: test_pmuludq: ; SKX: # BB#0: ; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmuludq: ; BTVER2: # BB#0: @@ -5745,10 +5745,10 @@ ; ; SKX-LABEL: test_por: ; SKX: # BB#0: -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_por: ; BTVER2: # BB#0: @@ -5818,8 +5818,8 @@ ; SKX-LABEL: test_psadbw: ; SKX: # BB#0: ; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SKX-NEXT: retq # sched: 
[7:1.00] ; ; BTVER2-LABEL: test_psadbw: ; BTVER2: # BB#0: @@ -5888,9 +5888,9 @@ ; SKX-LABEL: test_pshufd: ; SKX: # BB#0: ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pshufd: ; BTVER2: # BB#0: @@ -5960,9 +5960,9 @@ ; SKX-LABEL: test_pshufhw: ; SKX: # BB#0: ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SKX-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pshufhw: ; BTVER2: # BB#0: @@ -6032,9 +6032,9 @@ ; SKX-LABEL: test_pshuflw: ; SKX: # BB#0: ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pshuflw: ; BTVER2: # BB#0: @@ -6102,9 +6102,9 @@ ; SKX-LABEL: test_pslld: ; SKX: # BB#0: ; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] +; 
SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pslld: ; BTVER2: # BB#0: @@ -6168,7 +6168,7 @@ ; SKX-LABEL: test_pslldq: ; SKX: # BB#0: ; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pslldq: ; BTVER2: # BB#0: @@ -6229,9 +6229,9 @@ ; SKX-LABEL: test_psllq: ; SKX: # BB#0: ; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psllq: ; BTVER2: # BB#0: @@ -6301,9 +6301,9 @@ ; SKX-LABEL: test_psllw: ; SKX: # BB#0: ; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psllw: ; BTVER2: # BB#0: @@ -6373,9 +6373,9 @@ ; SKX-LABEL: test_psrad: ; SKX: # BB#0: ; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psrad: ; BTVER2: # BB#0: @@ -6445,9 +6445,9 @@ ; SKX-LABEL: test_psraw: ; SKX: # BB#0: ; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: 
[1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psraw: ; BTVER2: # BB#0: @@ -6517,9 +6517,9 @@ ; SKX-LABEL: test_psrld: ; SKX: # BB#0: ; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psrld: ; BTVER2: # BB#0: @@ -6583,7 +6583,7 @@ ; SKX-LABEL: test_psrldq: ; SKX: # BB#0: ; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psrldq: ; BTVER2: # BB#0: @@ -6644,9 +6644,9 @@ ; SKX-LABEL: test_psrlq: ; SKX: # BB#0: ; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psrlq: ; BTVER2: # BB#0: @@ -6716,9 +6716,9 @@ ; SKX-LABEL: test_psrlw: ; SKX: # BB#0: ; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psrlw: ; BTVER2: # BB#0: @@ -6785,9 +6785,9 @@ ; ; SKX-LABEL: test_psubb: ; SKX: # BB#0: 
-; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psubb: ; BTVER2: # BB#0: @@ -6849,9 +6849,9 @@ ; ; SKX-LABEL: test_psubd: ; SKX: # BB#0: -; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psubd: ; BTVER2: # BB#0: @@ -6909,9 +6909,9 @@ ; ; SKX-LABEL: test_psubq: ; SKX: # BB#0: -; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psubq: ; BTVER2: # BB#0: @@ -6973,9 +6973,9 @@ ; ; SKX-LABEL: test_psubsb: ; SKX: # BB#0: -; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psubsb: ; BTVER2: # BB#0: @@ -7038,9 +7038,9 @@ ; ; SKX-LABEL: test_psubsw: ; SKX: # BB#0: -; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; 
BTVER2-LABEL: test_psubsw: ; BTVER2: # BB#0: @@ -7103,9 +7103,9 @@ ; ; SKX-LABEL: test_psubusb: ; SKX: # BB#0: -; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psubusb: ; BTVER2: # BB#0: @@ -7168,9 +7168,9 @@ ; ; SKX-LABEL: test_psubusw: ; SKX: # BB#0: -; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psubusw: ; BTVER2: # BB#0: @@ -7233,9 +7233,9 @@ ; ; SKX-LABEL: test_psubw: ; SKX: # BB#0: -; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psubw: ; BTVER2: # BB#0: @@ -7298,8 +7298,8 @@ ; SKX-LABEL: test_punpckhbw: ; SKX: # BB#0: ; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] +; 
SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_punpckhbw: ; BTVER2: # BB#0: @@ -7366,9 +7366,9 @@ ; SKX-LABEL: test_punpckhdq: ; SKX: # BB#0: ; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_punpckhdq: ; BTVER2: # BB#0: @@ -7436,9 +7436,9 @@ ; SKX-LABEL: test_punpckhqdq: ; SKX: # BB#0: ; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_punpckhqdq: ; BTVER2: # BB#0: @@ -7504,8 +7504,8 @@ ; SKX-LABEL: test_punpckhwd: ; SKX: # BB#0: ; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_punpckhwd: ; BTVER2: # BB#0: @@ -7568,8 +7568,8 @@ ; SKX-LABEL: test_punpcklbw: ; SKX: # BB#0: ; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-NEXT: vpunpcklbw {{.*#+}} 
xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_punpcklbw: ; BTVER2: # BB#0: @@ -7636,9 +7636,9 @@ ; SKX-LABEL: test_punpckldq: ; SKX: # BB#0: ; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_punpckldq: ; BTVER2: # BB#0: @@ -7706,9 +7706,9 @@ ; SKX-LABEL: test_punpcklqdq: ; SKX: # BB#0: ; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_punpcklqdq: ; BTVER2: # BB#0: @@ -7774,8 +7774,8 @@ ; SKX-LABEL: test_punpcklwd: ; SKX: # BB#0: ; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SKX-NEXT: 
retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_punpcklwd: ; BTVER2: # BB#0: @@ -7839,10 +7839,10 @@ ; ; SKX-LABEL: test_pxor: ; SKX: # BB#0: -; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pxor: ; BTVER2: # BB#0: @@ -7910,9 +7910,9 @@ ; SKX-LABEL: test_shufpd: ; SKX: # BB#0: ; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: @@ -7981,9 +7981,9 @@ ; SKX-LABEL: test_sqrtpd: ; SKX: # BB#0: ; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00] -; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [18:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # BB#0: @@ -8060,10 +8060,10 @@ ; SKX-LABEL: test_sqrtsd: ; SKX: # BB#0: ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] -; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [1:0.50] +; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] ; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # 
sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtsd: ; BTVER2: # BB#0: @@ -8127,9 +8127,9 @@ ; ; SKX-LABEL: test_subpd: ; SKX: # BB#0: -; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_subpd: ; BTVER2: # BB#0: @@ -8187,9 +8187,9 @@ ; ; SKX-LABEL: test_subsd: ; SKX: # BB#0: -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_subsd: ; BTVER2: # BB#0: @@ -8296,16 +8296,16 @@ ; SKX-LABEL: test_ucomisd: ; SKX: # BB#0: ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; SKX-NEXT: setnp %al # sched: [1:1.00] -; SKX-NEXT: sete %cl # sched: [1:1.00] +; SKX-NEXT: setnp %al # sched: [1:0.50] +; SKX-NEXT: sete %cl # sched: [1:0.50] ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] ; SKX-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; SKX-NEXT: setnp %al # sched: [1:1.00] -; SKX-NEXT: sete %dl # sched: [1:1.00] +; SKX-NEXT: setnp %al # sched: [1:0.50] +; SKX-NEXT: sete %dl # sched: [1:0.50] ; SKX-NEXT: andb %al, %dl # sched: [1:0.25] ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ucomisd: ; BTVER2: # BB#0: @@ -8388,9 +8388,9 @@ ; SKX-LABEL: test_unpckhpd: ; SKX: # BB#0: ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; 
SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # BB#0: @@ -8464,9 +8464,9 @@ ; SKX-LABEL: test_unpcklpd: ; SKX: # BB#0: ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # BB#0: @@ -8533,10 +8533,10 @@ ; ; SKX-LABEL: test_xorpd: ; SKX: # BB#0: -; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll @@ -49,9 +49,9 @@ ; ; SKX-LABEL: test_addsubpd: ; SKX: # BB#0: -; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # BB#0: @@ -110,9 +110,9 @@ ; ; SKX-LABEL: test_addsubps: 
; SKX: # BB#0: -; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # BB#0: @@ -172,8 +172,8 @@ ; SKX-LABEL: test_haddpd: ; SKX: # BB#0: ; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: @@ -233,8 +233,8 @@ ; SKX-LABEL: test_haddps: ; SKX: # BB#0: ; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: @@ -294,8 +294,8 @@ ; SKX-LABEL: test_hsubpd: ; SKX: # BB#0: ; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: @@ -355,8 +355,8 @@ ; SKX-LABEL: test_hsubps: ; SKX: # BB#0: ; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: @@ -411,8 +411,8 @@ ; ; SKX-LABEL: test_lddqu: ; SKX: # BB#0: -; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vlddqu 
(%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # BB#0: @@ -476,7 +476,7 @@ ; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] ; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25] ; SKX-NEXT: monitor # sched: [100:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_monitor: ; BTVER2: # BB#0: @@ -543,9 +543,9 @@ ; SKX-LABEL: test_movddup: ; SKX: # BB#0: ; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [1:0.50] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: @@ -614,9 +614,9 @@ ; SKX-LABEL: test_movshdup: ; SKX: # BB#0: ; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [1:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: @@ -685,9 +685,9 @@ ; SKX-LABEL: test_movsldup: ; SKX: # BB#0: ; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [1:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: @@ -757,7 +757,7 @@ ; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] ; SKX-NEXT: movl %esi, %eax # sched: 
[1:0.25] ; SKX-NEXT: mwait # sched: [20:2.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_mwait: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll @@ -48,10 +48,10 @@ ; SKX-LABEL: test_blendpd: ; SKX: # BB#0: ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [1:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [6:0.50] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: @@ -106,9 +106,9 @@ ; ; SKX-LABEL: test_blendps: ; SKX: # BB#0: -; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: @@ -167,8 +167,8 @@ ; SKX-LABEL: test_blendvpd: ; SKX: # BB#0: ; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: @@ -228,8 +228,8 @@ ; SKX-LABEL: test_blendvps: ; SKX: # BB#0: ; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, 
%xmm0 # sched: [2:0.67] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: @@ -283,8 +283,8 @@ ; SKX-LABEL: test_dppd: ; SKX: # BB#0: ; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_dppd: ; BTVER2: # BB#0: @@ -338,8 +338,8 @@ ; SKX-LABEL: test_dpps: ; SKX: # BB#0: ; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33] -; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.33] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: @@ -393,8 +393,8 @@ ; SKX-LABEL: test_insertps: ; SKX: # BB#0: ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_insertps: ; BTVER2: # BB#0: @@ -442,8 +442,8 @@ ; ; SKX-LABEL: test_movntdqa: ; SKX: # BB#0: -; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: @@ -493,8 +493,8 @@ ; SKX-LABEL: test_mpsadbw: ; SKX: # BB#0: ; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [4:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: 
test_mpsadbw: ; BTVER2: # BB#0: @@ -549,8 +549,8 @@ ; SKX-LABEL: test_packusdw: ; SKX: # BB#0: ; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_packusdw: ; BTVER2: # BB#0: @@ -611,8 +611,8 @@ ; SKX-LABEL: test_pblendvb: ; SKX: # BB#0: ; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] -; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:0.67] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pblendvb: ; BTVER2: # BB#0: @@ -666,8 +666,8 @@ ; SKX-LABEL: test_pblendw: ; SKX: # BB#0: ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pblendw: ; BTVER2: # BB#0: @@ -719,11 +719,11 @@ ; ; SKX-LABEL: test_pcmpeqq: ; SKX: # BB#0: -; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 +; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2q %k0, %xmm0 -; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 +; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 # sched: [9:1.00] ; SKX-NEXT: vpmovm2q %k0, %xmm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpeqq: ; BTVER2: # BB#0: @@ -778,8 +778,8 @@ ; SKX-LABEL: test_pextrb: ; SKX: # BB#0: ; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; 
SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pextrb: ; BTVER2: # BB#0: @@ -833,8 +833,8 @@ ; SKX-LABEL: test_pextrd: ; SKX: # BB#0: ; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pextrd: ; BTVER2: # BB#0: @@ -887,8 +887,8 @@ ; SKX-LABEL: test_pextrq: ; SKX: # BB#0: ; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] -; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pextrq: ; BTVER2: # BB#0: @@ -941,8 +941,8 @@ ; SKX-LABEL: test_pextrw: ; SKX: # BB#0: ; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # BB#0: @@ -995,9 +995,9 @@ ; ; SKX-LABEL: test_phminposuw: ; SKX: # BB#0: -; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [4:0.50] +; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_phminposuw: ; BTVER2: # BB#0: @@ -1051,8 +1051,8 @@ ; SKX-LABEL: test_pinsrb: ; SKX: # BB#0: ; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pinsrb: ; BTVER2: # BB#0: @@ -1105,8 +1105,8 @@ ; SKX-LABEL: test_pinsrd: ; SKX: # BB#0: ; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; 
SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pinsrd: ; BTVER2: # BB#0: @@ -1164,9 +1164,9 @@ ; SKX-LABEL: test_pinsrq: ; SKX: # BB#0: ; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pinsrq: ; BTVER2: # BB#0: @@ -1221,9 +1221,9 @@ ; ; SKX-LABEL: test_pmaxsb: ; SKX: # BB#0: -; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmaxsb: ; BTVER2: # BB#0: @@ -1276,9 +1276,9 @@ ; ; SKX-LABEL: test_pmaxsd: ; SKX: # BB#0: -; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmaxsd: ; BTVER2: # BB#0: @@ -1331,9 +1331,9 @@ ; ; SKX-LABEL: test_pmaxud: ; SKX: # BB#0: -; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmaxud: ; BTVER2: 
# BB#0: @@ -1386,9 +1386,9 @@ ; ; SKX-LABEL: test_pmaxuw: ; SKX: # BB#0: -; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmaxuw: ; BTVER2: # BB#0: @@ -1441,9 +1441,9 @@ ; ; SKX-LABEL: test_pminsb: ; SKX: # BB#0: -; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pminsb: ; BTVER2: # BB#0: @@ -1496,9 +1496,9 @@ ; ; SKX-LABEL: test_pminsd: ; SKX: # BB#0: -; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pminsd: ; BTVER2: # BB#0: @@ -1551,9 +1551,9 @@ ; ; SKX-LABEL: test_pminud: ; SKX: # BB#0: -; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pminud: ; BTVER2: # BB#0: @@ -1606,9 +1606,9 @@ ; ; SKX-LABEL: test_pminuw: ; SKX: # BB#0: -; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: 
vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pminuw: ; BTVER2: # BB#0: @@ -1668,9 +1668,9 @@ ; SKX-LABEL: test_pmovsxbw: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovsxbw: ; BTVER2: # BB#0: @@ -1733,9 +1733,9 @@ ; SKX-LABEL: test_pmovsxbd: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovsxbd: ; BTVER2: # BB#0: @@ -1798,9 +1798,9 @@ ; SKX-LABEL: test_pmovsxbq: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovsxbq: ; BTVER2: # BB#0: @@ -1863,9 +1863,9 @@ ; SKX-LABEL: test_pmovsxdq: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovsxdq: ; BTVER2: # BB#0: 
@@ -1928,9 +1928,9 @@ ; SKX-LABEL: test_pmovsxwd: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovsxwd: ; BTVER2: # BB#0: @@ -1993,9 +1993,9 @@ ; SKX-LABEL: test_pmovsxwq: ; SKX: # BB#0: ; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovsxwq: ; BTVER2: # BB#0: @@ -2058,9 +2058,9 @@ ; SKX-LABEL: test_pmovzxbw: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; SKX-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovzxbw: ; BTVER2: # BB#0: @@ -2123,9 +2123,9 @@ ; SKX-LABEL: test_pmovzxbd: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpmovzxbd {{.*#+}} xmm1 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovzxbd: ; BTVER2: # BB#0: @@ -2188,9 +2188,9 @@ ; SKX-LABEL: test_pmovzxbq: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovzxbq: ; BTVER2: # BB#0: @@ -2253,9 +2253,9 @@ ; SKX-LABEL: test_pmovzxdq: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; SKX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovzxdq: ; BTVER2: # BB#0: @@ -2318,9 +2318,9 @@ ; SKX-LABEL: test_pmovzxwd: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; SKX-NEXT: vpmovzxwd {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovzxwd: ; BTVER2: # BB#0: @@ -2383,9 +2383,9 @@ ; SKX-LABEL: test_pmovzxwq: ; SKX: # BB#0: ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmovzxwq: ; BTVER2: # BB#0: @@ -2442,8 +2442,8 @@ ; SKX-LABEL: test_pmuldq: ; SKX: # BB#0: ; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmuldq: ; BTVER2: # BB#0: @@ -2498,8 +2498,8 @@ ; SKX-LABEL: test_pmulld: ; SKX: # BB#0: ; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [14:0.67] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmulld: ; BTVER2: # BB#0: @@ -2572,12 +2572,12 @@ ; SKX-LABEL: test_ptest: ; SKX: # BB#0: ; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] -; SKX-NEXT: setb %al # sched: [1:1.00] -; SKX-NEXT: vptest (%rdi), %xmm0 # sched: [3:1.00] -; SKX-NEXT: setb %cl # sched: [1:1.00] +; SKX-NEXT: setb 
%al # sched: [1:0.50] +; SKX-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00] +; SKX-NEXT: setb %cl # sched: [1:0.50] ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] ; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_ptest: ; BTVER2: # BB#0: @@ -2646,9 +2646,9 @@ ; SKX-LABEL: test_roundpd: ; SKX: # BB#0: ; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:0.67] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:0.67] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: @@ -2711,9 +2711,9 @@ ; SKX-LABEL: test_roundps: ; SKX: # BB#0: ; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:0.67] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:0.67] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: @@ -2777,9 +2777,9 @@ ; SKX-LABEL: test_roundsd: ; SKX: # BB#0: ; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundsd: ; BTVER2: # BB#0: @@ -2843,9 +2843,9 @@ ; SKX-LABEL: test_roundss: ; SKX: # BB#0: ; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vaddps %xmm0, %xmm1, 
%xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67] +; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundss: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/sse42-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll @@ -50,7 +50,7 @@ ; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # BB#0: @@ -113,7 +113,7 @@ ; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00] ; SKX-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # BB#0: @@ -176,7 +176,7 @@ ; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; SKX-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # BB#0: @@ -239,7 +239,7 @@ ; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; SKX-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # BB#0: @@ -302,7 +302,7 @@ ; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; SKX-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # BB#0: @@ -398,10 +398,10 @@ ; SKX-NEXT: movl %ecx, %esi # sched: [1:0.25] ; SKX-NEXT: movl $7, 
%eax # sched: [1:0.25] ; SKX-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00] +; SKX-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] ; SKX-NEXT: # kill: %ECX %ECX %RCX ; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpestri: ; BTVER2: # BB#0: @@ -494,8 +494,8 @@ ; SKX-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] ; SKX-NEXT: movl $7, %eax # sched: [1:0.25] ; SKX-NEXT: movl $7, %edx # sched: [1:0.25] -; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpestrm: ; BTVER2: # BB#0: @@ -573,10 +573,10 @@ ; SKX: # BB#0: ; SKX-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] ; SKX-NEXT: movl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [10:3.00] +; SKX-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] ; SKX-NEXT: # kill: %ECX %ECX %RCX ; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpistri: ; BTVER2: # BB#0: @@ -637,8 +637,8 @@ ; SKX-LABEL: test_pcmpistrm: ; SKX: # BB#0: ; SKX-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] -; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpistrm: ; BTVER2: # BB#0: @@ -691,11 +691,11 @@ ; ; SKX-LABEL: test_pcmpgtq: ; SKX: # BB#0: -; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 +; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2q %k0, %xmm0 -; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 +; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 # sched: [9:1.00] ; SKX-NEXT: vpmovm2q %k0, %xmm0 -; SKX-NEXT: 
retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pcmpgtq: ; BTVER2: # BB#0: @@ -750,8 +750,8 @@ ; SKX-LABEL: test_pclmulqdq: ; SKX: # BB#0: ; SKX-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] -; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pclmulqdq: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll +++ llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll @@ -56,10 +56,10 @@ ; ; SKX-LABEL: test_pabsb: ; SKX: # BB#0: -; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pabsb: ; BTVER2: # BB#0: @@ -128,10 +128,10 @@ ; ; SKX-LABEL: test_pabsd: ; SKX: # BB#0: -; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pabsd: ; BTVER2: # BB#0: @@ -200,10 +200,10 @@ ; ; SKX-LABEL: test_pabsw: ; SKX: # BB#0: -; SKX-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpabsw (%rdi), %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpabsw 
%xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pabsw: ; BTVER2: # BB#0: @@ -271,8 +271,8 @@ ; SKX-LABEL: test_palignr: ; SKX: # BB#0: ; SKX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_palignr: ; BTVER2: # BB#0: @@ -331,8 +331,8 @@ ; SKX-LABEL: test_phaddd: ; SKX: # BB#0: ; SKX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # BB#0: @@ -392,8 +392,8 @@ ; SKX-LABEL: test_phaddsw: ; SKX: # BB#0: ; SKX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # BB#0: @@ -453,8 +453,8 @@ ; SKX-LABEL: test_phaddw: ; SKX: # BB#0: ; SKX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # BB#0: @@ -514,8 +514,8 @@ ; SKX-LABEL: test_phsubd: ; SKX: # BB#0: ; SKX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: retq # 
sched: [2:1.00] +; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_phsubd: ; BTVER2: # BB#0: @@ -575,8 +575,8 @@ ; SKX-LABEL: test_phsubsw: ; SKX: # BB#0: ; SKX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # BB#0: @@ -636,8 +636,8 @@ ; SKX-LABEL: test_phsubw: ; SKX: # BB#0: ; SKX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # BB#0: @@ -697,8 +697,8 @@ ; SKX-LABEL: test_pmaddubsw: ; SKX: # BB#0: ; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmaddubsw: ; BTVER2: # BB#0: @@ -759,8 +759,8 @@ ; SKX-LABEL: test_pmulhrsw: ; SKX: # BB#0: ; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pmulhrsw: ; BTVER2: # BB#0: @@ -820,8 +820,8 @@ ; SKX-LABEL: test_pshufb: ; SKX: # BB#0: ; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_pshufb: ; BTVER2: # BB#0: @@ 
-884,9 +884,9 @@ ; ; SKX-LABEL: test_psignb: ; SKX: # BB#0: -; SKX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psignb: ; BTVER2: # BB#0: @@ -949,9 +949,9 @@ ; ; SKX-LABEL: test_psignd: ; SKX: # BB#0: -; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psignd: ; BTVER2: # BB#0: @@ -1014,9 +1014,9 @@ ; ; SKX-LABEL: test_psignw: ; SKX: # BB#0: -; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_psignw: ; BTVER2: # BB#0: