Review notes (commentary only: patch(1) skips text that precedes the "Index:" header).

This patch to X86SchedHaswell.td drops the "SSE4 and AVX are unimplemented"
FIXME, removes the hand-written "Specific Scheduling Models" records, and adds
horizontal add/sub WriteRes records followed by HWWriteResGroupN records.

NOTE(review): the anonymous "def : WriteRes" records below carry no template
arguments in this copy. They look like they were lost when angle-bracketed text
was stripped -- TODO restore them from the real file before applying.

Change made during review: the "v <- v,m" PHADD/PHSUB record now sets
NumMicroOps = 4 instead of 3, so that it agrees with its own
ResourceCycles = [1, 2, 1] (four cycles in total) and with the "x,m" HADD
record, which uses 4 for the same cycle list. Hunk line counts are unchanged.

Index: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td
@@ -23,8 +23,8 @@
   // Based on the LSD (loop-stream detector) queue size and benchmarking data.
   let LoopMicroOpBufferSize = 50;
 
-  // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
-  // the scheduler to assign a default model to unrecognized opcodes.
+  // This flag is set to allow the scheduler to assign a default model to
+  // unrecognized opcodes.
   let CompleteModel = 0;
 }
 
@@ -267,1914 +267,3251 @@
 def : WriteRes;
 def : WriteRes;
 
-//================ Exceptions ================//
-
-//-- Specific Scheduling Models --//
-
-// Starting with P0.
-def WriteP0 : SchedWriteRes<[HWPort0]>;
-
-def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> {
-  let Latency = 4;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1, 1];
-}
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
 
-def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> {
-  let Latency = 8;
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes {
+  let Latency = 5;
   let NumMicroOps = 3;
-  let ResourceCycles = [1, 1, 1];
+  let ResourceCycles = [1, 2];
 }
 
-def WriteP01 : SchedWriteRes<[HWPort01]>;
+// x,m / v,v,m.
+def : WriteRes {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 2, 1];
+}
 
-def Write2P01 : SchedWriteRes<[HWPort01]> {
-  let NumMicroOps = 2;
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1, 2];
 }
 
-def Write3P01 : SchedWriteRes<[HWPort01]> {
-  let NumMicroOps = 3;
+// v <- v,m.
+def : WriteRes {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1, 2, 1];
 }
 
-def WriteP015 : SchedWriteRes<[HWPort015]>;
+// Remaining instrs.
 
-def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> { - let NumMicroOps = 2; +def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> { + let Latency = 0; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def WriteP06 : SchedWriteRes<[HWPort06]>; +def: InstRW<[HWWriteResGroup0], (instregex "LDDQUrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64rm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64to64rm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVQ64rm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOV32rm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOV64toPQIrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOV8rm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVAPDrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVAPSrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVDDUPrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVDI2PDIrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVDQArm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVDQUrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVNTDQArm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVSHDUPrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVSLDUPrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVSSrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVSX32rm16")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVSX32rm8")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVUPDrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVUPSrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVZX32rm16")>; +def: InstRW<[HWWriteResGroup0], (instregex "MOVZX32rm8")>; +def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHNTA")>; +def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT0")>; +def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT1")>; +def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT2")>; +def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTF128")>; +def: 
InstRW<[HWWriteResGroup0], (instregex "VBROADCASTI128")>; +def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSDYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VLDDQUYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VLDDQUrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOV64toPQIrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPDYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPDrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPSYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPSrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVDDUPYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVDDUPrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVDI2PDIrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQAYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQArm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQUYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQUrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVNTDQAYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVNTDQArm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVQI2PQIrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVSDrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVSHDUPYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVSHDUPrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVSLDUPYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVSLDUPrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVSSrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPDYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPDrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPSYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPSrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTDYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex 
"VPBROADCASTDrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTQYrm")>; +def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTQrm")>; + +def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> { + let Latency = 0; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup1], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[HWWriteResGroup1], (instregex "MMX_MOVD64mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MMX_MOVNTQmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MMX_MOVQ64mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOV64mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOV8mi")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOV8mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVAPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVAPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVDQAmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVDQUmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVHPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVHPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVLPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVLPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVNTDQmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVNTI_64mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVNTImr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVNTPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVNTPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVPDI2DImr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVPQI2QImr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVPQIto64mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVSSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVUPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "MOVUPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VEXTRACTF128mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VEXTRACTI128mr")>; +def: InstRW<[HWWriteResGroup1], (instregex 
"VMOVAPDYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVAPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVAPSYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVAPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVDQAYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVDQAmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVDQUYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVDQUmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVHPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVHPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVLPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVLPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTDQYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTDQmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTPDYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTPSYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVPDI2DImr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVPQI2QImr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVPQIto64mr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVSDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVSSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVUPDYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVUPDmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVUPSYmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMOVUPSmr")>; +def: InstRW<[HWWriteResGroup1], (instregex "VMPTRSTm")>; -def Write2P06 : SchedWriteRes<[HWPort06]> { +def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> { let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [2]; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup2], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_MOVD64grr")>; +def: 
InstRW<[HWWriteResGroup2], (instregex "MMX_PMOVMSKBrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLDri")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLDrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLQri")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLQrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLWrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRADri")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRADrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRAWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRAWrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLDri")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLDrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLQri")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLQrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLWrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MOVPDI2DIrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "MOVPQIto64rr")>; +def: InstRW<[HWWriteResGroup2], (instregex "PSLLDri")>; +def: InstRW<[HWWriteResGroup2], (instregex "PSLLQri")>; +def: InstRW<[HWWriteResGroup2], (instregex "PSLLWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "PSRADri")>; +def: InstRW<[HWWriteResGroup2], (instregex "PSRAWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "PSRLDri")>; +def: InstRW<[HWWriteResGroup2], (instregex "PSRLQri")>; +def: InstRW<[HWWriteResGroup2], (instregex "PSRLWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VMOVPDI2DIrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VMOVPQIto64rr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSLLDri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSLLDrm")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSLLQri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSLLQrm")>; +def: 
InstRW<[HWWriteResGroup2], (instregex "VPSLLVQYrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSLLVQrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSLLWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSLLWrm")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRADYri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRADri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRAWYri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRAWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRLDYri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRLDri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRLQYri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRLQri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRLVQYrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRLVQrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRLWYri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VPSRLWri")>; +def: InstRW<[HWWriteResGroup2], (instregex "VTESTPDYrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VTESTPDrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VTESTPSYrr")>; +def: InstRW<[HWWriteResGroup2], (instregex "VTESTPSrr")>; -def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [3]; +def HWWriteResGroup3 : SchedWriteRes<[HWPort1]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup3], (instregex "MASKMOVDQU64")>; +def: InstRW<[HWWriteResGroup3], (instregex "MMX_MASKMOVQ64")>; -def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> { - let NumMicroOps = 2; +def HWWriteResGroup4 : SchedWriteRes<[HWPort5]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup4], (instregex "ANDNPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "ANDNPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "ANDPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "ANDPSrr")>; +def: 
InstRW<[HWWriteResGroup4], (instregex "INSERTPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "KORTESTBrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64to64rr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVQ2DQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PSHUFBrr64")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PSHUFWri")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKHBWirr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKHDQirr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKHWDirr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKLBWirr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKLDQirr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKLWDirr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOV64toPQIrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVAPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVAPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVDDUPrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVDI2PDIrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVHLPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVLHPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVSDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVSHDUPrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVSLDUPrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVSSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVUPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MOVUPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "ORPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "ORPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PACKSSDWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PACKSSWBrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PACKUSDWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex 
"PACKUSWBrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PALIGNRrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "PBLENDWrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXBDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXBQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXBWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXWDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXWQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXBDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXBQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXBWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXWDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXWQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PSHUFBrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PSHUFDri")>; +def: InstRW<[HWWriteResGroup4], (instregex "PSHUFHWri")>; +def: InstRW<[HWWriteResGroup4], (instregex "PSHUFLWri")>; +def: InstRW<[HWWriteResGroup4], (instregex "PSLLDQri")>; +def: InstRW<[HWWriteResGroup4], (instregex "PSRLDQri")>; +def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKHBWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKHDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKHQDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKHWDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKLBWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKLDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKLQDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKLWDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "SHUFPDrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "SHUFPSrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "UNPCKHPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "UNPCKHPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "UNPCKLPDrr")>; +def: 
InstRW<[HWWriteResGroup4], (instregex "UNPCKLPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VANDNPDYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VANDNPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VANDNPSYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VANDNPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VANDPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VANDPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VANDPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VBROADCASTSSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VGATHERQPSZrm")>; +def: InstRW<[HWWriteResGroup4], (instregex "VINSERTPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOV64toPQIrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOV64toPQIrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPDYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPSYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVDDUPYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVDDUPrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVHLPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVLHPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVSDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVSHDUPYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVSHDUPrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVSLDUPYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVSLDUPrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVSSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPDYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPSYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VORPDYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VORPDrr")>; +def: 
InstRW<[HWWriteResGroup4], (instregex "VORPSYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VORPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPACKSSDWYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPACKSSDWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPACKSSWBYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPACKSSWBrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPACKUSDWYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPACKUSDWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPACKUSWBYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPACKUSWBrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPALIGNRYrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPALIGNRrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPBLENDWYrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPBLENDWrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPBROADCASTDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPBROADCASTQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPDri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPDrm")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPSri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPSrm")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXBDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXBQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXBWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXWDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXWQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXBDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXBQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXBWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex 
"VPMOVZXDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXWDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXWQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFBrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFBrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFDYri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFDri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFHWri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFLWYri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFLWri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSLLDQYri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSLLDQri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSRLDQYri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPSRLDQri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHBWYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHDQYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHQDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHWDYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLBWrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLDQYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLQDQYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLWDYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLWDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VSHUFPDYrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VSHUFPDrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VSHUFPSYrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VSHUFPSrri")>; +def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKHPDrr")>; +def: 
InstRW<[HWWriteResGroup4], (instregex "VUNPCKHPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKLPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKLPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VXORPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "VXORPSrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "XORPDrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "XORPSrr")>; -def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> { - let NumMicroOps = 3; - let ResourceCycles = [2, 1]; +def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup5], (instregex "JMP64r")>; -def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> { - let Latency = 2; - let ResourceCycles = [2]; -} -def Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> { - let Latency = 6; - let ResourceCycles = [2, 1]; +def HWWriteResGroup6 : SchedWriteRes<[HWPort01]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup6], (instregex "FINCSTP")>; +def: InstRW<[HWWriteResGroup6], (instregex "FNOP")>; -def Write5P0156 : SchedWriteRes<[HWPort0156]> { - let NumMicroOps = 5; - let ResourceCycles = [5]; +def HWWriteResGroup7 : SchedWriteRes<[HWPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup7], (instregex "BT32ri8")>; +def: InstRW<[HWWriteResGroup7], (instregex "BT32rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "BTC32ri8")>; +def: InstRW<[HWWriteResGroup7], (instregex "BTC32rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "BTR32ri8")>; +def: InstRW<[HWWriteResGroup7], (instregex "BTR32rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "BTS32ri8")>; +def: InstRW<[HWWriteResGroup7], (instregex "BTS32rr")>; +def: InstRW<[HWWriteResGroup7], 
(instregex "CDQ")>; +def: InstRW<[HWWriteResGroup7], (instregex "CQO")>; +def: InstRW<[HWWriteResGroup7], (instregex "RORX32ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "RORX64ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "SAR32ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "SAR64r1")>; +def: InstRW<[HWWriteResGroup7], (instregex "SAR8r1")>; +def: InstRW<[HWWriteResGroup7], (instregex "SAR8ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "SARX32rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SARX64rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETAEr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETBr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETEr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETGEr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETGr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETLEr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETLr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETNEr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETNOr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETNPr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETNSr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETOr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETPr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SETSr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHL32ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHL64r1")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHL8r1")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHL8ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHLX32rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHLX64rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHR32ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHR64r1")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHR8r1")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHR8ri")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHRX32rr")>; +def: InstRW<[HWWriteResGroup7], (instregex "SHRX64rr")>; -def 
WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { +def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> { let Latency = 1; - let ResourceCycles = [1, 2, 1]; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup8], (instregex "ANDN32rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "ANDN64rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSI32rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSI64rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSMSK32rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSMSK64rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSR32rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BLSR64rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BZHI32rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "BZHI64rr")>; +def: InstRW<[HWWriteResGroup8], (instregex "LEA64_32r")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSBrr64")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSDrr64")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSWrr64")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDDirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDQirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDSBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDSWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDUSBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDUSWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PAVGBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PAVGWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPEQBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPEQDirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPEQWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPGTBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex 
"MMX_PCMPGTDirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPGTWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PMAXSWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PMAXUBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PMINSWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PMINUBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBDirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBQirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBSBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBSWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBUSBirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBUSWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBWirr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PABSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PABSDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PABSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PADDBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PADDDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PADDQrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PADDSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PADDSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PADDUSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PADDUSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PADDWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PAVGBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PAVGWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PCMPEQBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PCMPEQDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PCMPEQQrr")>; +def: InstRW<[HWWriteResGroup8], (instregex 
"PCMPEQWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PCMPGTBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PCMPGTDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PCMPGTWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMAXSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMAXSDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMAXSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMAXUBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMAXUDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMAXUWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMINSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMINSDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMINSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMINUBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMINUDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PMINUWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSIGNBrr128")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSIGNDrr128")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSIGNWrr128")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSUBBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSUBDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSUBQrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSUBSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSUBSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSUBUSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSUBUSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "PSUBWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VMASKMOVPSYrm")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPABSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPABSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPABSDYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPABSDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPABSWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPABSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex 
"VPADDBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDQYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDQrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDUSBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDUSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDUSWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDUSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPADDWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPAVGBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPAVGBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPAVGWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPAVGWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQQrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTDYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSDYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSDrr")>; +def: 
InstRW<[HWWriteResGroup8], (instregex "VPMAXSWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUDYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINSBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINSDYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINSDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINSWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINUBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINUBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINUDYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINUDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINUWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPMINUWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNBYrr256")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNBrr128")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNDYrr256")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNDrr128")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNWYrr256")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNWrr128")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBDYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBDrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBQYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBQrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBSBYrr")>; +def: 
InstRW<[HWWriteResGroup8], (instregex "VPSUBSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBSWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBUSBYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBUSBrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBUSWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBUSWrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBWYrr")>; +def: InstRW<[HWWriteResGroup8], (instregex "VPSUBWrr")>; -def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { +def HWWriteResGroup9 : SchedWriteRes<[HWPort015]> { let Latency = 1; - let ResourceCycles = [2, 2, 1]; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup9], (instregex "BLENDPDrri")>; +def: InstRW<[HWWriteResGroup9], (instregex "BLENDPSrri")>; +def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVQ64rr")>; +def: InstRW<[HWWriteResGroup9], (instregex "MMX_PANDNirr")>; +def: InstRW<[HWWriteResGroup9], (instregex "MMX_PANDirr")>; +def: InstRW<[HWWriteResGroup9], (instregex "MMX_PORirr")>; +def: InstRW<[HWWriteResGroup9], (instregex "MMX_PXORirr")>; +def: InstRW<[HWWriteResGroup9], (instregex "MOVDQArr")>; +def: InstRW<[HWWriteResGroup9], (instregex "MOVDQUrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "MOVPQI2QIrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "PANDNrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "PANDrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "PORrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "PXORrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPDYrri")>; +def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPDrri")>; +def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPSYrri")>; +def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPSrri")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQAYrr")>; +def: InstRW<[HWWriteResGroup9], 
(instregex "VMOVDQArr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQUYrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQUrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVPQI2QIrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VMOVZPQILo2PQIrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPANDNYrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPANDNrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPANDYrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPANDrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPBLENDDYrri")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPBLENDDrri")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPORYrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPORrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPXORYrr")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPXORrr")>; -def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { +def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> { let Latency = 1; - let ResourceCycles = [3, 2, 1]; + let NumMicroOps = 1; + let ResourceCycles = [1]; } +def: InstRW<[HWWriteResGroup10], (instregex "ADD32ri8")>; +def: InstRW<[HWWriteResGroup10], (instregex "ADD32rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "ADD8ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "ADD8rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "AND32ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "AND64ri8")>; +def: InstRW<[HWWriteResGroup10], (instregex "AND64rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "AND8ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "AND8rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "CBW")>; +def: InstRW<[HWWriteResGroup10], (instregex "CLC")>; +def: InstRW<[HWWriteResGroup10], (instregex "CMC")>; +def: InstRW<[HWWriteResGroup10], (instregex "CMP16ri8")>; +def: InstRW<[HWWriteResGroup10], (instregex "CMP32i32")>; +def: InstRW<[HWWriteResGroup10], (instregex "CMP64rr")>; +def: InstRW<[HWWriteResGroup10], (instregex 
"CMP8ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "CMP8rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "CWDE")>; +def: InstRW<[HWWriteResGroup10], (instregex "DEC64r")>; +def: InstRW<[HWWriteResGroup10], (instregex "DEC8r")>; +def: InstRW<[HWWriteResGroup10], (instregex "INC64r")>; +def: InstRW<[HWWriteResGroup10], (instregex "INC8r")>; +def: InstRW<[HWWriteResGroup10], (instregex "LAHF")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOV32rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOV8ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOV8rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOVSX32rr16")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOVSX32rr8")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOVZX32rr16")>; +def: InstRW<[HWWriteResGroup10], (instregex "MOVZX32rr8")>; +def: InstRW<[HWWriteResGroup10], (instregex "NEG64r")>; +def: InstRW<[HWWriteResGroup10], (instregex "NEG8r")>; +def: InstRW<[HWWriteResGroup10], (instregex "NOOP")>; +def: InstRW<[HWWriteResGroup10], (instregex "NOT64r")>; +def: InstRW<[HWWriteResGroup10], (instregex "NOT8r")>; +def: InstRW<[HWWriteResGroup10], (instregex "OR64ri8")>; +def: InstRW<[HWWriteResGroup10], (instregex "OR64rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "OR8ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "OR8rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "SAHF")>; +def: InstRW<[HWWriteResGroup10], (instregex "SGDT64m")>; +def: InstRW<[HWWriteResGroup10], (instregex "SIDT64m")>; +def: InstRW<[HWWriteResGroup10], (instregex "SLDT16m")>; +def: InstRW<[HWWriteResGroup10], (instregex "SMSW16m")>; +def: InstRW<[HWWriteResGroup10], (instregex "STC")>; +def: InstRW<[HWWriteResGroup10], (instregex "STRm")>; +def: InstRW<[HWWriteResGroup10], (instregex "SUB64ri8")>; +def: InstRW<[HWWriteResGroup10], (instregex "SUB64rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "SUB8ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "SUB8rr")>; +def: InstRW<[HWWriteResGroup10], 
(instregex "SYSCALL")>; +def: InstRW<[HWWriteResGroup10], (instregex "TEST64rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "TEST8ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "TEST8rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "XCHG64rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "XOR32rr")>; +def: InstRW<[HWWriteResGroup10], (instregex "XOR64ri8")>; +def: InstRW<[HWWriteResGroup10], (instregex "XOR8ri")>; +def: InstRW<[HWWriteResGroup10], (instregex "XOR8rr")>; -// Starting with P1. -def WriteP1 : SchedWriteRes<[HWPort1]>; - -def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> { +def HWWriteResGroup11 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 1; let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> { - let Latency = 3; -} -def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> { - let Latency = 7; -} +def: InstRW<[HWWriteResGroup11], (instregex "CVTPS2PDrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "CVTSS2SDrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLDrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLQrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLWrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRADrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRAWrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLDrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLQrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLWrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VCVTPS2PDrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VCVTSS2SDrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSLLDYri")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSLLQYri")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSLLVQYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex 
"VPSLLVQrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSLLWYri")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSRADYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSRAWYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSRLDYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSRLQYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSRLVQYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSRLVQrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VPSRLWYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VTESTPDYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VTESTPDrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VTESTPSYrm")>; +def: InstRW<[HWWriteResGroup11], (instregex "VTESTPSrm")>; -def Write2P1 : SchedWriteRes<[HWPort1]> { +def HWWriteResGroup12 : SchedWriteRes<[HWPort5,HWPort23]> { + let Latency = 1; let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def Write2P1_P23 : SchedWriteRes<[HWPort1, HWPort23]> { - let NumMicroOps = 3; - let ResourceCycles = [2, 1]; -} -def WriteP15 : SchedWriteRes<[HWPort15]>; -def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> { - let Latency = 4; + let ResourceCycles = [1,1]; } +def: InstRW<[HWWriteResGroup12], (instregex "ANDNPDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "ANDNPSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "ANDPDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "ANDPSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "INSERTPSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_PINSRWirmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_PSHUFBrm64")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_PSHUFWmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKHBWirm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKHDQirm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKHWDirm")>; +def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKLBWirm")>; 
+// Load-operand forms of MMX unpack, SSE move-high/low, OR, pack, align,
+// blend, insert, sign/zero-extend and shuffle instructions. All of them are
+// bound to HWWriteResGroup12; the patterns are gathered into one record
+// instead of one record per pattern.
+def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKLDQirm",
+                                            "MMX_PUNPCKLWDirm",
+                                            "MOVHPDrm",
+                                            "MOVHPSrm",
+                                            "MOVLPDrm",
+                                            "MOVLPSrm",
+                                            "ORPDrm",
+                                            "ORPSrm",
+                                            "PACKSSDWrm",
+                                            "PACKSSWBrm",
+                                            "PACKUSDWrm",
+                                            "PACKUSWBrm",
+                                            "PALIGNRrmi",
+                                            "PBLENDWrmi",
+                                            "PINSRBrm",
+                                            "PINSRDrm",
+                                            "PINSRQrm",
+                                            "PINSRWrmi",
+                                            "PMOVSXBDrm",
+                                            "PMOVSXBQrm",
+                                            "PMOVSXBWrm",
+                                            "PMOVSXDQrm",
+                                            "PMOVSXWDrm",
+                                            "PMOVSXWQrm",
+                                            "PMOVZXBDrm",
+                                            "PMOVZXBQrm",
+                                            "PMOVZXBWrm",
+                                            "PMOVZXDQrm",
+                                            "PMOVZXWDrm",
+                                            "PMOVZXWQrm",
+                                            "PSHUFBrm",
+                                            "PSHUFDmi",
+                                            "PSHUFHWmi")>;
+// Load-operand forms of shuffle, unpack, logic, insert, move-high/low and
+// pack instructions. All of them are bound to HWWriteResGroup12; the patterns
+// are gathered into one record instead of one record per pattern.
+//
+// NOTE(review): this list previously named "VANDPDrm" and "VANDPSrm" twice
+// each. The exact duplicates are dropped here. The neighbouring entries come
+// in Y / non-Y pairs (VANDNPDYrm/VANDNPDrm, VORPDYrm/VORPDrm), so the
+// duplicates may have been intended as "VANDPDYrm" / "VANDPSYrm" -- TODO
+// confirm against the instruction tables before adding them.
+def: InstRW<[HWWriteResGroup12], (instregex "PSHUFLWmi",
+                                            "PUNPCKHBWrm",
+                                            "PUNPCKHDQrm",
+                                            "PUNPCKHQDQrm",
+                                            "PUNPCKHWDrm",
+                                            "PUNPCKLBWrm",
+                                            "PUNPCKLDQrm",
+                                            "PUNPCKLQDQrm",
+                                            "PUNPCKLWDrm",
+                                            "SHUFPDrmi",
+                                            "SHUFPSrmi",
+                                            "UNPCKHPDrm",
+                                            "UNPCKHPSrm",
+                                            "UNPCKLPDrm",
+                                            "UNPCKLPSrm",
+                                            "VANDNPDYrm",
+                                            "VANDNPDrm",
+                                            "VANDNPSYrm",
+                                            "VANDNPSrm",
+                                            "VANDPDrm",
+                                            "VANDPSrm",
+                                            "VINSERTPSrm",
+                                            "VMOVHPDrm",
+                                            "VMOVHPSrm",
+                                            "VMOVLPDrm",
+                                            "VMOVLPSrm",
+                                            "VORPDYrm",
+                                            "VORPDrm",
+                                            "VORPSYrm",
+                                            "VORPSrm",
+                                            "VPACKSSDWYrm")>;
+def: InstRW<[HWWriteResGroup12], (instregex "VPACKSSDWrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPACKSSWBYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPACKSSWBrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPACKUSDWYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPACKUSDWrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPACKUSWBYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPACKUSWBrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPALIGNRYrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPALIGNRrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPBLENDWYrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPBLENDWrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPDYri")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPDmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPDmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPDri")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPSYri")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPSmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPSmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPSri")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPINSRBrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPINSRDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPINSRQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPINSRWrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXBDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXBQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXBWrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXDQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXWDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXWQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXBDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXBQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXBWrm")>; +def: 
InstRW<[HWWriteResGroup12], (instregex "VPMOVZXDQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXWDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXWQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFBrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFBrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFDYmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFDmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFHWmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFHWmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFLWYmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFLWmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHBWYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHBWrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHDQYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHDQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHWDYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHWDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLDQYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLDQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLQDQYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLQDQrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLWDYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLWDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VSHUFPDYrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VSHUFPDrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VSHUFPSYrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VSHUFPSrmi")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKHPDrm")>; +def: 
InstRW<[HWWriteResGroup12], (instregex "VUNPCKHPDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKHPSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKHPSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKLPDYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKLPDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKLPSYrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKLPSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VXORPDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VXORPDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VXORPSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "VXORPSrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "XORPDrm")>; +def: InstRW<[HWWriteResGroup12], (instregex "XORPSrm")>; -def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> { - let Latency = 4; +def HWWriteResGroup13 : SchedWriteRes<[HWPort6,HWPort23]> { + let Latency = 1; let NumMicroOps = 2; - let ResourceCycles = [1, 1]; + let ResourceCycles = [1,1]; } +def: InstRW<[HWWriteResGroup13], (instregex "FARJMP64")>; +def: InstRW<[HWWriteResGroup13], (instregex "JMP64m")>; -def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; +def HWWriteResGroup14 : SchedWriteRes<[HWPort23,HWPort0]> { + let Latency = 1; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } +def: InstRW<[HWWriteResGroup14], (instregex "BT64mi8")>; +def: InstRW<[HWWriteResGroup14], (instregex "RORX32mi")>; +def: InstRW<[HWWriteResGroup14], (instregex "RORX64mi")>; +def: InstRW<[HWWriteResGroup14], (instregex "SARX32rm")>; +def: InstRW<[HWWriteResGroup14], (instregex "SARX64rm")>; +def: InstRW<[HWWriteResGroup14], (instregex "SHLX32rm")>; +def: InstRW<[HWWriteResGroup14], (instregex "SHLX64rm")>; +def: InstRW<[HWWriteResGroup14], (instregex "SHRX32rm")>; +def: InstRW<[HWWriteResGroup14], (instregex "SHRX64rm")>; -def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, 
HWPort5]> { - let Latency = 6; +def HWWriteResGroup15 : SchedWriteRes<[HWPort23,HWPort15]> { + let Latency = 1; let NumMicroOps = 2; - let ResourceCycles = [1, 1]; + let ResourceCycles = [1,1]; } +def: InstRW<[HWWriteResGroup15], (instregex "ANDN32rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "ANDN64rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "BLSI32rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "BLSI64rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "BLSMSK32rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "BLSMSK64rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "BLSR32rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "BLSR64rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "BZHI32rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "BZHI64rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PABSBrm64")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PABSDrm64")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PABSWrm64")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDDirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDQirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDSBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDSWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDUSBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDUSWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PAVGBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PAVGWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPEQBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPEQDirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPEQWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPGTBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPGTDirm")>; +def: InstRW<[HWWriteResGroup15], (instregex 
"MMX_PCMPGTWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PMAXSWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PMAXUBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PMINSWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PMINUBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSIGNBrm64")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSIGNDrm64")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSIGNWrm64")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBDirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBQirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBSBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBSWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBUSBirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBUSWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBWirm")>; +def: InstRW<[HWWriteResGroup15], (instregex "MOVBE64rm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PABSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PABSDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PABSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PADDBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PADDDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PADDQrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PADDSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PADDSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PADDUSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PADDUSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PADDWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PAVGBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PAVGWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PCMPEQBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PCMPEQDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PCMPEQQrm")>; +def: 
InstRW<[HWWriteResGroup15], (instregex "PCMPEQWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PCMPGTBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PCMPGTDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PCMPGTWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMAXSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMAXSDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMAXSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMAXUBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMAXUDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMAXUWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMINSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMINSDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMINSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMINUBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMINUDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PMINUWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSIGNBrm128")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSIGNDrm128")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSIGNWrm128")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSUBBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSUBDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSUBQrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSUBSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSUBSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSUBUSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSUBUSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "PSUBWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPABSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPABSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPABSDYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPABSDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPABSWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPABSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex 
"VPADDBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDQYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDQrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDUSBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDUSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDUSWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDUSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPADDWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPAVGBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPAVGBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPAVGWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPAVGWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQQrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQQrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTDYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex 
"VPCMPGTWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSDYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUDYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINSBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINSDYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINSDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINSWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINUBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINUBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINUDYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINUDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINUWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPMINUWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNBYrm256")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNBrm128")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNDYrm256")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNDrm128")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNWYrm256")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNWrm128")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBBrm")>; +def: 
InstRW<[HWWriteResGroup15], (instregex "VPSUBDYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBDrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBQYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBQrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBSBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBSWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBUSBYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBUSBrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBUSWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBUSWrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBWYrm")>; +def: InstRW<[HWWriteResGroup15], (instregex "VPSUBWrm")>; -def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; +def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort015]> { + let Latency = 1; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } +def: InstRW<[HWWriteResGroup16], (instregex "BLENDPDrmi")>; +def: InstRW<[HWWriteResGroup16], (instregex "BLENDPSrmi")>; +def: InstRW<[HWWriteResGroup16], (instregex "MMX_PANDNirm")>; +def: InstRW<[HWWriteResGroup16], (instregex "MMX_PANDirm")>; +def: InstRW<[HWWriteResGroup16], (instregex "MMX_PORirm")>; +def: InstRW<[HWWriteResGroup16], (instregex "MMX_PXORirm")>; +def: InstRW<[HWWriteResGroup16], (instregex "PANDNrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "PANDrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "PORrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "PXORrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VBLENDPDYrmi")>; +def: InstRW<[HWWriteResGroup16], (instregex "VBLENDPDrmi")>; +def: InstRW<[HWWriteResGroup16], (instregex "VBLENDPSYrmi")>; +def: InstRW<[HWWriteResGroup16], (instregex "VBLENDPSrmi")>; +def: 
InstRW<[HWWriteResGroup16], (instregex "VINSERTF128rm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VINSERTI128rm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPANDNYrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPANDNrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPANDYrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPANDrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPBLENDDYrmi")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPBLENDDrmi")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPORYrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPORrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPXORYrm")>; +def: InstRW<[HWWriteResGroup16], (instregex "VPXORrm")>; -// Starting with P2. -def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> { +def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort0156]> { let Latency = 1; - let ResourceCycles = [2, 1]; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } +def: InstRW<[HWWriteResGroup17], (instregex "ADD64rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "ADD8rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "AND64rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "AND8rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "CMP64mi8")>; +def: InstRW<[HWWriteResGroup17], (instregex "CMP64mr")>; +def: InstRW<[HWWriteResGroup17], (instregex "CMP64rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "CMP8mi")>; +def: InstRW<[HWWriteResGroup17], (instregex "CMP8mr")>; +def: InstRW<[HWWriteResGroup17], (instregex "CMP8rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "OR64rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "OR8rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "POP64r")>; +def: InstRW<[HWWriteResGroup17], (instregex "SUB64rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "SUB8rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "TEST64rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "TEST8mi")>; +def: InstRW<[HWWriteResGroup17], (instregex "TEST8rm")>; 
+def: InstRW<[HWWriteResGroup17], (instregex "XOR64rm")>; +def: InstRW<[HWWriteResGroup17], (instregex "XOR8rm")>; -// Starting with P5. -def WriteP5 : SchedWriteRes<[HWPort5]>; -def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> { - let Latency = 5; +def HWWriteResGroup18 : SchedWriteRes<[HWPort237,HWPort0156]> { + let Latency = 1; let NumMicroOps = 2; - let ResourceCycles = [1, 1]; + let ResourceCycles = [1,1]; } +def: InstRW<[HWWriteResGroup18], (instregex "SFENCE")>; -// Notation: -// - r: register. -// - mm: 64 bit mmx register. -// - x = 128 bit xmm register. -// - (x)mm = mmx or xmm register. -// - y = 256 bit ymm register. -// - v = any vector register. -// - m = memory. - -//=== Integer Instructions ===// -//-- Move instructions --// - -// MOV. -// r16,m. -def : InstRW<[WriteALULd], (instregex "MOV16rm")>; - -// MOVSX, MOVZX. -// r,m. -def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; - -// CMOVcc. -// r,r. -def : InstRW<[Write2P0156_Lat2], - (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>; -// r,m. -def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd], - (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>; - -// XCHG. -// r,r. 
-def WriteXCHG : SchedWriteRes<[HWPort0156]> { - let Latency = 2; - let ResourceCycles = [3]; +def HWWriteResGroup19 : SchedWriteRes<[HWPort4,HWPort5,HWPort237]> { + let Latency = 1; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } +def: InstRW<[HWWriteResGroup19], (instregex "EXTRACTPSmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "PEXTRBmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "PEXTRDmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "PEXTRQmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "PEXTRWmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "STMXCSR")>; +def: InstRW<[HWWriteResGroup19], (instregex "VEXTRACTPSmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "VPEXTRBmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "VPEXTRDmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "VPEXTRQmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "VPEXTRWmr")>; +def: InstRW<[HWWriteResGroup19], (instregex "VSTMXCSR")>; -def : InstRW<[WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>; +def HWWriteResGroup20 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> { + let Latency = 1; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[HWWriteResGroup20], (instregex "FNSTCW16m")>; -// r,m. 
-def WriteXCHGrm : SchedWriteRes<[]> { - let Latency = 21; - let NumMicroOps = 8; +def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort237,HWPort0]> { + let Latency = 1; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETAEm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETBm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETEm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETGEm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETGm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETLEm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETLm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETNEm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETNOm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETNPm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETNSm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETOm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETPm")>; +def: InstRW<[HWWriteResGroup21], (instregex "SETSm")>; -// XLAT. -def WriteXLAT : SchedWriteRes<[]> { - let Latency = 7; +def HWWriteResGroup22 : SchedWriteRes<[HWPort4,HWPort237,HWPort15]> { + let Latency = 1; let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteXLAT], (instregex "XLAT")>; +def: InstRW<[HWWriteResGroup22], (instregex "MOVBE64mr")>; -// PUSH. -// m. -def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>; +def HWWriteResGroup23 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> { + let Latency = 1; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[HWWriteResGroup23], (instregex "PUSH64i8")>; +def: InstRW<[HWWriteResGroup23], (instregex "PUSH64r")>; +def: InstRW<[HWWriteResGroup23], (instregex "STOSB")>; +def: InstRW<[HWWriteResGroup23], (instregex "STOSL")>; +def: InstRW<[HWWriteResGroup23], (instregex "STOSQ")>; +def: InstRW<[HWWriteResGroup23], (instregex "STOSW")>; -// PUSHF. 
-def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> { +def HWWriteResGroup24 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0]> { + let Latency = 1; let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>; +def: InstRW<[HWWriteResGroup24], (instregex "BTC64mi8")>; +def: InstRW<[HWWriteResGroup24], (instregex "BTR64mi8")>; +def: InstRW<[HWWriteResGroup24], (instregex "BTS64mi8")>; +def: InstRW<[HWWriteResGroup24], (instregex "SAR64m1")>; +def: InstRW<[HWWriteResGroup24], (instregex "SAR64mi")>; +def: InstRW<[HWWriteResGroup24], (instregex "SAR8m1")>; +def: InstRW<[HWWriteResGroup24], (instregex "SAR8mi")>; +def: InstRW<[HWWriteResGroup24], (instregex "SHL64m1")>; +def: InstRW<[HWWriteResGroup24], (instregex "SHL64mi")>; +def: InstRW<[HWWriteResGroup24], (instregex "SHL8m1")>; +def: InstRW<[HWWriteResGroup24], (instregex "SHL8mi")>; +def: InstRW<[HWWriteResGroup24], (instregex "SHR64m1")>; +def: InstRW<[HWWriteResGroup24], (instregex "SHR64mi")>; +def: InstRW<[HWWriteResGroup24], (instregex "SHR8m1")>; +def: InstRW<[HWWriteResGroup24], (instregex "SHR8mi")>; -// PUSHA. 
-def WritePushA : SchedWriteRes<[]> { - let NumMicroOps = 19; +def HWWriteResGroup25 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { + let Latency = 1; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>; +def: InstRW<[HWWriteResGroup25], (instregex "ADD64mi8")>; +def: InstRW<[HWWriteResGroup25], (instregex "ADD64mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "ADD8mi")>; +def: InstRW<[HWWriteResGroup25], (instregex "ADD8mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "AND64mi8")>; +def: InstRW<[HWWriteResGroup25], (instregex "AND64mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "AND8mi")>; +def: InstRW<[HWWriteResGroup25], (instregex "AND8mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "DEC64m")>; +def: InstRW<[HWWriteResGroup25], (instregex "DEC8m")>; +def: InstRW<[HWWriteResGroup25], (instregex "INC64m")>; +def: InstRW<[HWWriteResGroup25], (instregex "INC8m")>; +def: InstRW<[HWWriteResGroup25], (instregex "NEG64m")>; +def: InstRW<[HWWriteResGroup25], (instregex "NEG8m")>; +def: InstRW<[HWWriteResGroup25], (instregex "NOT64m")>; +def: InstRW<[HWWriteResGroup25], (instregex "NOT8m")>; +def: InstRW<[HWWriteResGroup25], (instregex "OR64mi8")>; +def: InstRW<[HWWriteResGroup25], (instregex "OR64mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "OR8mi")>; +def: InstRW<[HWWriteResGroup25], (instregex "OR8mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "POP64rmm")>; +def: InstRW<[HWWriteResGroup25], (instregex "PUSH64rmm")>; +def: InstRW<[HWWriteResGroup25], (instregex "SUB64mi8")>; +def: InstRW<[HWWriteResGroup25], (instregex "SUB64mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "SUB8mi")>; +def: InstRW<[HWWriteResGroup25], (instregex "SUB8mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "XOR64mi8")>; +def: InstRW<[HWWriteResGroup25], (instregex "XOR64mr")>; +def: InstRW<[HWWriteResGroup25], (instregex "XOR8mi")>; +def: InstRW<[HWWriteResGroup25], (instregex 
"XOR8mr")>; -// POP. -// m. -def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>; - -// POPF. -def WritePopF : SchedWriteRes<[]> { - let NumMicroOps = 9; +def HWWriteResGroup26 : SchedWriteRes<[HWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; } -def : InstRW<[WritePopF], (instregex "POPF(16|32)")>; +def: InstRW<[HWWriteResGroup26], (instregex "BLENDVPDrr0")>; +def: InstRW<[HWWriteResGroup26], (instregex "BLENDVPSrr0")>; +def: InstRW<[HWWriteResGroup26], (instregex "MMX_PINSRWirri")>; +def: InstRW<[HWWriteResGroup26], (instregex "PBLENDVBrr0")>; +def: InstRW<[HWWriteResGroup26], (instregex "PINSRBrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "PINSRDrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "PINSRQrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "PINSRWrri")>; +def: InstRW<[HWWriteResGroup26], (instregex "VBLENDVPDYrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VBLENDVPDrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VBLENDVPSYrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VBLENDVPSrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VPBLENDVBYrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VPBLENDVBrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VPINSRBrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VPINSRDrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VPINSRQrr")>; +def: InstRW<[HWWriteResGroup26], (instregex "VPINSRWrri")>; -// POPA. -def WritePopA : SchedWriteRes<[]> { - let NumMicroOps = 18; +def HWWriteResGroup27 : SchedWriteRes<[HWPort01]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; } -def : InstRW<[WritePopA], (instregex "POPA(16|32)")>; - -// LAHF SAHF. -def : InstRW<[WriteP06], (instregex "(S|L)AHF")>; +def: InstRW<[HWWriteResGroup27], (instregex "FDECSTP")>; -// BSWAP. -// r32. -def WriteBSwap32 : SchedWriteRes<[HWPort15]>; -def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>; - -// r64. 
-def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> { +def HWWriteResGroup28 : SchedWriteRes<[HWPort0]> { + let Latency = 2; let NumMicroOps = 2; + let ResourceCycles = [2]; } -def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>; - -// MOVBE. -// r16,m16 / r64,m64. -def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>; +def: InstRW<[HWWriteResGroup28], (instregex "ROL32ri")>; +def: InstRW<[HWWriteResGroup28], (instregex "ROL64r1")>; +def: InstRW<[HWWriteResGroup28], (instregex "ROL8r1")>; +def: InstRW<[HWWriteResGroup28], (instregex "ROL8ri")>; +def: InstRW<[HWWriteResGroup28], (instregex "ROR32ri")>; +def: InstRW<[HWWriteResGroup28], (instregex "ROR64r1")>; +def: InstRW<[HWWriteResGroup28], (instregex "ROR8r1")>; +def: InstRW<[HWWriteResGroup28], (instregex "ROR8ri")>; -// r32, m32. -def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> { +def HWWriteResGroup29 : SchedWriteRes<[HWPort0156]> { + let Latency = 2; let NumMicroOps = 2; + let ResourceCycles = [2]; } -def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>; +def: InstRW<[HWWriteResGroup29], (instregex "LFENCE")>; +def: InstRW<[HWWriteResGroup29], (instregex "MFENCE")>; +def: InstRW<[HWWriteResGroup29], (instregex "WAIT")>; +def: InstRW<[HWWriteResGroup29], (instregex "XGETBV")>; -// m16,r16. 
-def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> { - let NumMicroOps = 3; +def HWWriteResGroup30 : SchedWriteRes<[HWPort0,HWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>; +def: InstRW<[HWWriteResGroup30], (instregex "CVTPS2PDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "CVTSS2SDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "EXTRACTPSrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "MMX_PEXTRWirri")>; +def: InstRW<[HWWriteResGroup30], (instregex "PEXTRBrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PEXTRDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PEXTRQrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PEXTRWri")>; +def: InstRW<[HWWriteResGroup30], (instregex "PSLLDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PSLLQrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PSLLWrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PSRADrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PSRAWrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PSRLDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PSRLQrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PSRLWrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "PTESTrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VCVTPH2PSYrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VCVTPH2PSrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VCVTPS2PDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VCVTSS2SDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VEXTRACTPSrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPEXTRBrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPEXTRDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPEXTRQrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPEXTRWri")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPSRADrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPSRAWrr")>; +def: InstRW<[HWWriteResGroup30], 
(instregex "VPSRLDrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPSRLQrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPSRLWrr")>; +def: InstRW<[HWWriteResGroup30], (instregex "VPTESTrr")>; -// m32,r32. -def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> { - let NumMicroOps = 3; +def HWWriteResGroup31 : SchedWriteRes<[HWPort6,HWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>; +def: InstRW<[HWWriteResGroup31], (instregex "CLFLUSH")>; -// m64,r64. -def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> { - let NumMicroOps = 4; +def HWWriteResGroup32 : SchedWriteRes<[HWPort01,HWPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>; - -//-- Arithmetic instructions --// - -// ADD SUB. -// m,r/i. -def : InstRW<[Write2P0156_2P237_P4], - (instregex "(ADD|SUB)(8|16|32|64)m(r|i)", - "(ADD|SUB)(8|16|32|64)mi8", "(ADD|SUB)64mi32")>; - -// ADC SBB. -// r,r/i. -def : InstRW<[Write2P0156_Lat2], (instregex "(ADC|SBB)(8|16|32|64)r(r|i)", - "(ADC|SBB)(16|32|64)ri8", - "(ADC|SBB)64ri32", - "(ADC|SBB)(8|16|32|64)rr_REV")>; +def: InstRW<[HWWriteResGroup32], (instregex "MMX_MOVDQ2Qrr")>; -// r,m. -def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd], (instregex "(ADC|SBB)(8|16|32|64)rm")>; - -// m,r/i. -def : InstRW<[Write3P0156_2P237_P4], - (instregex "(ADC|SBB)(8|16|32|64)m(r|i)", - "(ADC|SBB)(16|32|64)mi8", - "(ADC|SBB)64mi32")>; - -// INC DEC NOT NEG. -// m. 
-def : InstRW<[WriteP0156_2P237_P4], - (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m", - "(INC|DEC)64(16|32)m")>; +def HWWriteResGroup33 : SchedWriteRes<[HWPort0,HWPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[HWWriteResGroup33], (instregex "BEXTR32rr")>; +def: InstRW<[HWWriteResGroup33], (instregex "BEXTR64rr")>; +def: InstRW<[HWWriteResGroup33], (instregex "BSWAP32r")>; -// MUL IMUL. -// r16. -def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> { - let Latency = 4; - let NumMicroOps = 4; +def HWWriteResGroup34 : SchedWriteRes<[HWPort0,HWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>; +def: InstRW<[HWWriteResGroup34], (instregex "ADC64ri8")>; +def: InstRW<[HWWriteResGroup34], (instregex "ADC64rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "ADC8ri")>; +def: InstRW<[HWWriteResGroup34], (instregex "ADC8rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVAE32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVB32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVE32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVG32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVGE32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVL32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVLE32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVNE32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVNO32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVNP32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVNS32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVO32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVP32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CMOVS32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "CWD")>; +def: InstRW<[HWWriteResGroup34], (instregex "SBB32rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "SBB64ri8")>; 
+def: InstRW<[HWWriteResGroup34], (instregex "SBB8ri")>; +def: InstRW<[HWWriteResGroup34], (instregex "SBB8rr")>; +def: InstRW<[HWWriteResGroup34], (instregex "SETAr")>; +def: InstRW<[HWWriteResGroup34], (instregex "SETBEr")>; -// m16. -def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> { - let Latency = 8; - let NumMicroOps = 5; +// rm-named entries: two resource cycles on HWPort5 and one on HWPort23. Y and non-Y variants are listed as separate records. +def HWWriteResGroup35 : SchedWriteRes<[HWPort5,HWPort23]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>; +def: InstRW<[HWWriteResGroup35], (instregex "BLENDVPDrm0")>; +def: InstRW<[HWWriteResGroup35], (instregex "BLENDVPSrm0")>; +def: InstRW<[HWWriteResGroup35], (instregex "MMX_PACKSSDWirm")>; +def: InstRW<[HWWriteResGroup35], (instregex "MMX_PACKSSWBirm")>; +def: InstRW<[HWWriteResGroup35], (instregex "MMX_PACKUSWBirm")>; +def: InstRW<[HWWriteResGroup35], (instregex "PBLENDVBrm0")>; +def: InstRW<[HWWriteResGroup35], (instregex "VBLENDVPDYrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VBLENDVPDrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VBLENDVPSYrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VBLENDVPSrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VMASKMOVPDYrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VMASKMOVPDrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VMASKMOVPSYrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VMASKMOVPSrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VPBLENDVBYrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VPBLENDVBrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VPMASKMOVDYrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VPMASKMOVDrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VPMASKMOVQYrm")>; +def: InstRW<[HWWriteResGroup35], (instregex "VPMASKMOVQrm")>; -// r32. 
-def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> { - let Latency = 4; +def HWWriteResGroup36 : SchedWriteRes<[HWPort23,HWPort0156]> { + let Latency = 2; let NumMicroOps = 3; + let ResourceCycles = [1,2]; } -def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>; +def: InstRW<[HWWriteResGroup36], (instregex "LEAVE64")>; +def: InstRW<[HWWriteResGroup36], (instregex "SCASB")>; +def: InstRW<[HWWriteResGroup36], (instregex "SCASL")>; +def: InstRW<[HWWriteResGroup36], (instregex "SCASQ")>; +def: InstRW<[HWWriteResGroup36], (instregex "SCASW")>; -// m32. -def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> { - let Latency = 8; - let NumMicroOps = 4; +// rm-named entries only: one resource cycle each on HWPort0, HWPort5 and HWPort23. +def HWWriteResGroup37 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>; +def: InstRW<[HWWriteResGroup37], (instregex "PSLLDrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "PSLLQrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "PSLLWrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "PSRADrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "PSRAWrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "PSRLDrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "PSRLQrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "PSRLWrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "PTESTrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPSLLDrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPSLLQrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPSLLWrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPSRADrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPSRAWrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPSRLDrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPSRLQrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPSRLWrm")>; +def: InstRW<[HWWriteResGroup37], (instregex "VPTESTrm")>; -// r64. 
-def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> { - let Latency = 3; - let NumMicroOps = 2; +def HWWriteResGroup38 : SchedWriteRes<[HWPort0,HWPort01,HWPort23]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>; +def: InstRW<[HWWriteResGroup38], (instregex "FLDCW16m")>; -// m64. -def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> { - let Latency = 7; +def HWWriteResGroup39 : SchedWriteRes<[HWPort0,HWPort23,HWPort0156]> { + let Latency = 2; let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>; +def: InstRW<[HWWriteResGroup39], (instregex "LDMXCSR")>; +def: InstRW<[HWWriteResGroup39], (instregex "VLDMXCSR")>; -// r16,r16. -def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> { - let Latency = 4; - let NumMicroOps = 2; +def HWWriteResGroup40 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>; +def: InstRW<[HWWriteResGroup40], (instregex "LRETQ")>; +def: InstRW<[HWWriteResGroup40], (instregex "RETQ")>; -// r16,m16. -def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> { - let Latency = 8; +def HWWriteResGroup41 : SchedWriteRes<[HWPort23,HWPort0,HWPort15]> { + let Latency = 2; let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>; +def: InstRW<[HWWriteResGroup41], (instregex "BEXTR32rm")>; +def: InstRW<[HWWriteResGroup41], (instregex "BEXTR64rm")>; -// MULX. -// r32,r32,r32. 
-def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> { - let Latency = 4; +def HWWriteResGroup42 : SchedWriteRes<[HWPort23,HWPort0,HWPort0156]> { + let Latency = 2; let NumMicroOps = 3; - let ResourceCycles = [1, 2]; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteMulX32], (instregex "MULX32rr")>; +def: InstRW<[HWWriteResGroup42], (instregex "ADC64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "ADC8rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVAE64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVB64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVE64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVG64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVGE64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVL64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVLE64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVNE64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVNO64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVNP64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVNS64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVO64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVP64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "CMOVS64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "SBB64rm")>; +def: InstRW<[HWWriteResGroup42], (instregex "SBB8rm")>; -// r32,r32,m32. -def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> { - let Latency = 8; +def HWWriteResGroup43 : SchedWriteRes<[HWPort4,HWPort6,HWPort237,HWPort0156]> { + let Latency = 2; let NumMicroOps = 4; - let ResourceCycles = [1, 2, 1]; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>; +def: InstRW<[HWWriteResGroup43], (instregex "CALL64r")>; +def: InstRW<[HWWriteResGroup43], (instregex "SETAm")>; +def: InstRW<[HWWriteResGroup43], (instregex "SETBEm")>; -// r64,r64,r64. 
-def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> { - let Latency = 4; - let NumMicroOps = 2; +def HWWriteResGroup44 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0]> { + let Latency = 2; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; } -def : InstRW<[WriteMulX64], (instregex "MULX64rr")>; +def: InstRW<[HWWriteResGroup44], (instregex "ROL64m1")>; +def: InstRW<[HWWriteResGroup44], (instregex "ROL64mi")>; +def: InstRW<[HWWriteResGroup44], (instregex "ROL8m1")>; +def: InstRW<[HWWriteResGroup44], (instregex "ROL8mi")>; +def: InstRW<[HWWriteResGroup44], (instregex "ROR64m1")>; +def: InstRW<[HWWriteResGroup44], (instregex "ROR64mi")>; +def: InstRW<[HWWriteResGroup44], (instregex "ROR8m1")>; +def: InstRW<[HWWriteResGroup44], (instregex "ROR8mi")>; -// r64,r64,m64. -def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> { - let Latency = 8; - let NumMicroOps = 3; +def HWWriteResGroup45 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { + let Latency = 2; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; } -def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>; +def: InstRW<[HWWriteResGroup45], (instregex "XADD64rm")>; +def: InstRW<[HWWriteResGroup45], (instregex "XADD8rm")>; -// DIV. -// r8. -def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { - let Latency = 22; - let NumMicroOps = 9; +def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> { + let Latency = 2; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,1,1]; } -def : InstRW<[WriteDiv8], (instregex "DIV8r")>; +def: InstRW<[HWWriteResGroup46], (instregex "CALL64m")>; +def: InstRW<[HWWriteResGroup46], (instregex "FARCALL64")>; -// r16. 
-def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { - let Latency = 23; - let NumMicroOps = 10; +def HWWriteResGroup47 : SchedWriteRes<[HWPort0]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteDiv16], (instregex "DIV16r")>; +def: InstRW<[HWWriteResGroup47], (instregex "MOVMSKPDrr")>; +def: InstRW<[HWWriteResGroup47], (instregex "MOVMSKPSrr")>; +def: InstRW<[HWWriteResGroup47], (instregex "PMOVMSKBrr")>; +def: InstRW<[HWWriteResGroup47], (instregex "VMOVMSKPDYrr")>; +def: InstRW<[HWWriteResGroup47], (instregex "VMOVMSKPDrr")>; +def: InstRW<[HWWriteResGroup47], (instregex "VMOVMSKPSrr")>; +def: InstRW<[HWWriteResGroup47], (instregex "VPMOVMSKBYrr")>; +def: InstRW<[HWWriteResGroup47], (instregex "VPMOVMSKBrr")>; -// r32. -def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { - let Latency = 22; - let NumMicroOps = 10; +def HWWriteResGroup48 : SchedWriteRes<[HWPort1]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteDiv32], (instregex "DIV32r")>; +def: InstRW<[HWWriteResGroup48], (instregex "ADDPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "ADDPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "ADDSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "ADDSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "ADDSUBPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "ADDSUBPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "BSF32rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "BSR32rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "CMPPDrri")>; +def: InstRW<[HWWriteResGroup48], (instregex "CMPPSrri")>; +def: InstRW<[HWWriteResGroup48], (instregex "CMPSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "CMPSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "COMISDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "COMISSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "CVTDQ2PSrr")>; +def: 
InstRW<[HWWriteResGroup48], (instregex "CVTPS2DQrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "CVTTPS2DQrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "IMUL32rri8")>; +def: InstRW<[HWWriteResGroup48], (instregex "IMUL64rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "IMUL8r")>; +def: InstRW<[HWWriteResGroup48], (instregex "LZCNT32rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MAXPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MAXPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MAXSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MAXSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MINPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MINPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MINSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MINSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MMX_CVTPI2PSirr")>; +def: InstRW<[HWWriteResGroup48], (instregex "MUL8r")>; +def: InstRW<[HWWriteResGroup48], (instregex "PDEP32rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "PDEP64rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "PEXT32rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "PEXT64rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "POPCNT32rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "SHLD32rri8")>; +def: InstRW<[HWWriteResGroup48], (instregex "SHRD32rri8")>; +def: InstRW<[HWWriteResGroup48], (instregex "SUBPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "SUBPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "SUBSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "SUBSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "TZCNT32rr")>; +def: InstRW<[HWWriteResGroup48], (instregex "UCOMISDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "UCOMISSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDPDYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDPSYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex 
"VADDPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDSUBPDYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDSUBPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDSUBPSYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VADDSUBPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCMPPDYrri")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCMPPDrri")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCMPPSYrri")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCMPPSrri")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCMPSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCMPSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCOMISDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCOMISSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCVTDQ2PSYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCVTDQ2PSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCVTPS2DQYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCVTPS2DQrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VCVTTPS2DQrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMAXPDYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMAXPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMAXPSYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMAXPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMAXSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMAXSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMINPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMINPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMINSDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VMINSSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VSUBPDYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VSUBPDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VSUBPSYrr")>; +def: InstRW<[HWWriteResGroup48], (instregex 
"VSUBPSrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VUCOMISDrr")>; +def: InstRW<[HWWriteResGroup48], (instregex "VUCOMISSrr")>; -// r64. -def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { - let Latency = 32; - let NumMicroOps = 36; -} -def : InstRW<[WriteDiv64], (instregex "DIV64r")>; - -// IDIV. -// r8. -def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { - let Latency = 23; - let NumMicroOps = 9; +def HWWriteResGroup49 : SchedWriteRes<[HWPort5]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>; +def: InstRW<[HWWriteResGroup49], (instregex "KSHIFTRDri")>; +def: InstRW<[HWWriteResGroup49], (instregex "KSHIFTRWri")>; +def: InstRW<[HWWriteResGroup49], (instregex "VBROADCASTSDYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VBROADCASTSSrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VEXTRACTF128rr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VEXTRACTI128rr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VINSERTF128rr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VINSERTI128rr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTBYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTBrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTDYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTQYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTWYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTWrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPERM2I128rr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPERMDYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPERMQYri")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXBDYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXBQYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXBWYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXDQYrr")>; +def: InstRW<[HWWriteResGroup49], 
(instregex "VPMOVSXWDYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXWQYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXBDYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXBQYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXBWYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXDQYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXWDYrr")>; +def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXWQYrr")>; -// r16. -def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { - let Latency = 23; - let NumMicroOps = 10; +def HWWriteResGroup50 : SchedWriteRes<[HWPort1,HWPort23]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>; +def: InstRW<[HWWriteResGroup50], (instregex "ADDPDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "ADDPSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "ADDSDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "ADDSSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "ADDSUBPDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "ADDSUBPSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "BSF64rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "BSR64rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "CMPPDrmi")>; +def: InstRW<[HWWriteResGroup50], (instregex "CMPPSrmi")>; +def: InstRW<[HWWriteResGroup50], (instregex "CMPSSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "COMISDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "COMISSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "CVTDQ2PSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "CVTPS2DQrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "CVTTPS2DQrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "IMUL64m")>; +def: InstRW<[HWWriteResGroup50], (instregex "IMUL64rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "IMUL8m")>; +def: InstRW<[HWWriteResGroup50], (instregex "LZCNT64rm")>; +def: InstRW<[HWWriteResGroup50], 
(instregex "MAXPDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MAXPSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MAXSDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MAXSSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MINPDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MINPSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MINSDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MINSSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPS2PIirm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTTPS2PIirm")>; +def: InstRW<[HWWriteResGroup50], (instregex "MUL64m")>; +def: InstRW<[HWWriteResGroup50], (instregex "MUL8m")>; +def: InstRW<[HWWriteResGroup50], (instregex "PDEP32rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "PDEP64rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "PEXT32rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "PEXT64rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "POPCNT64rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "SUBPDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "SUBPSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "SUBSDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "SUBSSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "TZCNT64rm")>; +def: InstRW<[HWWriteResGroup50], (instregex "UCOMISDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "UCOMISSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDPDYrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDPDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDPSYrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDPSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDSDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDSSrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDSUBPDYrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDSUBPDrm")>; +def: InstRW<[HWWriteResGroup50], (instregex "VADDSUBPSYrm")>; 
+def: InstRW<[HWWriteResGroup50], (instregex "VADDSUBPSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCMPPDYrmi")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCMPPDrmi")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCMPPSYrmi")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCMPPSrmi")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCMPSDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCMPSSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCOMISDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCOMISSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCVTDQ2PSYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCVTDQ2PSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCVTPS2DQYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCVTPS2DQrm")>;
+// VCVTTPS2DQ: Y form and non-Y form, one entry each (mirrors VCVTPS2DQ above).
+def: InstRW<[HWWriteResGroup50], (instregex "VCVTTPS2DQYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VCVTTPS2DQrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAXPDYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAXPDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAXPSYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAXPSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAXSDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMAXSSrm")>;
+// VMINPD/VMINPS: Y form and non-Y form, one entry each (mirrors VMAXPD/VMAXPS).
+def: InstRW<[HWWriteResGroup50], (instregex "VMINPDYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMINPDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMINPSYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMINPSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMINSDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VMINSSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VSUBPDYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VSUBPDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VSUBPSYrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VSUBPSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VSUBSDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VSUBSSrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VUCOMISDrm")>;
+def: InstRW<[HWWriteResGroup50], (instregex "VUCOMISSrm")>;
 
-// r32.
-def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
-  let Latency = 22;
-  let NumMicroOps = 9;
+// 2 micro-ops, latency 3: one cycle each on HWPort5 and HWPort23.
+def HWWriteResGroup51 : SchedWriteRes<[HWPort5,HWPort23]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
 }
-def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPERM2F128rm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPERM2I128rm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPERMDYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPERMPDYmi")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPERMPSYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPERMQYmi")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXBDYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXBQYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXBWYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXDQYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXWDYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXWQYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXBDYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXBQYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXBWYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXDQYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXWDYrm")>;
+def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXWQYrm")>;
 
-// r64.
-def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
-  let Latency = 39;
-  let NumMicroOps = 59;
-}
-def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;
-
-//-- Logic instructions --//
-
-// AND OR XOR.
-// m,r/i.
-def : InstRW<[Write2P0156_2P237_P4],
-              (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
-              "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
-
-// SHR SHL SAR.
-// m,i.
-def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> { - let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; +def HWWriteResGroup52 : SchedWriteRes<[HWPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [3]; } -def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>; +def: InstRW<[HWWriteResGroup52], (instregex "XADD32rr")>; +def: InstRW<[HWWriteResGroup52], (instregex "XADD8rr")>; +def: InstRW<[HWWriteResGroup52], (instregex "XCHG8rr")>; -// r,cl. -def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>; - -// m,cl. -def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> { - let NumMicroOps = 6; - let ResourceCycles = [3, 2, 1]; +def HWWriteResGroup53 : SchedWriteRes<[HWPort0,HWPort5]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>; +def: InstRW<[HWWriteResGroup53], (instregex "VPSLLVDYrr")>; +def: InstRW<[HWWriteResGroup53], (instregex "VPSLLVDrr")>; +def: InstRW<[HWWriteResGroup53], (instregex "VPSRAVDYrr")>; +def: InstRW<[HWWriteResGroup53], (instregex "VPSRAVDrr")>; +def: InstRW<[HWWriteResGroup53], (instregex "VPSRLVDYrr")>; +def: InstRW<[HWWriteResGroup53], (instregex "VPSRLVDrr")>; -// ROR ROL. -// r,1. -def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>; - -// m,i. 
-def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> { - let NumMicroOps = 5; - let ResourceCycles = [2, 2, 1]; +def HWWriteResGroup54 : SchedWriteRes<[HWPort5,HWPort15]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>; +def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHADDSWrr64")>; +def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHADDWrr64")>; +def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHADDrr64")>; +def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHSUBDrr64")>; +def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHSUBSWrr64")>; +def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHSUBWrr64")>; +def: InstRW<[HWWriteResGroup54], (instregex "PHADDDrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "PHADDSWrr128")>; +def: InstRW<[HWWriteResGroup54], (instregex "PHADDWrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "PHSUBDrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "PHSUBSWrr128")>; +def: InstRW<[HWWriteResGroup54], (instregex "PHSUBWrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHADDDYrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHADDDrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHADDSWrr128")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHADDSWrr256")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHADDWYrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHADDWrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBDYrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBDrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBSWrr128")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBSWrr256")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBWYrr")>; +def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBWrr")>; -// r,cl. 
-def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>; +def HWWriteResGroup55 : SchedWriteRes<[HWPort5,HWPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[HWWriteResGroup55], (instregex "MMX_PACKSSDWirr")>; +def: InstRW<[HWWriteResGroup55], (instregex "MMX_PACKSSWBirr")>; +def: InstRW<[HWWriteResGroup55], (instregex "MMX_PACKUSWBirr")>; -// m,cl. -def WriteRotateRMWCL : SchedWriteRes<[]> { - let NumMicroOps = 6; +def HWWriteResGroup56 : SchedWriteRes<[HWPort6,HWPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; } -def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>; +def: InstRW<[HWWriteResGroup56], (instregex "CLD")>; -// RCR RCL. -// r,1. -def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> { - let Latency = 2; +def HWWriteResGroup57 : SchedWriteRes<[HWPort0,HWPort0156]> { + let Latency = 3; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [1,2]; } -def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>; +def: InstRW<[HWWriteResGroup57], (instregex "CMOVA32rr")>; +def: InstRW<[HWWriteResGroup57], (instregex "CMOVBE32rr")>; +def: InstRW<[HWWriteResGroup57], (instregex "RCL32ri")>; +def: InstRW<[HWWriteResGroup57], (instregex "RCL64r1")>; +def: InstRW<[HWWriteResGroup57], (instregex "RCL8r1")>; +def: InstRW<[HWWriteResGroup57], (instregex "RCL8ri")>; +def: InstRW<[HWWriteResGroup57], (instregex "RCR32ri")>; +def: InstRW<[HWWriteResGroup57], (instregex "RCR64r1")>; +def: InstRW<[HWWriteResGroup57], (instregex "RCR8r1")>; +def: InstRW<[HWWriteResGroup57], (instregex "RCR8ri")>; +def: InstRW<[HWWriteResGroup57], (instregex "SHL64rCL")>; +def: InstRW<[HWWriteResGroup57], (instregex "SHL8rCL")>; -// m,1. 
-def WriteRCm1 : SchedWriteRes<[]> { - let NumMicroOps = 6; +def HWWriteResGroup58 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>; +def: InstRW<[HWWriteResGroup58], (instregex "FNSTSWm")>; -// r,i. -def WriteRCri : SchedWriteRes<[HWPort0156]> { - let Latency = 6; - let NumMicroOps = 8; +def HWWriteResGroup59 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { + let Latency = 3; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; } -def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>; +def: InstRW<[HWWriteResGroup59], (instregex "VPSLLVDYrm")>; +def: InstRW<[HWWriteResGroup59], (instregex "VPSLLVDrm")>; +def: InstRW<[HWWriteResGroup59], (instregex "VPSRAVDYrm")>; +def: InstRW<[HWWriteResGroup59], (instregex "VPSRAVDrm")>; +def: InstRW<[HWWriteResGroup59], (instregex "VPSRLVDYrm")>; +def: InstRW<[HWWriteResGroup59], (instregex "VPSRLVDrm")>; -// m,i. 
-def WriteRCmi : SchedWriteRes<[]> { - let NumMicroOps = 11; +def HWWriteResGroup60 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> { + let Latency = 3; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; } -def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>; +def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHADDSWrm64")>; +def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHADDWrm64")>; +def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHADDrm64")>; +def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHSUBDrm64")>; +def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHSUBSWrm64")>; +def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHSUBWrm64")>; +def: InstRW<[HWWriteResGroup60], (instregex "PHADDDrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "PHADDSWrm128")>; +def: InstRW<[HWWriteResGroup60], (instregex "PHADDWrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "PHSUBDrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "PHSUBSWrm128")>; +def: InstRW<[HWWriteResGroup60], (instregex "PHSUBWrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHADDDYrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHADDDrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHADDSWrm128")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHADDSWrm256")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHADDWYrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHADDWrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBDYrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBDrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBSWrm128")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBSWrm256")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBWYrm")>; +def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBWrm")>; -// SHRD SHLD. -// r,r,i. 
-def WriteShDrr : SchedWriteRes<[HWPort1]> { +def HWWriteResGroup61 : SchedWriteRes<[HWPort23,HWPort0,HWPort0156]> { let Latency = 3; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; } -def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>; +def: InstRW<[HWWriteResGroup61], (instregex "CMOVA64rm")>; +def: InstRW<[HWWriteResGroup61], (instregex "CMOVBE64rm")>; -// m,r,i. -def WriteShDmr : SchedWriteRes<[]> { +def HWWriteResGroup62 : SchedWriteRes<[HWPort23,HWPort237,HWPort0,HWPort0156]> { + let Latency = 3; let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; } -def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>; +def: InstRW<[HWWriteResGroup62], (instregex "RCL64m1")>; +def: InstRW<[HWWriteResGroup62], (instregex "RCL64mi")>; +def: InstRW<[HWWriteResGroup62], (instregex "RCL8m1")>; +def: InstRW<[HWWriteResGroup62], (instregex "RCL8mi")>; +def: InstRW<[HWWriteResGroup62], (instregex "RCR64m1")>; +def: InstRW<[HWWriteResGroup62], (instregex "RCR64mi")>; +def: InstRW<[HWWriteResGroup62], (instregex "RCR8m1")>; +def: InstRW<[HWWriteResGroup62], (instregex "RCR8mi")>; -// r,r,cl. -def WriteShlDCL : SchedWriteRes<[HWPort0156]> { +def HWWriteResGroup63 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { let Latency = 3; - let NumMicroOps = 4; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,3]; } -def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>; +def: InstRW<[HWWriteResGroup63], (instregex "ADC64mi8")>; +def: InstRW<[HWWriteResGroup63], (instregex "ADC8mi")>; +def: InstRW<[HWWriteResGroup63], (instregex "ADD8mi")>; +def: InstRW<[HWWriteResGroup63], (instregex "AND8mi")>; +def: InstRW<[HWWriteResGroup63], (instregex "OR8mi")>; +def: InstRW<[HWWriteResGroup63], (instregex "SUB8mi")>; +def: InstRW<[HWWriteResGroup63], (instregex "XCHG64rm")>; +def: InstRW<[HWWriteResGroup63], (instregex "XCHG8rm")>; +def: InstRW<[HWWriteResGroup63], (instregex "XOR8mi")>; -// r,r,cl. 
-def WriteShrDCL : SchedWriteRes<[HWPort0156]> { - let Latency = 4; - let NumMicroOps = 4; +def HWWriteResGroup64 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0,HWPort0156]> { + let Latency = 3; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,2,1]; } -def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>; +def: InstRW<[HWWriteResGroup64], (instregex "ADC64mr")>; +def: InstRW<[HWWriteResGroup64], (instregex "ADC8mr")>; +def: InstRW<[HWWriteResGroup64], (instregex "CMPXCHG64rm")>; +def: InstRW<[HWWriteResGroup64], (instregex "CMPXCHG8rm")>; +def: InstRW<[HWWriteResGroup64], (instregex "SBB64mi8")>; +def: InstRW<[HWWriteResGroup64], (instregex "SBB64mr")>; +def: InstRW<[HWWriteResGroup64], (instregex "SBB8mi")>; +def: InstRW<[HWWriteResGroup64], (instregex "SBB8mr")>; +def: InstRW<[HWWriteResGroup64], (instregex "SHL64mCL")>; +def: InstRW<[HWWriteResGroup64], (instregex "SHL8mCL")>; -// m,r,cl. -def WriteShDmrCL : SchedWriteRes<[]> { - let NumMicroOps = 7; +def HWWriteResGroup65 : SchedWriteRes<[HWPort0,HWPort1]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>; - -// BT. -// r,r/i. 
-def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>; +def: InstRW<[HWWriteResGroup65], (instregex "CVTSD2SI64rr")>; +def: InstRW<[HWWriteResGroup65], (instregex "CVTSD2SIrr")>; +def: InstRW<[HWWriteResGroup65], (instregex "CVTSS2SI64rr")>; +def: InstRW<[HWWriteResGroup65], (instregex "CVTSS2SIrr")>; +def: InstRW<[HWWriteResGroup65], (instregex "CVTTSD2SI64rr")>; +def: InstRW<[HWWriteResGroup65], (instregex "CVTTSD2SIrr")>; +def: InstRW<[HWWriteResGroup65], (instregex "CVTTSS2SI64rr")>; +def: InstRW<[HWWriteResGroup65], (instregex "CVTTSS2SIrr")>; +def: InstRW<[HWWriteResGroup65], (instregex "VCVTSD2SI64rr")>; +def: InstRW<[HWWriteResGroup65], (instregex "VCVTSS2SI64rr")>; +def: InstRW<[HWWriteResGroup65], (instregex "VCVTSS2SIrr")>; +def: InstRW<[HWWriteResGroup65], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[HWWriteResGroup65], (instregex "VCVTTSD2SIrr")>; +def: InstRW<[HWWriteResGroup65], (instregex "VCVTTSS2SI64rr")>; +def: InstRW<[HWWriteResGroup65], (instregex "VCVTTSS2SIrr")>; -// m,r. -def WriteBTmr : SchedWriteRes<[]> { - let NumMicroOps = 10; +def HWWriteResGroup66 : SchedWriteRes<[HWPort0,HWPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>; - -// m,i. -def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>; +def: InstRW<[HWWriteResGroup66], (instregex "VCVTPS2PDYrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPSLLDrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPSLLQrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPSLLWrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPSRADYrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPSRAWYrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPSRLDYrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPSRLQYrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPSRLWYrr")>; +def: InstRW<[HWWriteResGroup66], (instregex "VPTESTYrr")>; -// BTR BTS BTC. -// r,r,i. 
-def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>; - -// m,r. -def WriteBTRSCmr : SchedWriteRes<[]> { - let NumMicroOps = 11; +def HWWriteResGroup67 : SchedWriteRes<[HWPort1,HWPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>; +def: InstRW<[HWWriteResGroup67], (instregex "CVTDQ2PDrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "CVTPD2DQrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "CVTPD2PSrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "CVTSD2SSrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "CVTSI2SD64rr")>; +def: InstRW<[HWWriteResGroup67], (instregex "CVTSI2SDrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "CVTSI2SSrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "CVTTPD2DQrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTPD2PIirr")>; +def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTPI2PDirr")>; +def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTPS2PIirr")>; +def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTTPD2PIirr")>; +def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTTPS2PIirr")>; +def: InstRW<[HWWriteResGroup67], (instregex "VCVTDQ2PDrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "VCVTPD2DQrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "VCVTPD2PSrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "VCVTPS2PHrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[HWWriteResGroup67], (instregex "VCVTSI2SDrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "VCVTSI2SSrr")>; +def: InstRW<[HWWriteResGroup67], (instregex "VCVTTPD2DQrr")>; -// m,i. -def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>; - -// BSF BSR. -// r,r. -def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>; -// r,m. -def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>; - -// SETcc. -// r. 
-def : InstRW<[WriteShift],
-              (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
-// m.
-def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
-  let NumMicroOps = 3;
+// 2 micro-ops, latency 4: one cycle each on HWPort1 and HWPort6.
+def HWWriteResGroup68 : SchedWriteRes<[HWPort1,HWPort6]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
 }
-def : InstRW<[WriteSetCCm],
-              (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;
+def: InstRW<[HWWriteResGroup68], (instregex "IMUL64r")>;
+def: InstRW<[HWWriteResGroup68], (instregex "MUL64r")>;
+def: InstRW<[HWWriteResGroup68], (instregex "MULX64rr")>;
 
-// CLD STD.
-def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> {
+// 3 micro-ops, latency 4: one cycle each on HWPort0, HWPort1 and HWPort23.
+// Only memory-operand (rm) forms are listed here; the register-register (rr)
+// forms of these conversions are mapped by HWWriteResGroup65.
+def HWWriteResGroup69 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> {
+  let Latency = 4;
   let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
 }
-def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>;
-
-// LZCNT TZCNT.
-// r,r.
-def : InstRW<[WriteP1_Lat3], (instregex "(L|TZCNT)(16|32|64)rr")>;
-// r,m.
-def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|TZCNT)(16|32|64)rm")>;
-
-// ANDN.
-// r,r.
-def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>;
-// r,m.
-def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "CVTSD2SI64rm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "CVTSD2SIrm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "CVTSS2SI64rm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "CVTSS2SIrm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "CVTTSD2SI64rm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "CVTTSD2SIrm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "CVTTSS2SIrm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "VCVTSD2SI64rm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "VCVTSD2SIrm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "VCVTSS2SI64rm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "VCVTSS2SIrm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "VCVTTSD2SI64rm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "VCVTTSD2SIrm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "VCVTTSS2SI64rm")>;
+def: InstRW<[HWWriteResGroup69], (instregex "VCVTTSS2SIrm")>;
 
-// BLSI BLSMSK BLSR.
-// r,r.
-def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>;
-// r,m.
-def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>;
-
-// BEXTR.
-// r,r,r.
-def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>;
-// r,m,r.
-def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>;
-
-// BZHI.
-// r,r,r.
-def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>;
-// r,m,r.
-def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>;
-
-// PDEP PEXT.
-// r,r,r.
-def : InstRW<[WriteP1_Lat3], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
-// r,m,r.
-def : InstRW<[WriteP1_Lat3Ld], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
-
-//-- Control transfer instructions --//
-
-// J(E|R)CXZ.
-def WriteJCXZ : SchedWriteRes<[HWPort0156, HWPort6]> { - let NumMicroOps = 2; +def HWWriteResGroup70 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>; +def: InstRW<[HWWriteResGroup70], (instregex "VCVTPS2PDYrm")>; +def: InstRW<[HWWriteResGroup70], (instregex "VPTESTYrm")>; -// LOOP. -def WriteLOOP : SchedWriteRes<[]> { - let NumMicroOps = 7; +def HWWriteResGroup71 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteLOOP], (instregex "LOOP")>; +def: InstRW<[HWWriteResGroup71], (instregex "CVTDQ2PDrm")>; +def: InstRW<[HWWriteResGroup71], (instregex "CVTPD2DQrm")>; +def: InstRW<[HWWriteResGroup71], (instregex "CVTPD2PSrm")>; +def: InstRW<[HWWriteResGroup71], (instregex "CVTSD2SSrm")>; +def: InstRW<[HWWriteResGroup71], (instregex "CVTTPD2DQrm")>; +def: InstRW<[HWWriteResGroup71], (instregex "MMX_CVTPD2PIirm")>; +def: InstRW<[HWWriteResGroup71], (instregex "MMX_CVTPI2PDirm")>; +def: InstRW<[HWWriteResGroup71], (instregex "MMX_CVTTPD2PIirm")>; +def: InstRW<[HWWriteResGroup71], (instregex "VCVTDQ2PDrm")>; +def: InstRW<[HWWriteResGroup71], (instregex "VCVTSD2SSrm")>; -// LOOP(N)E -def WriteLOOPE : SchedWriteRes<[]> { - let NumMicroOps = 11; +def HWWriteResGroup72 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteLOOPE], (instregex "LOOPE", "LOOPNE")>; +def: InstRW<[HWWriteResGroup72], (instregex "MULX64rm")>; -// CALL. -// r. 
-def WriteCALLr : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> { +def HWWriteResGroup73 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { + let Latency = 4; let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteCALLr], (instregex "CALL(16|32)r")>; +def: InstRW<[HWWriteResGroup73], (instregex "VPBROADCASTBYrm")>; +def: InstRW<[HWWriteResGroup73], (instregex "VPBROADCASTBrm")>; +def: InstRW<[HWWriteResGroup73], (instregex "VPBROADCASTWYrm")>; +def: InstRW<[HWWriteResGroup73], (instregex "VPBROADCASTWrm")>; -// m. -def WriteCALLm : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> { +def HWWriteResGroup74 : SchedWriteRes<[HWPort0156]> { + let Latency = 4; let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; + let ResourceCycles = [4]; } -def : InstRW<[WriteCALLm], (instregex "CALL(16|32)m")>; +def: InstRW<[HWWriteResGroup74], (instregex "FNCLEX")>; -// RET. -def WriteRET : SchedWriteRes<[HWPort237, HWPort6]> { - let NumMicroOps = 2; -} -def : InstRW<[WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)")>; - -// i. -def WriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> { +def HWWriteResGroup75 : SchedWriteRes<[HWPort015,HWPort0156]> { + let Latency = 4; let NumMicroOps = 4; - let ResourceCycles = [1, 2, 1]; -} -def : InstRW<[WriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>; - -// BOUND. -// r,m. -def WriteBOUND : SchedWriteRes<[]> { - let NumMicroOps = 15; + let ResourceCycles = [1,3]; } -def : InstRW<[WriteBOUND], (instregex "BOUNDS(16|32)rm")>; +def: InstRW<[HWWriteResGroup75], (instregex "VZEROUPPER")>; -// INTO. -def WriteINTO : SchedWriteRes<[]> { +def HWWriteResGroup76 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> { + let Latency = 4; let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; } -def : InstRW<[WriteINTO], (instregex "INTO")>; - -//-- String instructions --// - -// LODSB/W. -def : InstRW<[Write2P0156_P23], (instregex "LODS(B|W)")>; - -// LODSD/Q. -def : InstRW<[WriteP0156_P23], (instregex "LODS(L|Q)")>; - -// STOS. 
-def WriteSTOS : SchedWriteRes<[HWPort23, HWPort0156, HWPort4]> { - let NumMicroOps = 3; -} -def : InstRW<[WriteSTOS], (instregex "STOS(B|L|Q|W)")>; +def: InstRW<[HWWriteResGroup76], (instregex "LAR32rr")>; -// MOVS. -def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> { +def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort4,HWPort237,HWPort15]> { let Latency = 4; - let NumMicroOps = 5; - let ResourceCycles = [2, 1, 2]; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>; - -// SCAS. -def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>; +def: InstRW<[HWWriteResGroup77], (instregex "VMASKMOVPDYrm")>; +def: InstRW<[HWWriteResGroup77], (instregex "VMASKMOVPDmr")>; +def: InstRW<[HWWriteResGroup77], (instregex "VMASKMOVPSmr")>; +def: InstRW<[HWWriteResGroup77], (instregex "VPMASKMOVDYmr")>; +def: InstRW<[HWWriteResGroup77], (instregex "VPMASKMOVDmr")>; +def: InstRW<[HWWriteResGroup77], (instregex "VPMASKMOVQYmr")>; +def: InstRW<[HWWriteResGroup77], (instregex "VPMASKMOVQmr")>; -// CMPS. -def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> { +def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> { let Latency = 4; - let NumMicroOps = 5; - let ResourceCycles = [2, 3]; -} -def : InstRW<[WriteCMPS], (instregex "CMPS(B|L|Q|W)")>; - -//-- Synchronization instructions --// - -// XADD. -def WriteXADD : SchedWriteRes<[]> { - let NumMicroOps = 5; -} -def : InstRW<[WriteXADD], (instregex "XADD(8|16|32|64)rm")>; - -// CMPXCHG. -def WriteCMPXCHG : SchedWriteRes<[]> { - let NumMicroOps = 6; -} -def : InstRW<[WriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>; - -// CMPXCHG8B. -def WriteCMPXCHG8B : SchedWriteRes<[]> { - let NumMicroOps = 15; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WriteCMPXCHG8B], (instregex "CMPXCHG8B")>; +def: InstRW<[HWWriteResGroup78], (instregex "VCVTPS2PHmr")>; -// CMPXCHG16B. 
-def WriteCMPXCHG16B : SchedWriteRes<[]> { - let NumMicroOps = 22; +def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WriteCMPXCHG16B], (instregex "CMPXCHG16B")>; - -//-- Other --// +def: InstRW<[HWWriteResGroup79], (instregex "SHLD64mri8")>; +def: InstRW<[HWWriteResGroup79], (instregex "SHRD64mri8")>; -// PAUSE. -def WritePAUSE : SchedWriteRes<[HWPort05, HWPort6]> { +def HWWriteResGroup80 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> { + let Latency = 4; let NumMicroOps = 5; - let ResourceCycles = [1, 3]; -} -def : InstRW<[WritePAUSE], (instregex "PAUSE")>; - -// LEAVE. -def : InstRW<[Write2P0156_P23], (instregex "LEAVE")>; - -// XGETBV. -def WriteXGETBV : SchedWriteRes<[]> { - let NumMicroOps = 8; -} -def : InstRW<[WriteXGETBV], (instregex "XGETBV")>; - -// RDTSC. -def WriteRDTSC : SchedWriteRes<[]> { - let NumMicroOps = 15; -} -def : InstRW<[WriteRDTSC], (instregex "RDTSC")>; - -// RDPMC. -def WriteRDPMC : SchedWriteRes<[]> { - let NumMicroOps = 34; + let ResourceCycles = [1,2,1,1]; } -def : InstRW<[WriteRDPMC], (instregex "RDPMC")>; - -// RDRAND. -def WriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> { - let NumMicroOps = 17; - let ResourceCycles = [1, 16]; -} -def : InstRW<[WriteRDRAND], (instregex "RDRAND(16|32|64)r")>; - -//=== Floating Point x87 Instructions ===// -//-- Move instructions --// - -// FLD. -// m80. -def : InstRW<[WriteP01], (instregex "LD_Frr")>; +def: InstRW<[HWWriteResGroup80], (instregex "LAR32rm")>; +def: InstRW<[HWWriteResGroup80], (instregex "LSL32rm")>; -def WriteLD_F80m : SchedWriteRes<[HWPort01, HWPort23]> { +def HWWriteResGroup81 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> { let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [2, 2]; -} -def : InstRW<[WriteLD_F80m], (instregex "LD_F80m")>; - -// FBLD. -// m80. 
-def WriteFBLD : SchedWriteRes<[]> { - let Latency = 47; - let NumMicroOps = 43; + let NumMicroOps = 6; + let ResourceCycles = [1,1,4]; } -def : InstRW<[WriteFBLD], (instregex "FBLDm")>; +def: InstRW<[HWWriteResGroup81], (instregex "PUSHF16")>; +def: InstRW<[HWWriteResGroup81], (instregex "PUSHF64")>; -// FST(P). -// r. -def : InstRW<[WriteP01], (instregex "ST_(F|FP)rr")>; - -// m80. -def WriteST_FP80m : SchedWriteRes<[HWPort0156, HWPort23, HWPort4]> { - let NumMicroOps = 7; - let ResourceCycles = [3, 2, 2]; +def HWWriteResGroup82 : SchedWriteRes<[HWPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteST_FP80m], (instregex "ST_FP80m")>; +def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMADDUBSWrr64")>; +def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMADDWDirr")>; +def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULHRSWrr64")>; +def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULHUWirr")>; +def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULHWirr")>; +def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULLWirr")>; +def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULUDQirr")>; +def: InstRW<[HWWriteResGroup82], (instregex "MMX_PSADBWirr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PCMPGTQrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PHMINPOSUWrr128")>; +def: InstRW<[HWWriteResGroup82], (instregex "PMADDUBSWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PMADDWDrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PMULDQrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PMULHRSWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PMULHUWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PMULHWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PMULLWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PMULUDQrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "PSADBWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "RCPPSr")>; +def: InstRW<[HWWriteResGroup82], (instregex "RCPSSr")>; 
+def: InstRW<[HWWriteResGroup82], (instregex "RSQRTPSr")>; +def: InstRW<[HWWriteResGroup82], (instregex "RSQRTSSr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VMOVMSKPSYrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPCMPGTQYrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPCMPGTQrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPHMINPOSUWrr128")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMADDUBSWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMADDUBSWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMADDWDYrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMADDWDrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULDQYrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULDQrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULHRSWYrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULHRSWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULHUWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULHWYrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULHWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULLWYrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULLWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPMULUDQrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPSADBWYrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VPSADBWrr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VRSQRTPSr")>; +def: InstRW<[HWWriteResGroup82], (instregex "VRSQRTSSr")>; -// FBSTP. -// m80. -def WriteFBSTP : SchedWriteRes<[]> { - let NumMicroOps = 226; +def HWWriteResGroup83 : SchedWriteRes<[HWPort01]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteFBSTP], (instregex "FBSTPm")>; - -// FXCHG. 
-def : InstRW<[WriteNop], (instregex "XCH_F")>; +def: InstRW<[HWWriteResGroup83], (instregex "MULPDrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "MULPSrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "MULSDrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "MULSSrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB132PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB132PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB132PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB132PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB213PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB213PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB213PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB213PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex 
"VFMADDSUB231PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB231PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB231PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB231PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD132PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD132PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD132PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD132PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD213PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD213PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD213PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD213PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD231PDYr")>; +def: InstRW<[HWWriteResGroup83], 
(instregex "VFMSUBADD231PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD231PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD231PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213PSr")>; +def: 
InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231PDYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231PDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231PSYr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231PSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231SDr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231SSr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VMULPDYrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VMULPDrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VMULPSYrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VMULPSrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VMULSDrr")>; +def: InstRW<[HWWriteResGroup83], (instregex "VMULSSrr")>; -// FILD. -def WriteFILD : SchedWriteRes<[HWPort01, HWPort23]> { - let Latency = 6; +def HWWriteResGroup84 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 5; let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteFILD], (instregex "ILD_F(16|32|64)m")>; - -// FIST(P) FISTTP. -def WriteFIST : SchedWriteRes<[HWPort1, HWPort23, HWPort4]> { - let Latency = 7; - let NumMicroOps = 3; -} -def : InstRW<[WriteFIST], (instregex "IST_(F|FP)(16|32)m")>; - -// FLDZ. -def : InstRW<[WriteP01], (instregex "LD_F0")>; - -// FLD1. -def : InstRW<[Write2P01], (instregex "LD_F1")>; - -// FLDPI FLDL2E etc. -def : InstRW<[Write2P01], (instregex "FLDPI", "FLDL2(T|E)" "FLDL(G|N)2")>; - -// FCMOVcc. 
-def WriteFCMOVcc : SchedWriteRes<[HWPort0, HWPort5]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [2, 1]; -} -def : InstRW<[WriteFCMOVcc], (instregex "CMOV(B|BE|P|NB|NBE|NE|NP)_F")>; +def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMADDUBSWrm64")>; +def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMADDWDirm")>; +def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULHRSWrm64")>; +def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULHUWirm")>; +def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULHWirm")>; +def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULLWirm")>; +def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULUDQirm")>; +def: InstRW<[HWWriteResGroup84], (instregex "MMX_PSADBWirm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PCMPGTQrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PHMINPOSUWrm128")>; +def: InstRW<[HWWriteResGroup84], (instregex "PMADDUBSWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PMADDWDrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PMULDQrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PMULHRSWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PMULHUWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PMULHWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PMULLWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PMULUDQrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "PSADBWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "RCPPSm")>; +def: InstRW<[HWWriteResGroup84], (instregex "RCPSSm")>; +def: InstRW<[HWWriteResGroup84], (instregex "RSQRTPSm")>; +def: InstRW<[HWWriteResGroup84], (instregex "RSQRTSSm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPCMPGTQYrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPCMPGTQrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPHMINPOSUWrm128")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMADDUBSWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMADDUBSWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex 
"VPMADDWDYrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMADDWDrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULDQYrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULDQrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULHRSWYrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULHRSWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULHUWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULHUWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULHWYrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULHWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULLWYrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULLWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULUDQrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPMULUDQrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPSADBWYrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VPSADBWrm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VRCPPSm(_Int)?")>; +def: InstRW<[HWWriteResGroup84], (instregex "VRCPSSm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VRSQRTPSm")>; +def: InstRW<[HWWriteResGroup84], (instregex "VRSQRTSSm")>; -// FNSTSW. -// AX. 
-def WriteFNSTSW : SchedWriteRes<[HWPort0, HWPort0156]> { +def HWWriteResGroup85 : SchedWriteRes<[HWPort01,HWPort23]> { + let Latency = 5; let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteFNSTSW], (instregex "FNSTSW16r")>; +def: InstRW<[HWWriteResGroup85], (instregex "MULPDrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "MULPSrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "MULSDrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "MULSSrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB132PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB132PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB132PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB132PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB213PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex 
"VFMADDSUB213PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB213PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB213PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB231PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB231PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB231PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB231PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD132PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD132PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD132PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD132PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD213PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD213PDm")>; +def: InstRW<[HWWriteResGroup85], 
(instregex "VFMSUBADD213PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD213PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD231PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD231PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD231PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD231PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213PDYm")>; +def: 
InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231PDYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231PDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231PSYm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231PSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231SDm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231SSm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VMULPDYrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VMULPDrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VMULPSYrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VMULPSrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VMULSDrm")>; +def: InstRW<[HWWriteResGroup85], (instregex "VMULSSrm")>; -// m16. 
-def WriteFNSTSWm : SchedWriteRes<[HWPort0, HWPort4, HWPort237]> { - let Latency = 6; +def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort5]> { + let Latency = 5; let NumMicroOps = 3; + let ResourceCycles = [1,2]; } -def : InstRW<[WriteFNSTSWm], (instregex "FNSTSWm")>; +def: InstRW<[HWWriteResGroup86], (instregex "CVTSI2SS64rr")>; +def: InstRW<[HWWriteResGroup86], (instregex "HADDPDrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "HADDPSrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "HSUBPDrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "HSUBPSrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[HWWriteResGroup86], (instregex "VHADDPDrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "VHADDPSYrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "VHADDPSrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "VHSUBPDYrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "VHSUBPDrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "VHSUBPSYrr")>; +def: InstRW<[HWWriteResGroup86], (instregex "VHSUBPSrr")>; -// FLDCW. -def WriteFLDCW : SchedWriteRes<[HWPort01, HWPort23, HWPort6]> { - let Latency = 7; +def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort0]> { + let Latency = 5; let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteFLDCW], (instregex "FLDCW16m")>; +def: InstRW<[HWWriteResGroup87], (instregex "STR32r")>; -// FNSTCW. -def WriteFNSTCW : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> { +def HWWriteResGroup88 : SchedWriteRes<[HWPort1,HWPort0,HWPort0156]> { + let Latency = 5; let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteFNSTCW], (instregex "FNSTCW16m")>; - -// FINCSTP FDECSTP. -def : InstRW<[WriteP01], (instregex "FINCSTP", "FDECSTP")>; - -// FFREE. -def : InstRW<[WriteP01], (instregex "FFREE")>; - -// FNSAVE. -def WriteFNSAVE : SchedWriteRes<[]> { - let NumMicroOps = 147; -} -def : InstRW<[WriteFNSAVE], (instregex "FSAVEm")>; - -// FRSTOR. 
-def WriteFRSTOR : SchedWriteRes<[]> { - let NumMicroOps = 90; -} -def : InstRW<[WriteFRSTOR], (instregex "FRSTORm")>; - -//-- Arithmetic instructions --// - -// FABS. -def : InstRW<[WriteP0], (instregex "ABS_F")>; - -// FCHS. -def : InstRW<[WriteP0], (instregex "CHS_F")>; - -// FCOM(P) FUCOM(P). -// r. -def : InstRW<[WriteP1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr", - "UCOM_FPr")>; -// m. -def : InstRW<[WriteP1_P23], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>; - -// FCOMPP FUCOMPP. -// r. -def : InstRW<[Write2P01], (instregex "FCOMPP", "UCOM_FPPr")>; - -// FCOMI(P) FUCOMI(P). -// m. -def : InstRW<[Write3P01], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr", - "UCOM_FIPr")>; - -// FICOM(P). -def : InstRW<[Write2P1_P23], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>; - -// FTST. -def : InstRW<[WriteP1], (instregex "TST_F")>; - -// FXAM. -def : InstRW<[Write2P1], (instregex "FXAM")>; +def: InstRW<[HWWriteResGroup88], (instregex "MULX32rr")>; -// FPREM. -def WriteFPREM : SchedWriteRes<[]> { - let Latency = 19; - let NumMicroOps = 28; -} -def : InstRW<[WriteFPREM], (instregex "FPREM")>; - -// FPREM1. -def WriteFPREM1 : SchedWriteRes<[]> { - let Latency = 27; - let NumMicroOps = 41; -} -def : InstRW<[WriteFPREM1], (instregex "FPREM1")>; - -// FRNDINT. -def WriteFRNDINT : SchedWriteRes<[]> { - let Latency = 11; - let NumMicroOps = 17; -} -def : InstRW<[WriteFRNDINT], (instregex "FRNDINT")>; - -//-- Math instructions --// - -// FSCALE. -def WriteFSCALE : SchedWriteRes<[]> { - let Latency = 75; // 49-125 - let NumMicroOps = 50; // 25-75 -} -def : InstRW<[WriteFSCALE], (instregex "FSCALE")>; - -// FXTRACT. 
-def WriteFXTRACT : SchedWriteRes<[]> { - let Latency = 15; - let NumMicroOps = 17; +def HWWriteResGroup89 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; } -def : InstRW<[WriteFXTRACT], (instregex "FXTRACT")>; - -//-- Other instructions --// +def: InstRW<[HWWriteResGroup89], (instregex "HADDPDrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "HADDPSrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "HSUBPDrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "HSUBPSrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "VHADDPDrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "VHADDPDrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "VHADDPSYrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "VHADDPSrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "VHSUBPDYrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "VHSUBPDrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "VHSUBPSYrm")>; +def: InstRW<[HWWriteResGroup89], (instregex "VHSUBPSrm")>; -// FNOP. -def : InstRW<[WriteP01], (instregex "FNOP")>; - -// WAIT. -def : InstRW<[Write2P01], (instregex "WAIT")>; - -// FNCLEX. -def : InstRW<[Write5P0156], (instregex "FNCLEX")>; - -// FNINIT. -def WriteFNINIT : SchedWriteRes<[]> { - let NumMicroOps = 26; +def HWWriteResGroup90 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WriteFNINIT], (instregex "FNINIT")>; - -//=== Integer MMX and XMM Instructions ===// -//-- Move instructions --// - -// MOVD. -// r32/64 <- (x)mm. -def : InstRW<[WriteP0], (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr", - "VMOVPDI2DIrr", "MOVPDI2DIrr")>; - -// (x)mm <- r32/64. -def : InstRW<[WriteP5], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr", - "VMOVDI2PDIrr", "MOVDI2PDIrr")>; +def: InstRW<[HWWriteResGroup90], (instregex "CVTTSS2SI64rm")>; -// MOVQ. -// r64 <- (x)mm. 
-def : InstRW<[WriteP0], (instregex "VMOVPQIto64rr")>; - -// (x)mm <- r64. -def : InstRW<[WriteP5], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>; - -// (x)mm <- (x)mm. -def : InstRW<[WriteP015], (instregex "MMX_MOVQ64rr")>; - -// (V)MOVDQA/U. -// x <- x. -def : InstRW<[WriteP015], (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr", - "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV", - "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>; - -// MOVDQ2Q. -def : InstRW<[WriteP01_P5], (instregex "MMX_MOVDQ2Qrr")>; - -// MOVQ2DQ. -def : InstRW<[WriteP015], (instregex "MMX_MOVQ2DQrr")>; - - -// PACKSSWB/DW. -// mm <- mm. -def WriteMMXPACKSSrr : SchedWriteRes<[HWPort5]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [3]; +def HWWriteResGroup91 : SchedWriteRes<[HWPort1,HWPort23,HWPort0,HWPort0156]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WriteMMXPACKSSrr], (instregex "MMX_PACKSSDWirr", - "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>; +def: InstRW<[HWWriteResGroup91], (instregex "MULX32rm")>; -// mm <- m64. -def WriteMMXPACKSSrm : SchedWriteRes<[HWPort23, HWPort5]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1, 3]; +def HWWriteResGroup92 : SchedWriteRes<[HWPort6,HWPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [1,4]; } -def : InstRW<[WriteMMXPACKSSrm], (instregex "MMX_PACKSSDWirm", - "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>; +def: InstRW<[HWWriteResGroup92], (instregex "PAUSE")>; -// VPMOVSX/ZX BW BD BQ DW DQ. -// y <- x. -def WriteVPMOVSX : SchedWriteRes<[HWPort5]> { - let Latency = 3; - let NumMicroOps = 1; +def HWWriteResGroup93 : SchedWriteRes<[HWPort0,HWPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [1,4]; } -def : InstRW<[WriteVPMOVSX], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>; +def: InstRW<[HWWriteResGroup93], (instregex "XSETBV")>; -// PBLENDW. 
-// x,x,i / v,v,v,i -def WritePBLENDWr : SchedWriteRes<[HWPort5]>; -def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>; - -// x,m,i / v,v,m,i -def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> { - let NumMicroOps = 2; - let Latency = 4; - let ResourceCycles = [1, 1]; +def HWWriteResGroup94 : SchedWriteRes<[HWPort0,HWPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [2,3]; } -def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>; +def: InstRW<[HWWriteResGroup94], (instregex "CMPXCHG32rr")>; +def: InstRW<[HWWriteResGroup94], (instregex "CMPXCHG8rr")>; +def: InstRW<[HWWriteResGroup94], (instregex "ROUNDPDr")>; +def: InstRW<[HWWriteResGroup94], (instregex "ROUNDPSr")>; +def: InstRW<[HWWriteResGroup94], (instregex "ROUNDSDr")>; +def: InstRW<[HWWriteResGroup94], (instregex "ROUNDSSr")>; +def: InstRW<[HWWriteResGroup94], (instregex "VBROADCASTF128")>; +def: InstRW<[HWWriteResGroup94], (instregex "VPBROADCASTMB2QZrr")>; +def: InstRW<[HWWriteResGroup94], (instregex "VROUNDPDr")>; +def: InstRW<[HWWriteResGroup94], (instregex "VROUNDPSr")>; +def: InstRW<[HWWriteResGroup94], (instregex "VROUNDSDr")>; -// VPBLENDD. -// v,v,v,i. -def WriteVPBLENDDr : SchedWriteRes<[HWPort015]>; -def : InstRW<[WriteVPBLENDDr], (instregex "VPBLENDD(Y?)rri")>; - -// v,v,m,i -def WriteVPBLENDDm : SchedWriteRes<[HWPort015, HWPort23]> { +def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort5]> { + let Latency = 6; let NumMicroOps = 2; - let Latency = 4; - let ResourceCycles = [1, 1]; -} -def : InstRW<[WriteVPBLENDDm, ReadAfterLd], (instregex "VPBLENDD(Y?)rmi")>; - -// MASKMOVQ. -def WriteMASKMOVQ : SchedWriteRes<[HWPort0, HWPort4, HWPort23]> { - let Latency = 13; - let NumMicroOps = 4; - let ResourceCycles = [1, 1, 2]; -} -def : InstRW<[WriteMASKMOVQ], (instregex "MMX_MASKMOVQ(64)?")>; - -// MASKMOVDQU. 
-def WriteMASKMOVDQU : SchedWriteRes<[HWPort04, HWPort56, HWPort23]> { - let Latency = 14; - let NumMicroOps = 10; - let ResourceCycles = [4, 2, 4]; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteMASKMOVDQU], (instregex "(V?)MASKMOVDQU(64)?")>; +def: InstRW<[HWWriteResGroup95], (instregex "VCVTDQ2PDYrr")>; +def: InstRW<[HWWriteResGroup95], (instregex "VCVTPD2DQYrr")>; +def: InstRW<[HWWriteResGroup95], (instregex "VCVTPD2PSYrr")>; +def: InstRW<[HWWriteResGroup95], (instregex "VCVTPS2PHYrr")>; +def: InstRW<[HWWriteResGroup95], (instregex "VCVTTPD2DQYrr")>; +def: InstRW<[HWWriteResGroup95], (instregex "ROUNDPDm")>; +def: InstRW<[HWWriteResGroup95], (instregex "ROUNDPSm")>; +def: InstRW<[HWWriteResGroup95], (instregex "ROUNDSDm")>; +def: InstRW<[HWWriteResGroup95], (instregex "ROUNDSSm")>; +def: InstRW<[HWWriteResGroup95], (instregex "VROUNDPDm")>; +def: InstRW<[HWWriteResGroup95], (instregex "VROUNDPDm")>; +def: InstRW<[HWWriteResGroup95], (instregex "VROUNDPSm")>; +def: InstRW<[HWWriteResGroup95], (instregex "VROUNDPSm")>; +def: InstRW<[HWWriteResGroup95], (instregex "VROUNDSDm")>; +def: InstRW<[HWWriteResGroup95], (instregex "VROUNDSSm")>; -// VPMASKMOV D/Q. -// v,v,m. -def WriteVPMASKMOVr : SchedWriteRes<[HWPort5, HWPort23]> { - let Latency = 4; +def HWWriteResGroup96 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { + let Latency = 6; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [1,1,1]; } -def : InstRW<[WriteVPMASKMOVr, ReadAfterLd], - (instregex "VPMASKMOV(D|Q)(Y?)rm")>; +def: InstRW<[HWWriteResGroup96], (instregex "VCVTDQ2PDYrm")>; -// m, v,v. 
-def WriteVPMASKMOVm : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> { - let Latency = 13; +def HWWriteResGroup97 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> { + let Latency = 6; let NumMicroOps = 4; - let ResourceCycles = [1, 1, 1, 1]; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WriteVPMASKMOVm], (instregex "VPMASKMOV(D|Q)(Y?)mr")>; +def: InstRW<[HWWriteResGroup97], (instregex "VCVTPS2PHYmr")>; -// PMOVMSKB. -def WritePMOVMSKB : SchedWriteRes<[HWPort0]> { - let Latency = 3; +def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort6,HWPort0,HWPort0156]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>; +def: InstRW<[HWWriteResGroup98], (instregex "SLDT32r")>; -// PEXTR B/W/D/Q. -// r32,x,i. -def WritePEXTRr : SchedWriteRes<[HWPort0, HWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; +def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> { + let Latency = 6; + let NumMicroOps = 6; + let ResourceCycles = [1,5]; } -def : InstRW<[WritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>; +def: InstRW<[HWWriteResGroup99], (instregex "STD")>; -// m8,x,i. 
-def WritePEXTRm : SchedWriteRes<[HWPort23, HWPort4, HWPort5]> { - let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; +def HWWriteResGroup100 : SchedWriteRes<[HWPort5]> { + let Latency = 7; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>; +def: InstRW<[HWWriteResGroup100], (instregex "AESDECLASTrr")>; +def: InstRW<[HWWriteResGroup100], (instregex "AESDECrr")>; +def: InstRW<[HWWriteResGroup100], (instregex "AESENCLASTrr")>; +def: InstRW<[HWWriteResGroup100], (instregex "AESENCrr")>; +def: InstRW<[HWWriteResGroup100], (instregex "KANDQrr")>; +def: InstRW<[HWWriteResGroup100], (instregex "VAESDECLASTrr")>; +def: InstRW<[HWWriteResGroup100], (instregex "VAESDECrr")>; +def: InstRW<[HWWriteResGroup100], (instregex "VAESENCrr")>; -// VPBROADCAST B/W. -// x, m8/16. -def WriteVPBROADCAST128Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; +def HWWriteResGroup101 : SchedWriteRes<[HWPort5,HWPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteVPBROADCAST128Ld, ReadAfterLd], - (instregex "VPBROADCAST(B|W)rm")>; +def: InstRW<[HWWriteResGroup101], (instregex "AESDECLASTrm")>; +def: InstRW<[HWWriteResGroup101], (instregex "AESDECrm")>; +def: InstRW<[HWWriteResGroup101], (instregex "AESENCLASTrm")>; +def: InstRW<[HWWriteResGroup101], (instregex "AESENCrm")>; +def: InstRW<[HWWriteResGroup101], (instregex "VAESDECLASTrm")>; +def: InstRW<[HWWriteResGroup101], (instregex "VAESDECrm")>; +def: InstRW<[HWWriteResGroup101], (instregex "VAESENCLASTrm")>; +def: InstRW<[HWWriteResGroup101], (instregex "VAESENCrm")>; -// y, m8/16 -def WriteVPBROADCAST256Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> { +def HWWriteResGroup102 : SchedWriteRes<[HWPort0,HWPort5]> { let Latency = 7; let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; -} -def : InstRW<[WriteVPBROADCAST256Ld, ReadAfterLd], - 
(instregex "VPBROADCAST(B|W)Yrm")>; - -// VPGATHERDD. -// x. -def WriteVPGATHERDD128 : SchedWriteRes<[]> { - let NumMicroOps = 20; -} -def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>; - -// y. -def WriteVPGATHERDD256 : SchedWriteRes<[]> { - let NumMicroOps = 34; -} -def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>; - -// VPGATHERQD. -// x. -def WriteVPGATHERQD128 : SchedWriteRes<[]> { - let NumMicroOps = 15; -} -def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>; - -// y. -def WriteVPGATHERQD256 : SchedWriteRes<[]> { - let NumMicroOps = 22; -} -def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>; - -// VPGATHERDQ. -// x. -def WriteVPGATHERDQ128 : SchedWriteRes<[]> { - let NumMicroOps = 12; + let ResourceCycles = [1,2]; } -def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>; +def: InstRW<[HWWriteResGroup102], (instregex "MPSADBWrri")>; +def: InstRW<[HWWriteResGroup102], (instregex "VMPSADBWYrri")>; +def: InstRW<[HWWriteResGroup102], (instregex "VMPSADBWrri")>; -// y. -def WriteVPGATHERDQ256 : SchedWriteRes<[]> { - let NumMicroOps = 20; -} -def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>; - -// VPGATHERQQ. -// x. -def WriteVPGATHERQQ128 : SchedWriteRes<[]> { - let NumMicroOps = 14; -} -def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>; - -// y. 
-def WriteVPGATHERQQ256 : SchedWriteRes<[]> { - let NumMicroOps = 22; +def HWWriteResGroup103 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; } -def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>; +def: InstRW<[HWWriteResGroup103], (instregex "MPSADBWrmi")>; +def: InstRW<[HWWriteResGroup103], (instregex "VMPSADBWYrmi")>; +def: InstRW<[HWWriteResGroup103], (instregex "VMPSADBWrmi")>; -//-- Arithmetic instructions --// - -//////////////////////////////////////////////////////////////////////////////// -// Horizontal add/sub instructions. -//////////////////////////////////////////////////////////////////////////////// - -// HADD, HSUB PS/PD -// x,x / v,v,v. -def : WriteRes { - let Latency = 5; +def HWWriteResGroup104 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { + let Latency = 9; let NumMicroOps = 3; - let ResourceCycles = [1, 2]; + let ResourceCycles = [1,1,1]; } +def: InstRW<[HWWriteResGroup104], (instregex "DPPDrri")>; +def: InstRW<[HWWriteResGroup104], (instregex "VDPPDrri")>; -// x,m / v,v,m. -def : WriteRes { +def HWWriteResGroup105 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { let Latency = 9; let NumMicroOps = 4; - let ResourceCycles = [1, 2, 1]; + let ResourceCycles = [1,1,1,1]; } +def: InstRW<[HWWriteResGroup105], (instregex "DPPDrmi")>; +def: InstRW<[HWWriteResGroup105], (instregex "VDPPDrmi")>; -// PHADD|PHSUB (S) W/D. -// v <- v,v. -def : WriteRes { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [1, 2]; -} -// v <- v,m. -def : WriteRes { - let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [1, 2, 1]; -} - -// PHADD|PHSUB (S) W/D. -// v <- v,v. 
-def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [1, 2]; -} -def : InstRW<[WritePHADDSUBr], (instregex "MMX_PHADD(W?)rr64", - "MMX_PHADDSWrr64", - "MMX_PHSUB(W|D)rr64", - "MMX_PHSUBSWrr64", - "(V?)PH(ADD|SUB)(W|D)(Y?)rr", - "(V?)PH(ADD|SUB)SWrr(256)?")>; - -// v <- v,m. -def WritePHADDSUBm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { - let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [1, 2, 1]; -} -def : InstRW<[WritePHADDSUBm, ReadAfterLd], - (instregex "MMX_PHADD(W?)rm64", - "MMX_PHADDSWrm64", - "MMX_PHSUB(W|D)rm64", - "MMX_PHSUBSWrm64", - "(V?)PH(ADD|SUB)(W|D)(Y?)rm", - "(V?)PH(ADD|SUB)SWrm(128|256)?")>; - -// PCMPGTQ. -// v <- v,v. -def WritePCMPGTQr : SchedWriteRes<[HWPort0]> { - let Latency = 5; - let NumMicroOps = 1; -} -def : InstRW<[WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>; - -// v <- v,m. -def WritePCMPGTQm : SchedWriteRes<[HWPort0, HWPort23]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; -} -def : InstRW<[WritePCMPGTQm, ReadAfterLd], (instregex "(V?)PCMPGTQ(Y?)rm")>; - -// PMULLD. -// x,x / y,y,y. -def WritePMULLDr : SchedWriteRes<[HWPort0]> { +def HWWriteResGroup106 : SchedWriteRes<[HWPort0]> { let Latency = 10; let NumMicroOps = 2; let ResourceCycles = [2]; } -def : InstRW<[WritePMULLDr], (instregex "(V?)PMULLD(Y?)rr")>; +def: InstRW<[HWWriteResGroup106], (instregex "PMULLDrr")>; +def: InstRW<[HWWriteResGroup106], (instregex "VPMULLDYrr")>; +def: InstRW<[HWWriteResGroup106], (instregex "VPMULLDrr")>; -// x,m / y,y,m. -def WritePMULLDm : SchedWriteRes<[HWPort0, HWPort23]> { +def HWWriteResGroup107 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 10; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; -} -def : InstRW<[WritePMULLDm, ReadAfterLd], (instregex "(V?)PMULLD(Y?)rm")>; - -//-- Logic instructions --// - -// PTEST. -// v,v. 
-def WritePTESTr : SchedWriteRes<[HWPort0, HWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; -} -def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rr")>; - -// v,m. -def WritePTESTm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> { - let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; -} -def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rm")>; - -// PSLL,PSRL,PSRA W/D/Q. -// x,x / v,v,x. -def WritePShift : SchedWriteRes<[HWPort0, HWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; -} -def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>; - -// PSLL,PSRL DQ. -def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>; - -//-- Other --// - -// EMMS. -def WriteEMMS : SchedWriteRes<[]> { - let Latency = 13; - let NumMicroOps = 31; -} -def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>; - -//=== Floating Point XMM and YMM Instructions ===// -//-- Move instructions --// - -// MOVMSKP S/D. -// r32 <- x. -def WriteMOVMSKPr : SchedWriteRes<[HWPort0]> { - let Latency = 3; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)rr")>; +def: InstRW<[HWWriteResGroup107], (instregex "PMULLDrm")>; +def: InstRW<[HWWriteResGroup107], (instregex "VPMULLDYrm")>; +def: InstRW<[HWWriteResGroup107], (instregex "VPMULLDrm")>; -// r32 <- y. -def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]> { - let Latency = 2; +def HWWriteResGroup108 : SchedWriteRes<[HWPort0]> { + let Latency = 11; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteVMOVMSKPYr], (instregex "VMOVMSKP(S|D)Yrr")>; - -// VPERM2F128. -def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>; -def : InstRW<[WriteFShuffle256Ld, ReadAfterLd], (instregex "VPERM2F128rm")>; +def: InstRW<[HWWriteResGroup108], (instregex "DIVPSrr")>; +def: InstRW<[HWWriteResGroup108], (instregex "DIVSSrr")>; -// BLENDVP S/D. 
-def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>; -def : InstRW<[WriteFVarBlendLd, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>; - -// VBROADCASTF128. -def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>; - -// EXTRACTPS. -// r32,x,i. -def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> { +def HWWriteResGroup109 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 11; let NumMicroOps = 2; - let ResourceCycles = [1, 1]; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>; +def: InstRW<[HWWriteResGroup109], (instregex "DIVPSrm")>; +def: InstRW<[HWWriteResGroup109], (instregex "DIVSSrm")>; -// m32,x,i. -def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> { - let Latency = 4; +def HWWriteResGroup110 : SchedWriteRes<[HWPort0]> { + let Latency = 11; let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; -} -def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>; - -// VEXTRACTF128. -// x,y,i. -def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>; - -// m128,y,i. -def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; + let ResourceCycles = [3]; } -def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>; - -// VINSERTF128. -// y,y,x,i. -def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>; +def: InstRW<[HWWriteResGroup110], (instregex "PCMPISTRIrr")>; +def: InstRW<[HWWriteResGroup110], (instregex "PCMPISTRM128rr")>; +def: InstRW<[HWWriteResGroup110], (instregex "VPCMPISTRIrr")>; +def: InstRW<[HWWriteResGroup110], (instregex "VPCMPISTRM128rr")>; -// y,y,m128,i. 
-def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; +def HWWriteResGroup111 : SchedWriteRes<[HWPort0,HWPort5]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteFShuffle256, ReadAfterLd], (instregex "VINSERTF128rm")>; +def: InstRW<[HWWriteResGroup111], (instregex "PCLMULQDQrr")>; +def: InstRW<[HWWriteResGroup111], (instregex "VPCLMULQDQrr")>; -// VMASKMOVP S/D. -// v,v,m. -def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> { - let Latency = 4; +def HWWriteResGroup112 : SchedWriteRes<[HWPort0,HWPort015]> { + let Latency = 11; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>; +def: InstRW<[HWWriteResGroup112], (instregex "VRCPPSYr(_Int)?")>; +def: InstRW<[HWWriteResGroup112], (instregex "VRSQRTPSYr")>; -// m128,x,x. -def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> { - let Latency = 13; +def HWWriteResGroup113 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 11; let NumMicroOps = 4; - let ResourceCycles = [1, 1, 1, 1]; + let ResourceCycles = [3,1]; } -def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>; +def: InstRW<[HWWriteResGroup113], (instregex "PCMPISTRIrm")>; +def: InstRW<[HWWriteResGroup113], (instregex "PCMPISTRM128rm")>; +def: InstRW<[HWWriteResGroup113], (instregex "VPCMPISTRIrm")>; +def: InstRW<[HWWriteResGroup113], (instregex "VPCMPISTRM128rm")>; -// m256,y,y. -def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> { - let Latency = 14; +def HWWriteResGroup114 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { + let Latency = 11; let NumMicroOps = 4; - let ResourceCycles = [1, 1, 1, 1]; -} -def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>; - -// VGATHERDPS. -// x. 
-def WriteVGATHERDPS128 : SchedWriteRes<[]> { - let NumMicroOps = 20; -} -def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>; - -// y. -def WriteVGATHERDPS256 : SchedWriteRes<[]> { - let NumMicroOps = 34; -} -def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>; - -// VGATHERQPS. -// x. -def WriteVGATHERQPS128 : SchedWriteRes<[]> { - let NumMicroOps = 15; -} -def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>; - -// y. -def WriteVGATHERQPS256 : SchedWriteRes<[]> { - let NumMicroOps = 22; + let ResourceCycles = [2,1,1]; } -def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>; +def: InstRW<[HWWriteResGroup114], (instregex "PCLMULQDQrm")>; +def: InstRW<[HWWriteResGroup114], (instregex "VPCLMULQDQrm")>; +def: InstRW<[HWWriteResGroup114], (instregex "VRCPPSYm(_Int)?")>; -// VGATHERDPD. -// x. -def WriteVGATHERDPD128 : SchedWriteRes<[]> { - let NumMicroOps = 12; -} -def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>; - -// y. -def WriteVGATHERDPD256 : SchedWriteRes<[]> { - let NumMicroOps = 20; +def HWWriteResGroup115 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; } -def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>; +def: InstRW<[HWWriteResGroup115], (instregex "VRCPPSm")>; +def: InstRW<[HWWriteResGroup115], (instregex "VRSQRTPSYm")>; -// VGATHERQPD. -// x. -def WriteVGATHERQPD128 : SchedWriteRes<[]> { +def HWWriteResGroup116 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0,HWPort15,HWPort0156]> { + let Latency = 11; let NumMicroOps = 14; + let ResourceCycles = [1,1,1,4,2,5]; } -def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>; +def: InstRW<[HWWriteResGroup116], (instregex "CMPXCHG8B")>; -// y. 
-def WriteVGATHERQPD256 : SchedWriteRes<[]> { - let NumMicroOps = 22; +def HWWriteResGroup117 : SchedWriteRes<[HWPort0]> { + let Latency = 13; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>; - -//-- Conversion instructions --// - -// CVTPD2PS. -// x,x. -def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>; - -// x,m128. -def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>; +def: InstRW<[HWWriteResGroup117], (instregex "SQRTPSr")>; +def: InstRW<[HWWriteResGroup117], (instregex "SQRTSSr")>; +def: InstRW<[HWWriteResGroup117], (instregex "VDIVPSrr")>; +def: InstRW<[HWWriteResGroup117], (instregex "VDIVSSrr")>; -// x,y. -def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> { - let Latency = 5; +def HWWriteResGroup118 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 13; let NumMicroOps = 2; - let ResourceCycles = [1, 1]; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>; +def: InstRW<[HWWriteResGroup118], (instregex "SQRTPSm")>; +def: InstRW<[HWWriteResGroup118], (instregex "SQRTSSm")>; +def: InstRW<[HWWriteResGroup118], (instregex "VDIVPSrm")>; +def: InstRW<[HWWriteResGroup118], (instregex "VDIVSSrm")>; -// x,m256. -def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; +def HWWriteResGroup119 : SchedWriteRes<[HWPort0]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>; +def: InstRW<[HWWriteResGroup119], (instregex "DIVPDrr")>; +def: InstRW<[HWWriteResGroup119], (instregex "DIVSDrr")>; +def: InstRW<[HWWriteResGroup119], (instregex "VSQRTPSr")>; +def: InstRW<[HWWriteResGroup119], (instregex "VSQRTSSr")>; -// CVTSD2SS. -// x,x. -def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>; - -// x,m64. 
-def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>; - -// CVTPS2PD. -// x,x. -def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> { - let Latency = 2; +def HWWriteResGroup120 : SchedWriteRes<[HWPort5]> { + let Latency = 14; let NumMicroOps = 2; - let ResourceCycles = [1, 1]; + let ResourceCycles = [2]; } -def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>; +def: InstRW<[HWWriteResGroup120], (instregex "AESIMCrr")>; +def: InstRW<[HWWriteResGroup120], (instregex "VAESIMCrr")>; -// x,m64. -// y,m128. -def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> { - let Latency = 5; +def HWWriteResGroup121 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 14; let NumMicroOps = 2; - let ResourceCycles = [1, 1]; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>; +def: InstRW<[HWWriteResGroup121], (instregex "DIVPDrm")>; +def: InstRW<[HWWriteResGroup121], (instregex "DIVSDrm")>; +def: InstRW<[HWWriteResGroup121], (instregex "VSQRTPSm")>; +def: InstRW<[HWWriteResGroup121], (instregex "VSQRTSSm")>; -// y,x. -def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; +def HWWriteResGroup122 : SchedWriteRes<[HWPort5,HWPort23]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>; +def: InstRW<[HWWriteResGroup122], (instregex "AESIMCrm")>; +def: InstRW<[HWWriteResGroup122], (instregex "VAESIMCrm")>; -// CVTSS2SD. -// x,x. 
-def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; +def HWWriteResGroup123 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; } -def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>; +def: InstRW<[HWWriteResGroup123], (instregex "DPPSrri")>; +def: InstRW<[HWWriteResGroup123], (instregex "VDPPSYrri")>; +def: InstRW<[HWWriteResGroup123], (instregex "VDPPSrri")>; -// x,m32. -def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; +def HWWriteResGroup124 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { + let Latency = 14; + let NumMicroOps = 5; + let ResourceCycles = [2,1,1,1]; } -def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>; +def: InstRW<[HWWriteResGroup124], (instregex "DPPSrmi")>; +def: InstRW<[HWWriteResGroup124], (instregex "VDPPSYrmi")>; +def: InstRW<[HWWriteResGroup124], (instregex "VDPPSrmi")>; -// CVTDQ2PD. -// x,x. -def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>; - -// y,x. -def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>; - -// CVT(T)PD2DQ. -// x,x. -def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>; -// x,m128. -def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>; -// x,y. -def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>; -// x,m256. -def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>; - -// CVT(T)PS2PI. -// mm,x. -def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>; - -// CVTPI2PD. -// x,mm. -def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>; - -// CVT(T)PD2PI. -// mm,x. -def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>; - -// CVSTSI2SS. -// x,r32. -def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>; - -// CVT(T)SS2SI. 
-// r32,x. -def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>; -// r32,m32. -def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>; - -// CVTSI2SD. -// x,r32/64. -def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SS(64)?rr")>; - -// CVTSD2SI. -// r32/64 -def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>; -// r32,m32. -def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>; - -// VCVTPS2PH. -// x,v,i. -def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>; -// m,v,i. -def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>; - -// VCVTPH2PS. -// v,x. -def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>; +def HWWriteResGroup125 : SchedWriteRes<[HWPort23,HWPort0156]> { + let Latency = 14; + let NumMicroOps = 15; + let ResourceCycles = [1,14]; +} +def: InstRW<[HWWriteResGroup125], (instregex "POPF16")>; -//-- Arithmetic instructions --// +def HWWriteResGroup126 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort0,HWPort0156]> { + let Latency = 15; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,1,1,1,2]; +} +def: InstRW<[HWWriteResGroup126], (instregex "INSB")>; +def: InstRW<[HWWriteResGroup126], (instregex "INSL")>; +def: InstRW<[HWWriteResGroup126], (instregex "INSW")>; -// HADD, HSUB PS/PD -// x,x / v,v,v. -def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1, 2]; +def HWWriteResGroup127 : SchedWriteRes<[HWPort5]> { + let Latency = 16; + let NumMicroOps = 16; + let ResourceCycles = [16]; } -def : InstRW<[WriteHADDSUBPr], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>; +def: InstRW<[HWWriteResGroup127], (instregex "VZEROALL")>; -// x,m / v,v,m. 
-def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1, 2, 1]; +def HWWriteResGroup128 : SchedWriteRes<[HWPort0,HWPort4,HWPort5,HWPort23,HWPort237,HWPort0,HWPort0156]> { + let Latency = 16; + let NumMicroOps = 19; + let ResourceCycles = [2,1,4,1,1,4,6]; } -def : InstRW<[WriteHADDSUBPm], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>; +def: InstRW<[HWWriteResGroup128], (instregex "CMPXCHG16B")>; -// MULL SS/SD PS/PD. -// x,x / v,v,v. -def WriteMULr : SchedWriteRes<[HWPort01]> { - let Latency = 5; +def HWWriteResGroup129 : SchedWriteRes<[HWPort0,HWPort5,HWPort0156]> { + let Latency = 18; + let NumMicroOps = 8; + let ResourceCycles = [4,3,1]; } -def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>; +def: InstRW<[HWWriteResGroup129], (instregex "PCMPESTRIrr")>; +def: InstRW<[HWWriteResGroup129], (instregex "VPCMPESTRIrr")>; -// x,m / v,v,m. -def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> { - let Latency = 9; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; +def HWWriteResGroup130 : SchedWriteRes<[HWPort5,HWPort6,HWPort0,HWPort0156]> { + let Latency = 18; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,5]; } -def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>; +def: InstRW<[HWWriteResGroup130], (instregex "CPUID")>; +def: InstRW<[HWWriteResGroup130], (instregex "RDTSC")>; -// VDIVPS. -// y,y,y. -def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> { - let Latency = 19; // 18-21 cycles. - let NumMicroOps = 3; - let ResourceCycles = [2, 1]; +def HWWriteResGroup131 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort0156]> { + let Latency = 18; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; } -def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>; +def: InstRW<[HWWriteResGroup131], (instregex "PCMPESTRIrm")>; +def: InstRW<[HWWriteResGroup131], (instregex "VPCMPESTRIrm")>; -// y,y,m256. 
-def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { - let Latency = 23; // 18-21 + 4 cycles. - let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; +def HWWriteResGroup132 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> { + let Latency = 18; + let NumMicroOps = 19; + let ResourceCycles = [3,1,15]; } -def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>; +def: InstRW<[HWWriteResGroup132], (instregex "XRSTOR")>; -// VDIVPD. -// y,y,y. -def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> { - let Latency = 27; // 19-35 cycles. - let NumMicroOps = 3; - let ResourceCycles = [2, 1]; +def HWWriteResGroup133 : SchedWriteRes<[HWPort0,HWPort5,HWPort015,HWPort0156]> { + let Latency = 19; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; } -def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>; +def: InstRW<[HWWriteResGroup133], (instregex "PCMPESTRM128rr")>; +def: InstRW<[HWWriteResGroup133], (instregex "VPCMPESTRM128rr")>; -// y,y,m256. -def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { - let Latency = 31; // 19-35 + 4 cycles. 
- let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; +def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort015,HWPort0156]> { + let Latency = 19; + let NumMicroOps = 10; + let ResourceCycles = [4,3,1,1,1]; } -def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>; +def: InstRW<[HWWriteResGroup134], (instregex "PCMPESTRM128rm")>; +def: InstRW<[HWWriteResGroup134], (instregex "VPCMPESTRM128rm")>; +def: InstRW<[HWWriteResGroup134], (instregex "SQRTPDr")>; +def: InstRW<[HWWriteResGroup134], (instregex "SQRTSDr")>; +def: InstRW<[HWWriteResGroup134], (instregex "VDIVPDrr")>; +def: InstRW<[HWWriteResGroup134], (instregex "VDIVSDrr")>; +def: InstRW<[HWWriteResGroup134], (instregex "SQRTPDm")>; +def: InstRW<[HWWriteResGroup134], (instregex "SQRTSDm")>; +def: InstRW<[HWWriteResGroup134], (instregex "VDIVPDrm")>; +def: InstRW<[HWWriteResGroup134], (instregex "VDIVSDrm")>; -// VRCPPS. -// y,y. -def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2, 1]; +def HWWriteResGroup135 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { + let Latency = 20; + let NumMicroOps = 10; + let ResourceCycles = [1,2,7]; } -def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>; +def: InstRW<[HWWriteResGroup135], (instregex "MWAITrr")>; -// y,m256. -def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; +def HWWriteResGroup136 : SchedWriteRes<[HWPort0]> { + let Latency = 21; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>; +def: InstRW<[HWWriteResGroup136], (instregex "VSQRTPDr")>; +def: InstRW<[HWWriteResGroup136], (instregex "VSQRTSDr")>; -// ROUND SS/SD PS/PD. -// v,v,i. 
-def WriteROUNDr : SchedWriteRes<[HWPort1]> { - let Latency = 6; +def HWWriteResGroup137 : SchedWriteRes<[HWPort0,HWPort23]> { + let Latency = 21; let NumMicroOps = 2; - let ResourceCycles = [2]; + let ResourceCycles = [1,1]; } -def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>; +def: InstRW<[HWWriteResGroup137], (instregex "VSQRTPDm")>; +def: InstRW<[HWWriteResGroup137], (instregex "VSQRTSDm")>; -// v,m,i. -def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> { - let Latency = 10; +def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort015]> { + let Latency = 21; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>; +def: InstRW<[HWWriteResGroup138], (instregex "VDIVPSYrr")>; +def: InstRW<[HWWriteResGroup138], (instregex "VSQRTPSYr")>; -// DPPS. -// x,x,i / v,v,v,i. -def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> { - let Latency = 14; +def HWWriteResGroup139 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { + let Latency = 21; let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; -} -def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>; - -// x,m,i / v,v,m,i. -def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> { - let Latency = 18; - let NumMicroOps = 6; - let ResourceCycles = [2, 1, 1, 1, 1]; + let ResourceCycles = [2,1,1]; } -def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>; +def: InstRW<[HWWriteResGroup139], (instregex "VDIVPSYrm")>; +def: InstRW<[HWWriteResGroup139], (instregex "VSQRTPSYm")>; -// DPPD. -// x,x,i. 
-def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; +def HWWriteResGroup140 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> { // XSAVE64: 27 uops, latency 24. + let Latency = 24; + let NumMicroOps = 27; + let ResourceCycles = [1,5,1,1,19]; } -def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>; +def: InstRW<[HWWriteResGroup140], (instregex "XSAVE64")>; -// x,m,i. -def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> { - let Latency = 13; - let NumMicroOps = 4; - let ResourceCycles = [1, 1, 1, 1]; +def HWWriteResGroup141 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> { // 28 uops, latency 25. NOTE(review): if instregex is not anchored at the end, "XSAVE" also matches XSAVE64 and would overlap HWWriteResGroup140; confirm. + let Latency = 25; + let NumMicroOps = 28; + let ResourceCycles = [1,6,1,1,19]; } -def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>; +def: InstRW<[HWWriteResGroup141], (instregex "XSAVE")>; -// VFMADD. -// v,v,v. -def WriteFMADDr : SchedWriteRes<[HWPort01]> { - let Latency = 5; - let NumMicroOps = 1; +def HWWriteResGroup142 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort015]> { // AESKEYGENASSIST rm forms: 11 uops, one of them on HWPort23. NOTE(review): latency 28 is lower than the 29 of the rr group HWWriteResGroup143; confirm. + let Latency = 28; + let NumMicroOps = 11; + let ResourceCycles = [2,7,1,1]; } -def : InstRW<[WriteFMADDr], - (instregex - // 3p forms. - "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?", - // 3s forms. - "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r", - // 4s/4s_int forms. - "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?", - // 4p forms. - "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>; +def: InstRW<[HWWriteResGroup142], (instregex "AESKEYGENASSIST128rm")>; +def: InstRW<[HWWriteResGroup142], (instregex "VAESKEYGENASSIST128rm")>; -// v,v,m. -def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> { - let Latency = 9; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; -} -def : InstRW<[WriteFMADDm], - (instregex - // 3p forms. - "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?", - // 3s forms. - "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m", - // 4s/4s_int forms.
- "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?", - // 4p forms. - "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>; - -//-- Math instructions --// - -// VSQRTPS. -// y,y. -def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> { - let Latency = 19; - let NumMicroOps = 3; - let ResourceCycles = [2, 1]; +def HWWriteResGroup143 : SchedWriteRes<[HWPort0,HWPort5,HWPort015]> { // AESKEYGENASSIST rr forms: 11 uops, latency 29. + let Latency = 29; + let NumMicroOps = 11; + let ResourceCycles = [2,7,2]; } -def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>; +def: InstRW<[HWWriteResGroup143], (instregex "AESKEYGENASSIST128rr")>; +def: InstRW<[HWWriteResGroup143], (instregex "VAESKEYGENASSIST128rr")>; -// y,m256. -def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { - let Latency = 23; - let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; +def HWWriteResGroup145 : SchedWriteRes<[HWPort01,HWPort15,HWPort015,HWPort0156]> { // MMX_EMMS: 31 uops, latency 31. + let Latency = 31; + let NumMicroOps = 31; + let ResourceCycles = [8,1,21,1]; } -def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>; +def: InstRW<[HWWriteResGroup145], (instregex "MMX_EMMS")>; -// VSQRTPD. -// y,y. -def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> { - let Latency = 28; +def HWWriteResGroup146 : SchedWriteRes<[HWPort0,HWPort015]> { // 3 uops (2 on HWPort0, 1 on HWPort015), latency 35: VDIVPDYrr and VSQRTPDYr. + let Latency = 35; let NumMicroOps = 3; - let ResourceCycles = [2, 1]; + let ResourceCycles = [2,1]; } -def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>; +def: InstRW<[HWWriteResGroup146], (instregex "VDIVPDYrr")>; +def: InstRW<[HWWriteResGroup146], (instregex "VSQRTPDYr")>; -// y,m256.
-def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { - let Latency = 32; +def HWWriteResGroup147 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { // 4 uops (2 on HWPort0, 1 on HWPort23, 1 on HWPort015), latency 35: VDIVPDYrm and VSQRTPDYm. + let Latency = 35; let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; + let ResourceCycles = [2,1,1]; } -def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>; +def: InstRW<[HWWriteResGroup147], (instregex "VDIVPDYrm")>; +def: InstRW<[HWWriteResGroup147], (instregex "VSQRTPDYm")>; -// RSQRT SS/PS. -// x,x. -def WriteRSQRTr : SchedWriteRes<[HWPort0]> { - let Latency = 5; +def HWWriteResGroup148 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> { // VMCLEARm: 18 uops, latency 35. + let Latency = 35; + let NumMicroOps = 18; + let ResourceCycles = [1,1,2,3,1,1,1,8]; } -def : InstRW<[WriteRSQRTr], (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>; +def: InstRW<[HWWriteResGroup148], (instregex "VMCLEARm")>; -// x,m128. -def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> { - let Latency = 9; - let NumMicroOps = 2; - let ResourceCycles = [1, 1]; +def HWWriteResGroup149 : SchedWriteRes<[HWPort5,HWPort0156]> { // RDTSCP: 22 uops, latency 42. + let Latency = 42; + let NumMicroOps = 22; + let ResourceCycles = [2,20]; } -def : InstRW<[WriteRSQRTm], (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>; +def: InstRW<[HWWriteResGroup149], (instregex "RDTSCP")>; -// RSQRTPS 256. -// y,y. -def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2, 1]; +def HWWriteResGroup150 : SchedWriteRes<[HWPort0,HWPort01,HWPort23,HWPort05,HWPort06,HWPort015,HWPort0156]> { // FLDENVm: 64 uops, latency 56. Every resource is listed once; the 1-cycle and 10-cycle entries use the HWPort05 and HWPort06 groups rather than repeating HWPort0. NOTE(review): the FLDENVm InstRW line below appears twice; the duplicate is kept so the hunk line count is unchanged. + let Latency = 56; + let NumMicroOps = 64; + let ResourceCycles = [2,2,8,1,10,2,39]; } -def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>; +def: InstRW<[HWWriteResGroup150], (instregex "FLDENVm")>; +def: InstRW<[HWWriteResGroup150], (instregex "FLDENVm")>; -// y,m256.
-def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [2, 1, 1]; +def HWWriteResGroup151 : SchedWriteRes<[HWPort0,HWPort6,HWPort23,HWPort05,HWPort06,HWPort15,HWPort0156]> { // FXRSTOR64: 88 uops, latency 59. The 1-cycle and 2-cycle entries use the HWPort05 and HWPort06 groups so that no resource is listed twice. + let Latency = 59; + let NumMicroOps = 88; + let ResourceCycles = [4,4,31,1,2,1,45]; } -def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>; - -//-- Logic instructions --// +def: InstRW<[HWWriteResGroup151], (instregex "FXRSTOR64")>; -// AND, ANDN, OR, XOR PS/PD. -// x,x / v,v,v. -def : InstRW<[WriteP5], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>; -// x,m / v,v,m. -def : InstRW<[WriteP5Ld, ReadAfterLd], - (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>; - -//-- Other instructions --// +def HWWriteResGroup152 : SchedWriteRes<[HWPort0,HWPort6,HWPort23,HWPort05,HWPort06,HWPort15,HWPort0156]> { // 90 uops, latency 59; same resource list as HWWriteResGroup151, with HWPort05 and HWPort06 replacing the repeated HWPort0. NOTE(review): if instregex is not anchored at the end, "FXRSTOR" also matches FXRSTOR64; confirm. + let Latency = 59; + let NumMicroOps = 90; + let ResourceCycles = [4,2,33,1,2,1,47]; +} +def: InstRW<[HWWriteResGroup152], (instregex "FXRSTOR")>; -// VZEROUPPER. -def WriteVZEROUPPER : SchedWriteRes<[]> { - let NumMicroOps = 4; +def HWWriteResGroup153 : SchedWriteRes<[HWPort5,HWPort01,HWPort0156]> { // FNINIT: 15 uops, latency 75. + let Latency = 75; + let NumMicroOps = 15; + let ResourceCycles = [6,3,6]; } -def : InstRW<[WriteVZEROUPPER], (instregex "VZEROUPPER")>; +def: InstRW<[HWWriteResGroup153], (instregex "FNINIT")>; -// VZEROALL. -def WriteVZEROALL : SchedWriteRes<[]> { - let NumMicroOps = 12; +def HWWriteResGroup154 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156]> { // DIV64r: 32 uops, latency 98. + let Latency = 98; + let NumMicroOps = 32; + let ResourceCycles = [7,7,3,3,1,11]; } -def : InstRW<[WriteVZEROALL], (instregex "VZEROALL")>; +def: InstRW<[HWWriteResGroup154], (instregex "DIV64r")>; -// LDMXCSR.
-def WriteLDMXCSR : SchedWriteRes<[HWPort0, HWPort6, HWPort23]> { - let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [1, 1, 1]; +def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort06,HWPort0156]> { // IDIV64r: 66 uops, latency 112. The 14-cycle entry uses the HWPort06 group instead of listing HWPort0 a second time. + let Latency = 112; + let NumMicroOps = 66; + let ResourceCycles = [4,2,4,8,14,34]; } -def : InstRW<[WriteLDMXCSR], (instregex "(V)?LDMXCSR")>; +def: InstRW<[HWWriteResGroup155], (instregex "IDIV64r")>; -// STMXCSR. -def WriteSTMXCSR : SchedWriteRes<[HWPort0, HWPort4, HWPort6, HWPort237]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1, 1, 1, 1]; +def HWWriteResGroup156 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,HWPort237,HWPort06,HWPort0156]> { // FSTENVm: 100 uops, latency 114. The 21-cycle entry uses the HWPort06 group instead of listing HWPort0 a second time. NOTE(review): the FSTENVm InstRW line below appears twice; the duplicate is kept so the hunk line count is unchanged. + let Latency = 114; + let NumMicroOps = 100; + let ResourceCycles = [9,9,11,8,1,11,21,30]; } -def : InstRW<[WriteSTMXCSR], (instregex "(V)?STMXCSR")>; +def: InstRW<[HWWriteResGroup156], (instregex "FSTENVm")>; +def: InstRW<[HWWriteResGroup156], (instregex "FSTENVm")>; } // SchedModel Index: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td +++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td @@ -24,8 +24,8 @@ // Based on the LSD (loop-stream detector) queue size. let LoopMicroOpBufferSize = 28; - // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow - // the scheduler to assign a default model to unrecognized opcodes. + // This flag is set to allow the scheduler to assign + // a default model to unrecognized opcodes. let CompleteModel = 0; } @@ -48,6 +48,7 @@ def SBPort4 : ProcResource<1>; // Many micro-ops are capable of issuing on multiple ports.
+def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>; def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>; def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>; def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; @@ -157,31 +158,6 @@ let ResourceCycles = [1, 1, 1, 1]; } -//////////////////////////////////////////////////////////////////////////////// -// Horizontal add/sub instructions. -//////////////////////////////////////////////////////////////////////////////// -// HADD, HSUB PS/PD -// x,x / v,v,v. -def : WriteRes { - let Latency = 3; -} - -// x,m / v,v,m. -def : WriteRes { - let Latency = 7; - let ResourceCycles = [1, 1]; -} - -// PHADD|PHSUB (S) W/D. -// v <- v,v. -def : WriteRes; - -// v <- v,m. -def : WriteRes { - let Latency = 5; - let ResourceCycles = [1, 1]; -} - // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes { @@ -272,4 +248,2282 @@ defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; + +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { // NOTE(review): no template arguments are visible on this and the other WriteRes and SBWriteResPair lines in this hunk, while SchedWriteRes elsewhere keeps its bracketed list; presumably they were lost in extraction, so confirm against the original file. + let Latency = 3; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes; + +// v <- v,m. +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 1]; +} + +// Remaining SNB instrs.
+ +def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>; + +def SBWriteResGroup1 : SchedWriteRes<[SBPort5]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup1], (instregex "ANDNPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "ANDNPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "ANDPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "ANDPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "FDECSTP")>; +def: InstRW<[SBWriteResGroup1], (instregex "FFREE")>; +def: InstRW<[SBWriteResGroup1], (instregex "FINCSTP")>; +def: 
InstRW<[SBWriteResGroup1], (instregex "FNOP")>; +def: InstRW<[SBWriteResGroup1], (instregex "INSERTPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "JMP64r")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVAPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVAPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVDDUPrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVDI2PDIrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVHLPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVLHPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVSDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVSHDUPrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVSLDUPrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVSSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVUPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "MOVUPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "ORPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "ORPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "RETQ")>; +def: InstRW<[SBWriteResGroup1], (instregex "SHUFPDrri")>; +def: InstRW<[SBWriteResGroup1], (instregex "SHUFPSrri")>; +def: InstRW<[SBWriteResGroup1], (instregex "UNPCKHPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "UNPCKHPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "UNPCKLPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "UNPCKLPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VANDNPDYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VANDNPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VANDNPSYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VANDNPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VANDPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VANDPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VANDPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VEXTRACTF128rr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VGATHERQPSZrm")>; +def: 
InstRW<[SBWriteResGroup1], (instregex "VINSERTF128rr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VINSERTPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVAPDYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVAPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVAPSYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVAPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVDDUPYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVDDUPrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVHLPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVHLPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVSDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVSHDUPYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVSHDUPrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVSLDUPYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVSLDUPrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVSSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVUPDYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVUPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVUPSYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VMOVUPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VORPDYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VORPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VORPSYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VORPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPDri")>; +def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPDrm")>; +def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPSri")>; +def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPSrm")>; +def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPSrr")>; +def: 
InstRW<[SBWriteResGroup1], (instregex "VSHUFPDYrri")>; +def: InstRW<[SBWriteResGroup1], (instregex "VSHUFPDrri")>; +def: InstRW<[SBWriteResGroup1], (instregex "VSHUFPSYrri")>; +def: InstRW<[SBWriteResGroup1], (instregex "VSHUFPSrri")>; +def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKHPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKHPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKLPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKLPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VXORPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "VXORPSrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "XORPDrr")>; +def: InstRW<[SBWriteResGroup1], (instregex "XORPSrr")>; + +def SBWriteResGroup2 : SchedWriteRes<[SBPort01]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup2], (instregex "LEA64_32r")>; + +def SBWriteResGroup3 : SchedWriteRes<[SBPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup3], (instregex "BLENDPDrri")>; +def: InstRW<[SBWriteResGroup3], (instregex "BLENDPSrri")>; +def: InstRW<[SBWriteResGroup3], (instregex "BT32ri8")>; +def: InstRW<[SBWriteResGroup3], (instregex "BT32rr")>; +def: InstRW<[SBWriteResGroup3], (instregex "BTC32ri8")>; +def: InstRW<[SBWriteResGroup3], (instregex "BTC32rr")>; +def: InstRW<[SBWriteResGroup3], (instregex "BTR32ri8")>; +def: InstRW<[SBWriteResGroup3], (instregex "BTR32rr")>; +def: InstRW<[SBWriteResGroup3], (instregex "BTS32ri8")>; +def: InstRW<[SBWriteResGroup3], (instregex "BTS32rr")>; +def: InstRW<[SBWriteResGroup3], (instregex "CDQ")>; +def: InstRW<[SBWriteResGroup3], (instregex "CQO")>; +def: InstRW<[SBWriteResGroup3], (instregex "LAHF")>; +def: InstRW<[SBWriteResGroup3], (instregex "SAHF")>; +def: InstRW<[SBWriteResGroup3], (instregex "SAR32ri")>; 
+def: InstRW<[SBWriteResGroup3], (instregex "SAR8ri")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETAEr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETBr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETEr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETGEr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETGr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETLEr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETLr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETNEr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETNOr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETNPr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETNSr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETOr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETPr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SETSr")>; +def: InstRW<[SBWriteResGroup3], (instregex "SHL32ri")>; +def: InstRW<[SBWriteResGroup3], (instregex "SHL64r1")>; +def: InstRW<[SBWriteResGroup3], (instregex "SHL8r1")>; +def: InstRW<[SBWriteResGroup3], (instregex "SHL8ri")>; +def: InstRW<[SBWriteResGroup3], (instregex "SHR32ri")>; +def: InstRW<[SBWriteResGroup3], (instregex "SHR8ri")>; +def: InstRW<[SBWriteResGroup3], (instregex "VBLENDPDYrri")>; +def: InstRW<[SBWriteResGroup3], (instregex "VBLENDPDrri")>; +def: InstRW<[SBWriteResGroup3], (instregex "VBLENDPSYrri")>; +def: InstRW<[SBWriteResGroup3], (instregex "VBLENDPSrri")>; +def: InstRW<[SBWriteResGroup3], (instregex "VMOVDQAYrr")>; +def: InstRW<[SBWriteResGroup3], (instregex "VMOVDQArr")>; +def: InstRW<[SBWriteResGroup3], (instregex "VMOVDQUYrr")>; +def: InstRW<[SBWriteResGroup3], (instregex "VMOVDQUrr")>; + +def SBWriteResGroup4 : SchedWriteRes<[SBPort15]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup4], (instregex "KORTESTBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "MMX_PABSBrr64")>; +def: InstRW<[SBWriteResGroup4], (instregex "MMX_PABSDrr64")>; +def: 
InstRW<[SBWriteResGroup4], (instregex "MMX_PABSWrr64")>; +def: InstRW<[SBWriteResGroup4], (instregex "MMX_PADDQirr")>; +def: InstRW<[SBWriteResGroup4], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[SBWriteResGroup4], (instregex "MMX_PSHUFBrr64")>; +def: InstRW<[SBWriteResGroup4], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[SBWriteResGroup4], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[SBWriteResGroup4], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[SBWriteResGroup4], (instregex "PABSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PABSDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PABSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PACKSSDWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PACKSSWBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PACKUSDWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PACKUSWBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PADDBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PADDDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PADDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PADDSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PADDSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PADDUSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PADDUSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PADDWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PALIGNRrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "PAVGBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PAVGWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PBLENDWrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "PCMPEQBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PCMPEQDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PCMPEQQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PCMPEQWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PCMPGTBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PCMPGTDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PCMPGTWrr")>; +def: InstRW<[SBWriteResGroup4], 
(instregex "PMAXSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMAXSDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMAXSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMAXUBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMAXUDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMAXUWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMINSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMINSDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMINSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMINUBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMINUDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMINUWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXBDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXBQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXWDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXWQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXBDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXBQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXWDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXWQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSHUFBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSHUFDri")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSHUFHWri")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSHUFLWri")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSIGNBrr128")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSIGNDrr128")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSIGNWrr128")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSLLDQri")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSRLDQri")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSUBBrr")>; +def: 
InstRW<[SBWriteResGroup4], (instregex "PSUBDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSUBQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSUBSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSUBSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSUBUSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSUBUSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PSUBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKHBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKHDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKHQDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKHWDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKLBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKLDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKLQDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKLWDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VMASKMOVPSYrm")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPABSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPABSDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPABSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPACKSSDWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPACKSSWBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPACKUSDWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPACKUSWBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPADDBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPADDDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPADDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPADDUSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPADDUSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPALIGNRrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPAVGBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPAVGWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPBLENDWrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPCMPEQBrr")>; +def: InstRW<[SBWriteResGroup4], 
(instregex "VPCMPEQDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPCMPEQWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPCMPGTBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPCMPGTDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPCMPGTWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMAXSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMAXSDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMAXSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMAXUBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMAXUDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMAXUWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMINSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMINSDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMINSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMINUBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMINUDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMINUWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXBDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXBQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXWDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXWQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXBDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXBQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXWDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXWQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSHUFBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSHUFDri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSHUFLWri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSIGNBrr128")>; +def: InstRW<[SBWriteResGroup4], (instregex 
"VPSIGNDrr128")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSIGNWrr128")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSLLDQri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSRLDQri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSUBBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSUBDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSUBQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSUBSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSUBSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSUBUSBrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSUBUSWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPSUBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKLWDrr")>; + +def SBWriteResGroup5 : SchedWriteRes<[SBPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup5], (instregex "ADD32ri8")>; +def: InstRW<[SBWriteResGroup5], (instregex "ADD32rr")>; +def: InstRW<[SBWriteResGroup5], (instregex "ADD8ri")>; +def: InstRW<[SBWriteResGroup5], (instregex "ADD8rr")>; +def: InstRW<[SBWriteResGroup5], (instregex "AND32ri")>; +def: InstRW<[SBWriteResGroup5], (instregex "AND64ri8")>; +def: InstRW<[SBWriteResGroup5], (instregex "AND64rr")>; +def: InstRW<[SBWriteResGroup5], (instregex "AND8ri")>; +def: InstRW<[SBWriteResGroup5], (instregex "AND8rr")>; +def: InstRW<[SBWriteResGroup5], (instregex "CBW")>; +def: InstRW<[SBWriteResGroup5], (instregex "CMC")>; +def: InstRW<[SBWriteResGroup5], (instregex "CMP16ri8")>; +def: InstRW<[SBWriteResGroup5], (instregex "CMP32i32")>; +def: InstRW<[SBWriteResGroup5], (instregex "CMP64rr")>; +def: InstRW<[SBWriteResGroup5], 
(instregex "CMP8ri")>;
+// Remaining SBWriteResGroup5 mappings, folded into one InstRW. The group's
+// SchedWriteRes is defined outside this excerpt.
+def: InstRW<[SBWriteResGroup5], (instregex
+    "CMP8rr", "CWDE", "DEC64r", "DEC8r", "INC64r", "INC8r",
+    "MMX_MOVD64from64rr", "MMX_MOVQ2DQrr", "MOV32rr", "MOV8ri", "MOV8rr",
+    "MOVDQArr", "MOVDQUrr", "MOVPQI2QIrr", "MOVSX32rr16", "MOVSX32rr8",
+    "MOVZX32rr16", "MOVZX32rr8", "NEG64r", "NEG8r", "NOT64r", "NOT8r",
+    "OR64ri8", "OR64rr", "OR8ri", "OR8rr", "PANDNrr", "PANDrr", "PORrr",
+    "PXORrr", "STC", "SUB64ri8", "SUB64rr", "SUB8ri", "SUB8rr", "TEST64rr",
+    "TEST8ri", "TEST8rr", "VMOVPQI2QIrr", "VMOVZPQILo2PQIrr", "VPANDNrr",
+    "VPANDrr", "VPORrr", "VPXORrr", "XOR32rr", "XOR64ri8", "XOR8ri",
+    "XOR8rr")>;
+
+// Latency 2; 1 micro-op; ResourceCycles: SBPort0=1.
+def SBWriteResGroup6 : SchedWriteRes<[SBPort0]> {
+  let Latency = 2;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup6], (instregex
+    "MOVMSKPDrr", "MOVMSKPSrr", "MOVPDI2DIrr", "MOVPQIto64rr", "PMOVMSKBrr",
+    "VMOVMSKPDYrr", "VMOVMSKPDrr", "VMOVMSKPSrr", "VMOVPDI2DIrr",
+    "VMOVPQIto64rr")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort0=2.
+def SBWriteResGroup8 : SchedWriteRes<[SBPort0]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroup8], (instregex
+    "BLENDVPDrr0", "BLENDVPSrr0", "ROL32ri", "ROL8ri", "ROR32ri", "ROR8ri",
+    "SETAr", "SETBEr", "VBLENDVPDYrr", "VBLENDVPDrr", "VBLENDVPSYrr",
+    "VBLENDVPSrr")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort15=2.
+def SBWriteResGroup9 : SchedWriteRes<[SBPort15]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroup9], (instregex "VPBLENDVBrr")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort015=2.
+def SBWriteResGroup10 : SchedWriteRes<[SBPort015]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[SBWriteResGroup10], (instregex
+    "SCASB", "SCASL", "SCASQ", "SCASW")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort0=1, SBPort1=1.
+def SBWriteResGroup11 : SchedWriteRes<[SBPort0,SBPort1]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup11], (instregex
+    "COMISDrr", "COMISSrr", "UCOMISDrr", "UCOMISSrr", "VCOMISDrr",
+    "VCOMISSrr", "VUCOMISDrr", "VUCOMISSrr")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort0=1, SBPort5=1.
+def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort5]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup12], (instregex
+    "CVTPS2PDrr", "PTESTrr", "VCVTPS2PDYrr", "VCVTPS2PDrr", "VPTESTYrr",
+    "VPTESTrr")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort0=1, SBPort15=1.
+def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort15]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup13], (instregex
+    "PSLLDrr", "PSLLQrr", "PSLLWrr", "PSRADrr", "PSRAWrr", "PSRLDrr",
+    "PSRLQrr", "PSRLWrr", "VPSRADrr", "VPSRAWrr", "VPSRLDrr", "VPSRLQrr",
+    "VPSRLWrr")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort1=1, SBPort0=1.
+def SBWriteResGroup14 : SchedWriteRes<[SBPort1,SBPort0]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup14], (instregex "BSWAP32r")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort5=1, SBPort15=1.
+def SBWriteResGroup15 : SchedWriteRes<[SBPort5,SBPort15]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup15], (instregex
+    "PINSRBrr", "PINSRDrr", "PINSRQrr", "PINSRWrri", "VPINSRBrr",
+    "VPINSRDrr", "VPINSRQrr", "VPINSRWrri")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort5=1, SBPort015=1.
+def SBWriteResGroup16 : SchedWriteRes<[SBPort5,SBPort015]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup16], (instregex "MMX_MOVDQ2Qrr")>;
+
+// Latency 2; 2 micro-ops; ResourceCycles: SBPort0=1, SBPort015=1.
+def SBWriteResGroup17 : SchedWriteRes<[SBPort0,SBPort015]> {
+  let Latency = 2;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup17], (instregex
+    "ADC64ri8", "ADC64rr", "ADC8ri", "ADC8rr", "CMOVAE32rr", "CMOVB32rr",
+    "CMOVE32rr", "CMOVG32rr", "CMOVGE32rr", "CMOVL32rr", "CMOVLE32rr",
+    "CMOVNE32rr", "CMOVNO32rr", "CMOVNP32rr", "CMOVNS32rr", "CMOVO32rr",
+    "CMOVP32rr", "CMOVS32rr", "SBB32rr", "SBB64ri8", "SBB8ri", "SBB8rr",
+    "SHLD32rri8", "SHRD32rri8")>;
+
+// Latency 3; 1 micro-op; ResourceCycles: SBPort0=1.
+def SBWriteResGroup18 : SchedWriteRes<[SBPort0]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup18], (instregex
+    "MMX_PMADDUBSWrr64", "MMX_PMULHRSWrr64", "MMX_PMULUDQirr", "PMADDUBSWrr",
+    "PMADDWDrr", "PMULDQrr", "PMULHRSWrr", "PMULHUWrr", "PMULHWrr",
+    "PMULLDrr", "PMULLWrr", "PMULUDQrr", "PSADBWrr", "VMOVMSKPSYrr",
+    "VPMADDUBSWrr", "VPMADDWDrr", "VPMULDQrr", "VPMULHRSWrr", "VPMULHWrr",
+    "VPMULLDrr", "VPMULLWrr", "VPSADBWrr")>;
+
+// Latency 3; 1 micro-op; ResourceCycles: SBPort1=1.
+def SBWriteResGroup19 : SchedWriteRes<[SBPort1]> {
+  let Latency = 3;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+// First part of the SBWriteResGroup19 mappings; more InstRW entries for this
+// group follow immediately after this def.
+def: InstRW<[SBWriteResGroup19], (instregex
+    "ADDPDrr", "ADDPSrr", "ADDSDrr", "ADDSSrr", "ADDSUBPDrr", "ADDSUBPSrr",
+    "BSF32rr", "BSR32rr", "CMPPDrri", "CMPPSrri", "CMPSDrr", "CMPSSrr",
+    "CRC32r32r32", "CRC32r32r8", "CVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup19], (instregex
+    "CVTPS2DQrr", "CVTTPS2DQrr", "MAXPDrr", "MAXPSrr", "MAXSDrr", "MAXSSrr",
+    "MINPDrr", "MINPSrr", "MINSDrr", "MINSSrr", "MMX_CVTPI2PSirr",
+    "MMX_CVTPS2PIirr", "MMX_CVTTPS2PIirr", "MUL8r", "POPCNT32rr", "ROUNDPDr",
+    "ROUNDPSr", "ROUNDSDr", "ROUNDSSr", "SUBPDrr", "SUBPSrr", "SUBSDrr",
+    "SUBSSrr", "VADDPDYrr", "VADDPDrr", "VADDPSYrr", "VADDPSrr", "VADDSDrr",
+    "VADDSSrr", "VADDSUBPDYrr", "VADDSUBPDrr", "VADDSUBPSYrr", "VADDSUBPSrr",
+    // NOTE(review): the Haswell model in this same patch lists
+    // "VBROADCASTF128" in a load-port group (HWPort23); confirm that an
+    // SBPort1-only group is intended here.
+    "VBROADCASTF128",
+    "VCMPPDYrri", "VCMPPDrri", "VCMPPSYrri", "VCMPPSrri", "VCMPSDrr",
+    "VCMPSSrr", "VCVTDQ2PSYrr", "VCVTDQ2PSrr", "VCVTPS2DQYrr", "VCVTPS2DQrr",
+    "VCVTTPS2DQrr", "VMAXPDYrr", "VMAXPDrr", "VMAXPSYrr", "VMAXPSrr",
+    "VMAXSDrr", "VMAXSSrr", "VMINPDrr", "VMINPSrr", "VMINSDrr", "VMINSSrr",
+    "VPBROADCASTMB2QZrr", "VROUNDPDr", "VROUNDPSr", "VROUNDSDr", "VSUBPDYrr",
+    "VSUBPDrr", "VSUBPSYrr", "VSUBPSrr")>;
+
+// Latency 3; 2 micro-ops; ResourceCycles: SBPort0=1, SBPort5=1.
+def SBWriteResGroup20 : SchedWriteRes<[SBPort0,SBPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup20], (instregex
+    "EXTRACTPSrr", "VEXTRACTPSrr")>;
+
+// Latency 3; 2 micro-ops; ResourceCycles: SBPort0=1, SBPort15=1.
+def SBWriteResGroup21 : SchedWriteRes<[SBPort0,SBPort15]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup21], (instregex
+    "PEXTRBrr", "PEXTRDrr", "PEXTRQrr", "PEXTRWri", "VPEXTRBrr", "VPEXTRDrr",
+    "VPEXTRQrr", "VPEXTRWri", "SHL64rCL", "SHL8rCL")>;
+
+// Latency 3; 3 micro-ops; ResourceCycles: SBPort15=3.
+def SBWriteResGroup22 : SchedWriteRes<[SBPort15]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+// One more SBWriteResGroup22 entry follows immediately after this def.
+def: InstRW<[SBWriteResGroup22], (instregex
+    "MMX_PHADDSWrr64", "MMX_PHADDWrr64", "MMX_PHADDrr64", "MMX_PHSUBDrr64",
+    "MMX_PHSUBSWrr64", "MMX_PHSUBWrr64", "PHADDDrr", "PHADDSWrr128",
+    "PHADDWrr", "PHSUBDrr", "PHSUBSWrr128", "PHSUBWrr", "VPHADDDrr",
+    "VPHADDSWrr128", "VPHADDWrr", "VPHSUBDrr", "VPHSUBSWrr128")>;
+def: InstRW<[SBWriteResGroup22], (instregex "VPHSUBWrr")>;
+
+// Latency 3; 3 micro-ops; ResourceCycles: SBPort015=3.
+def SBWriteResGroup23 : SchedWriteRes<[SBPort015]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup23], (instregex
+    "LEAVE64", "XADD32rr", "XADD8rr")>;
+
+// Latency 3; 3 micro-ops; ResourceCycles: SBPort0=2, SBPort015=1.
+def SBWriteResGroup24 : SchedWriteRes<[SBPort0,SBPort015]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup24], (instregex "CMOVA32rr", "CMOVBE32rr")>;
+
+// Latency 4; 2 micro-ops; ResourceCycles: SBPort0=1, SBPort1=1.
+def SBWriteResGroup25 : SchedWriteRes<[SBPort0,SBPort1]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup25], (instregex "MUL64r")>;
+
+// Latency 4; 2 micro-ops; ResourceCycles: SBPort1=1, SBPort5=1.
+def SBWriteResGroup26 : SchedWriteRes<[SBPort1,SBPort5]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup26], (instregex
+    "CVTDQ2PDrr", "CVTPD2DQrr", "CVTPD2PSrr", "CVTSD2SSrr", "CVTSI2SD64rr",
+    "CVTSI2SDrr", "CVTTPD2DQrr", "MMX_CVTPD2PIirr", "MMX_CVTPI2PDirr",
+    "MMX_CVTTPD2PIirr", "VCVTDQ2PDYrr", "VCVTDQ2PDrr", "VCVTPD2DQYrr",
+    "VCVTPD2DQrr", "VCVTPD2PSYrr", "VCVTPD2PSrr", "VCVTSI2SD64rr",
+    "VCVTSI2SDrr", "VCVTTPD2DQYrr", "VCVTTPD2DQrr")>;
+
+// Latency 4; 2 micro-ops; ResourceCycles: SBPort1=1, SBPort015=1.
+def SBWriteResGroup27 : SchedWriteRes<[SBPort1,SBPort015]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup27], (instregex "MOV64sr", "PAUSE")>;
+
+// Latency 5; 1 micro-op; ResourceCycles: SBPort0=1.
+def SBWriteResGroup28 : SchedWriteRes<[SBPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup28], (instregex
+    "MULPDrr", "MULPSrr", "MULSDrr", "MULSSrr", "PCMPGTQrr",
+    "PHMINPOSUWrr128", "RCPPSr", "RCPSSr", "RSQRTPSr", "RSQRTSSr",
+    "VMULPDYrr", "VMULPDrr", "VMULPSYrr", "VMULPSrr", "VMULSDrr", "VMULSSrr",
+    "VPCMPGTQrr", "VPHMINPOSUWrr128", "VRSQRTPSr", "VRSQRTSSr")>;
+
+// Latency 5; 1 micro-op; ResourceCycles: SBPort23=1.
+def SBWriteResGroup29 : SchedWriteRes<[SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup29], (instregex
+    "MOV32rm", "MOV8rm", "MOVSX32rm16", "MOVSX32rm8", "MOVZX32rm16",
+    "MOVZX32rm8", "PREFETCH")>;
+
+// Latency 5; 2 micro-ops; ResourceCycles: SBPort0=1, SBPort1=1.
+def SBWriteResGroup30 : SchedWriteRes<[SBPort0,SBPort1]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup30], (instregex
+    "CVTSD2SI64rr", "CVTSD2SIrr", "CVTSS2SI64rr", "CVTSS2SIrr",
+    "CVTTSD2SI64rr", "CVTTSD2SIrr", "CVTTSS2SI64rr", "CVTTSS2SIrr",
+    "VCVTSD2SI64rr", "VCVTSS2SI64rr", "VCVTSS2SIrr", "VCVTTSD2SI64rr",
+    "VCVTTSD2SIrr", "VCVTTSS2SI64rr", "VCVTTSS2SIrr")>;
+
+// Latency 5; 2 micro-ops; ResourceCycles: SBPort4=1, SBPort23=1.
+def SBWriteResGroup31 : SchedWriteRes<[SBPort4,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup31], (instregex
+    "MOV64mr", "MOV8mr", "MOVAPDmr", "MOVAPSmr", "MOVDQAmr", "MOVDQUmr",
+    "MOVHPDmr", "MOVHPSmr", "MOVLPDmr", "MOVLPSmr", "MOVNTDQmr",
+    "MOVNTI_64mr", "MOVNTImr", "MOVNTPDmr", "MOVNTPSmr", "MOVPDI2DImr",
+    "MOVPQI2QImr", "MOVPQIto64mr", "MOVSSmr", "MOVUPDmr", "MOVUPSmr",
+    "PUSH64i8", "PUSH64r", "VEXTRACTF128mr", "VMOVAPDYmr", "VMOVAPDmr",
+    "VMOVAPSYmr", "VMOVAPSmr", "VMOVDQAYmr", "VMOVDQAmr", "VMOVDQUYmr",
+    "VMOVDQUmr", "VMOVHPDmr", "VMOVHPSmr", "VMOVLPDmr", "VMOVLPSmr",
+    "VMOVNTDQYmr", "VMOVNTDQmr", "VMOVNTPDYmr", "VMOVNTPDmr", "VMOVNTPSYmr",
+    "VMOVNTPSmr", "VMOVPDI2DImr", "VMOVPQI2QImr", "VMOVPQIto64mr",
+    "VMOVSDmr", "VMOVSSmr", "VMOVUPDYmr", "VMOVUPDmr", "VMOVUPSYmr",
+    "VMOVUPSmr")>;
+
+// Latency 5; 3 micro-ops; ResourceCycles: SBPort0=1, SBPort15=2.
+def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort15]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup32], (instregex "MPSADBWrri", "VMPSADBWrri")>;
+
+// Latency 5; 3 micro-ops; ResourceCycles: SBPort1=1, SBPort5=2.
+def SBWriteResGroup33 : SchedWriteRes<[SBPort1,SBPort5]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup33], (instregex
+    "CLI", "CVTSI2SS64rr", "CVTSI2SSrr", "HADDPDrr", "HADDPSrr", "HSUBPDrr",
+    "HSUBPSrr", "VCVTSI2SS64rr", "VCVTSI2SSrr", "VHADDPDrr", "VHADDPSYrr",
+    "VHADDPSrr", "VHSUBPDYrr", "VHSUBPDrr", "VHSUBPSYrr", "VHSUBPSrr")>;
+
+// Latency 5; 3 micro-ops; ResourceCycles: SBPort4=1, SBPort5=1, SBPort23=1.
+def SBWriteResGroup34 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup34], (instregex
+    "CALL64r", "EXTRACTPSmr", "VEXTRACTPSmr")>;
+
+// Latency 5; 3 micro-ops; ResourceCycles: SBPort4=1, SBPort01=1, SBPort23=1.
+// NOTE(review): "VMASKMOVPDYrm" is an rm-suffixed name while the other two
+// entries are mr forms; confirm it was not meant to be "VMASKMOVPDYmr".
+def SBWriteResGroup35 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup35], (instregex
+    "VMASKMOVPDYrm", "VMASKMOVPDmr", "VMASKMOVPSmr")>;
+
+// Latency 5; 3 micro-ops; ResourceCycles: SBPort4=1, SBPort23=1, SBPort0=1.
+def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup36], (instregex
+    "SETAEm", "SETBm", "SETEm", "SETGEm", "SETGm", "SETLEm", "SETLm",
+    "SETNEm", "SETNOm", "SETNPm", "SETNSm", "SETOm", "SETPm", "SETSm")>;
+
+// Latency 5; 3 micro-ops; ResourceCycles: SBPort4=1, SBPort23=1, SBPort15=1.
+def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup37], (instregex
+    "PEXTRBmr", "VPEXTRBmr", "VPEXTRDmr", "VPEXTRWmr")>;
+
+// Latency 5; 3 micro-ops; ResourceCycles: SBPort4=1, SBPort23=1, SBPort015=1.
+def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup38], (instregex
+    "MOV8mi", "STOSB", "STOSL", "STOSQ", "STOSW")>;
+
+// Latency 5; 4 micro-ops; ResourceCycles: SBPort5=1, SBPort015=3.
+def SBWriteResGroup39 : SchedWriteRes<[SBPort5,SBPort015]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup39], (instregex "FNINIT")>;
+
+// Latency 5; 4 micro-ops; ResourceCycles: SBPort0=1, SBPort015=3.
+def SBWriteResGroup40 : SchedWriteRes<[SBPort0,SBPort015]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup40], (instregex "CMPXCHG32rr", "CMPXCHG8rr")>;
+
+// Latency 5; 4 micro-ops; ResourceCycles: SBPort4=1, SBPort23=1, SBPort0=2.
+def SBWriteResGroup41 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup41], (instregex "SETAm", "SETBEm")>;
+
+// Latency 5; 4 micro-ops; one cycle each on SBPort0, SBPort4, SBPort5,
+// SBPort23.
+def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup42], (instregex
+    "LDMXCSR", "STMXCSR", "VLDMXCSR", "VSTMXCSR")>;
+
+// Latency 5; 4 micro-ops; one cycle each on SBPort0, SBPort4, SBPort23,
+// SBPort15.
+def SBWriteResGroup43 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup43], (instregex
+    "PEXTRDmr", "PEXTRQmr", "VPEXTRQmr", "PUSHF16", "PUSHF64")>;
+
+// Latency 5; 4 micro-ops; one cycle each on SBPort4, SBPort5, SBPort01,
+// SBPort23.
+def SBWriteResGroup44 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup44], (instregex "CLFLUSH")>;
+
+// Latency 5; 5 micro-ops; ResourceCycles: SBPort4=1, SBPort5=2, SBPort01=1,
+// SBPort23=1.
+def SBWriteResGroup45 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup45], (instregex "FXRSTOR")>;
+
+// Latency 6; 1 micro-op; ResourceCycles: SBPort23=1.
+def SBWriteResGroup46 : SchedWriteRes<[SBPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup46], (instregex
+    "LDDQUrm", "MMX_MOVD64from64rm", "MOV64toPQIrm", "MOVAPDrm", "MOVAPSrm",
+    "MOVDDUPrm", "MOVDI2PDIrm", "MOVDQArm", "MOVDQUrm", "MOVNTDQArm",
+    "MOVSHDUPrm", "MOVSLDUPrm", "MOVSSrm", "MOVUPDrm", "MOVUPSrm", "POP64r",
+    "VBROADCASTSSrm", "VLDDQUYrm", "VLDDQUrm", "VMOV64toPQIrm", "VMOVAPDrm",
+    "VMOVAPSrm", "VMOVDDUPrm", "VMOVDI2PDIrm", "VMOVDQArm", "VMOVDQUrm",
+    "VMOVNTDQArm", "VMOVQI2PQIrm", "VMOVSDrm", "VMOVSHDUPrm", "VMOVSLDUPrm",
+    "VMOVSSrm", "VMOVUPDrm", "VMOVUPSrm")>;
+
+// Latency 6; 2 micro-ops; ResourceCycles: SBPort5=1, SBPort23=1.
+def SBWriteResGroup47 : SchedWriteRes<[SBPort5,SBPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup47], (instregex "JMP64m", "MOV64sm")>;
+
+// Latency 6; 2 micro-ops; ResourceCycles: SBPort23=1, SBPort0=1.
+def SBWriteResGroup48 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup48], (instregex "BT64mi8")>;
+
+// Latency 6; 2 micro-ops; ResourceCycles: SBPort23=1, SBPort15=1.
+def SBWriteResGroup49 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup49], (instregex
+    "MMX_PABSBrm64", "MMX_PABSDrm64", "MMX_PABSWrm64", "MMX_PALIGNR64irm",
+    "MMX_PSHUFBrm64", "MMX_PSIGNBrm64", "MMX_PSIGNDrm64", "MMX_PSIGNWrm64")>;
+
+// Latency 6; 2 micro-ops; ResourceCycles: SBPort23=1, SBPort015=1.
+// NOTE(review): "POP64r" (SBWriteResGroup46) and "PUSH64r"
+// (SBWriteResGroup31) are textual prefixes of "POP64rmm" / "PUSH64rmm"
+// below; if instregex matches by prefix these mappings overlap - confirm
+// which group is meant to win.
+def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup50], (instregex
+    "ADD64rm", "ADD8rm", "AND64rm", "AND8rm", "CMP64mi8", "CMP64mr",
+    "CMP64rm", "CMP8mi", "CMP8mr", "CMP8rm", "LODSL", "LODSQ", "OR64rm",
+    "OR8rm", "SUB64rm", "SUB8rm", "XOR64rm", "XOR8rm", "POP64rmm",
+    "PUSH64rmm")>;
+
+// Latency 7; 1 micro-op; ResourceCycles: SBPort23=1.
+// The SS broadcast entry is the Y-suffixed opcode, like every other entry in
+// this group. It previously repeated "VBROADCASTSSrm", which
+// SBWriteResGroup46 (latency 6) already maps, so the same opcode was given
+// two conflicting write resources.
+def SBWriteResGroup51 : SchedWriteRes<[SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup51], (instregex
+    "VBROADCASTSDYrm", "VBROADCASTSSYrm", "VMOVAPDYrm", "VMOVAPSYrm",
+    "VMOVDDUPYrm", "VMOVDQAYrm", "VMOVDQUYrm", "VMOVSHDUPYrm",
+    "VMOVSLDUPYrm", "VMOVUPDYrm", "VMOVUPSYrm")>;
+
+def SBWriteResGroup52 : SchedWriteRes<[SBPort0,SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps 
= 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup52], (instregex "CVTPS2PDrm")>; +def: InstRW<[SBWriteResGroup52], (instregex "CVTSS2SDrm")>; +def: InstRW<[SBWriteResGroup52], (instregex "VCVTPS2PDYrm")>; +def: InstRW<[SBWriteResGroup52], (instregex "VCVTPS2PDrm")>; +def: InstRW<[SBWriteResGroup52], (instregex "VCVTSS2SDrm")>; +def: InstRW<[SBWriteResGroup52], (instregex "VTESTPDrm")>; +def: InstRW<[SBWriteResGroup52], (instregex "VTESTPSrm")>; + +def SBWriteResGroup53 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup53], (instregex "ANDNPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "ANDNPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "ANDPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "ANDPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "INSERTPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "MOVHPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "MOVHPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "MOVLPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "MOVLPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "ORPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "ORPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "SHUFPDrmi")>; +def: InstRW<[SBWriteResGroup53], (instregex "SHUFPSrmi")>; +def: InstRW<[SBWriteResGroup53], (instregex "UNPCKHPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "UNPCKHPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "UNPCKLPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "UNPCKLPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VANDNPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VANDNPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VANDPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VANDPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VBROADCASTF128")>; +def: InstRW<[SBWriteResGroup53], (instregex "VINSERTPSrm")>; +def: 
InstRW<[SBWriteResGroup53], (instregex "VMOVHPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VMOVHPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VMOVLPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VMOVLPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VORPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VORPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VPERMILPDmi")>; +def: InstRW<[SBWriteResGroup53], (instregex "VPERMILPDri")>; +def: InstRW<[SBWriteResGroup53], (instregex "VPERMILPSmi")>; +def: InstRW<[SBWriteResGroup53], (instregex "VPERMILPSri")>; +def: InstRW<[SBWriteResGroup53], (instregex "VSHUFPDrmi")>; +def: InstRW<[SBWriteResGroup53], (instregex "VSHUFPSrmi")>; +def: InstRW<[SBWriteResGroup53], (instregex "VUNPCKHPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VUNPCKHPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VUNPCKLPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VUNPCKLPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VXORPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "VXORPSrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "XORPDrm")>; +def: InstRW<[SBWriteResGroup53], (instregex "XORPSrm")>; + +def SBWriteResGroup54 : SchedWriteRes<[SBPort5,SBPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup54], (instregex "AESDECLASTrr")>; +def: InstRW<[SBWriteResGroup54], (instregex "AESDECrr")>; +def: InstRW<[SBWriteResGroup54], (instregex "AESENCLASTrr")>; +def: InstRW<[SBWriteResGroup54], (instregex "AESENCrr")>; +def: InstRW<[SBWriteResGroup54], (instregex "KANDQrr")>; +def: InstRW<[SBWriteResGroup54], (instregex "VAESDECLASTrr")>; +def: InstRW<[SBWriteResGroup54], (instregex "VAESDECrr")>; +def: InstRW<[SBWriteResGroup54], (instregex "VAESENCrr")>; + +def SBWriteResGroup55 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup55], 
(instregex "BLENDPDrmi")>; +def: InstRW<[SBWriteResGroup55], (instregex "BLENDPSrmi")>; +def: InstRW<[SBWriteResGroup55], (instregex "VBLENDPDrmi")>; +def: InstRW<[SBWriteResGroup55], (instregex "VBLENDPSrmi")>; +def: InstRW<[SBWriteResGroup55], (instregex "VINSERTF128rm")>; + +def SBWriteResGroup56 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup56], (instregex "MMX_PADDQirm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PABSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PABSDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PABSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PACKSSDWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PACKSSWBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PACKUSDWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PACKUSWBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PADDBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PADDDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PADDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PADDSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PADDSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PADDUSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PADDUSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PADDWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PALIGNRrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "PAVGBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PAVGWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PBLENDWrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "PCMPEQBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PCMPEQDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PCMPEQQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PCMPEQWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PCMPGTBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PCMPGTDrm")>; +def: InstRW<[SBWriteResGroup56], 
(instregex "PCMPGTWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PINSRBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PINSRDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PINSRQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PINSRWrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMAXSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMAXSDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMAXSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMAXUBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMAXUDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMAXUWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMINSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMINSDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMINSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMINUBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMINUDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMINUWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXBDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXBQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXWDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXWQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXBDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXBQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXWDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXWQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSHUFBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSHUFDmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSHUFHWmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSHUFLWmi")>; +def: InstRW<[SBWriteResGroup56], (instregex 
"PSIGNBrm128")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSIGNDrm128")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSIGNWrm128")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSUBBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSUBDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSUBQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSUBSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSUBSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSUBUSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSUBUSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PSUBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKHBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKHDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKHQDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKHWDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKLBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKLDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKLQDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKLWDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPABSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPABSDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPABSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPACKSSDWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPACKSSWBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPACKUSDWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPACKUSWBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPADDBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPADDDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPADDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPADDSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPADDSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPADDUSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPADDUSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex 
"VPADDWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPALIGNRrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPAVGBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPAVGWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPBLENDWrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPCMPEQBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPCMPEQDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPCMPEQQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPCMPEQWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPCMPGTBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPCMPGTDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPCMPGTWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPINSRBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPINSRDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPINSRQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPINSRWrmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMAXSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMAXSDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMAXSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMAXUBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMAXUDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMAXUWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMINSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMINSDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMINSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMINUBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMINUDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMINUWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXBDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXBQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXWDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex 
"VPMOVSXWQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXBDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXBQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXWDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXWQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSHUFBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSHUFDmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSHUFHWmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSHUFLWmi")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSIGNBrm128")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSIGNDrm128")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSIGNWrm128")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSUBBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSUBDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSUBQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSUBSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSUBSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSUBUSBrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSUBUSWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPSUBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKHBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKHDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKHWDrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKLDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKLQDQrm")>; +def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKLWDrm")>; + +def SBWriteResGroup57 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup57], (instregex 
"PANDNrm")>; +def: InstRW<[SBWriteResGroup57], (instregex "PANDrm")>; +def: InstRW<[SBWriteResGroup57], (instregex "PORrm")>; +def: InstRW<[SBWriteResGroup57], (instregex "PXORrm")>; +def: InstRW<[SBWriteResGroup57], (instregex "VPANDNrm")>; +def: InstRW<[SBWriteResGroup57], (instregex "VPANDrm")>; +def: InstRW<[SBWriteResGroup57], (instregex "VPORrm")>; +def: InstRW<[SBWriteResGroup57], (instregex "VPXORrm")>; + +def SBWriteResGroup58 : SchedWriteRes<[SBPort0,SBPort0]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup58], (instregex "VRCPPSr")>; +def: InstRW<[SBWriteResGroup58], (instregex "VRSQRTPSYr")>; + +def SBWriteResGroup59 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup59], (instregex "VERRm")>; +def: InstRW<[SBWriteResGroup59], (instregex "VERWm")>; + +def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup60], (instregex "LODSB")>; +def: InstRW<[SBWriteResGroup60], (instregex "LODSW")>; + +def SBWriteResGroup61 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup61], (instregex "FARJMP64")>; + +def SBWriteResGroup62 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup62], (instregex "ADC64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "ADC8rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVAE64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVB64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVE64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVG64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVGE64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex 
"CMOVL64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVLE64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVNE64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVNO64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVNP64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVNS64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVO64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVP64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "CMOVS64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "SBB64rm")>; +def: InstRW<[SBWriteResGroup62], (instregex "SBB8rm")>; + +def SBWriteResGroup63 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup63], (instregex "FNSTSWm")>; + +def SBWriteResGroup64 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup64], (instregex "SLDT32r")>; +def: InstRW<[SBWriteResGroup64], (instregex "STR32r")>; + +def SBWriteResGroup65 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup65], (instregex "CALL64m")>; +def: InstRW<[SBWriteResGroup65], (instregex "FNSTCW16m")>; + +def SBWriteResGroup66 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup66], (instregex "BTC64mi8")>; +def: InstRW<[SBWriteResGroup66], (instregex "BTR64mi8")>; +def: InstRW<[SBWriteResGroup66], (instregex "BTS64mi8")>; +def: InstRW<[SBWriteResGroup66], (instregex "SAR64mi")>; +def: InstRW<[SBWriteResGroup66], (instregex "SAR8mi")>; +def: InstRW<[SBWriteResGroup66], (instregex "SHL64m1")>; +def: InstRW<[SBWriteResGroup66], (instregex "SHL64mi")>; +def: InstRW<[SBWriteResGroup66], (instregex "SHL8m1")>; +def: InstRW<[SBWriteResGroup66], 
(instregex "SHL8mi")>; +def: InstRW<[SBWriteResGroup66], (instregex "SHR64mi")>; +def: InstRW<[SBWriteResGroup66], (instregex "SHR8mi")>; + +def SBWriteResGroup67 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup67], (instregex "ADD64mi8")>; +def: InstRW<[SBWriteResGroup67], (instregex "ADD64mr")>; +def: InstRW<[SBWriteResGroup67], (instregex "ADD8mi")>; +def: InstRW<[SBWriteResGroup67], (instregex "ADD8mr")>; +def: InstRW<[SBWriteResGroup67], (instregex "AND64mi8")>; +def: InstRW<[SBWriteResGroup67], (instregex "AND64mr")>; +def: InstRW<[SBWriteResGroup67], (instregex "AND8mi")>; +def: InstRW<[SBWriteResGroup67], (instregex "AND8mr")>; +def: InstRW<[SBWriteResGroup67], (instregex "DEC64m")>; +def: InstRW<[SBWriteResGroup67], (instregex "DEC8m")>; +def: InstRW<[SBWriteResGroup67], (instregex "INC64m")>; +def: InstRW<[SBWriteResGroup67], (instregex "INC8m")>; +def: InstRW<[SBWriteResGroup67], (instregex "NEG64m")>; +def: InstRW<[SBWriteResGroup67], (instregex "NEG8m")>; +def: InstRW<[SBWriteResGroup67], (instregex "NOT64m")>; +def: InstRW<[SBWriteResGroup67], (instregex "NOT8m")>; +def: InstRW<[SBWriteResGroup67], (instregex "OR64mi8")>; +def: InstRW<[SBWriteResGroup67], (instregex "OR64mr")>; +def: InstRW<[SBWriteResGroup67], (instregex "OR8mi")>; +def: InstRW<[SBWriteResGroup67], (instregex "OR8mr")>; +def: InstRW<[SBWriteResGroup67], (instregex "SUB64mi8")>; +def: InstRW<[SBWriteResGroup67], (instregex "SUB64mr")>; +def: InstRW<[SBWriteResGroup67], (instregex "SUB8mi")>; +def: InstRW<[SBWriteResGroup67], (instregex "SUB8mr")>; +def: InstRW<[SBWriteResGroup67], (instregex "TEST64rm")>; +def: InstRW<[SBWriteResGroup67], (instregex "TEST8mi")>; +def: InstRW<[SBWriteResGroup67], (instregex "TEST8rm")>; +def: InstRW<[SBWriteResGroup67], (instregex "XOR64mi8")>; +def: InstRW<[SBWriteResGroup67], (instregex "XOR64mr")>; +def: InstRW<[SBWriteResGroup67], (instregex 
"XOR8mi")>; +def: InstRW<[SBWriteResGroup67], (instregex "XOR8mr")>; + +def SBWriteResGroup68 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup68], (instregex "MMX_PMADDUBSWrm64")>; +def: InstRW<[SBWriteResGroup68], (instregex "MMX_PMULHRSWrm64")>; +def: InstRW<[SBWriteResGroup68], (instregex "VTESTPDYrm")>; +def: InstRW<[SBWriteResGroup68], (instregex "VTESTPSYrm")>; + +def SBWriteResGroup69 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup69], (instregex "BSF64rm")>; +def: InstRW<[SBWriteResGroup69], (instregex "BSR64rm")>; +def: InstRW<[SBWriteResGroup69], (instregex "CRC32r32m16")>; +def: InstRW<[SBWriteResGroup69], (instregex "CRC32r32m8")>; +def: InstRW<[SBWriteResGroup69], (instregex "MUL8m")>; + +def SBWriteResGroup70 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup70], (instregex "VANDNPDYrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VANDNPSYrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VANDPDrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VANDPSrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VORPDYrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VORPSYrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VPERM2F128rm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VPERMILPDYri")>; +def: InstRW<[SBWriteResGroup70], (instregex "VPERMILPDmi")>; +def: InstRW<[SBWriteResGroup70], (instregex "VPERMILPSYri")>; +def: InstRW<[SBWriteResGroup70], (instregex "VPERMILPSmi")>; +def: InstRW<[SBWriteResGroup70], (instregex "VSHUFPDYrmi")>; +def: InstRW<[SBWriteResGroup70], (instregex "VSHUFPSYrmi")>; +def: InstRW<[SBWriteResGroup70], (instregex "VUNPCKHPDrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VUNPCKHPSrm")>; +def: InstRW<[SBWriteResGroup70], (instregex 
"VUNPCKLPDYrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VUNPCKLPSYrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VXORPDrm")>; +def: InstRW<[SBWriteResGroup70], (instregex "VXORPSrm")>; + +def SBWriteResGroup71 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup71], (instregex "VBLENDPDYrmi")>; +def: InstRW<[SBWriteResGroup71], (instregex "VBLENDPSYrmi")>; + +def SBWriteResGroup72 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup72], (instregex "BLENDVPDrm0")>; +def: InstRW<[SBWriteResGroup72], (instregex "BLENDVPSrm0")>; +def: InstRW<[SBWriteResGroup72], (instregex "VBLENDVPDrm")>; +def: InstRW<[SBWriteResGroup72], (instregex "VBLENDVPSrm")>; +def: InstRW<[SBWriteResGroup72], (instregex "VMASKMOVPDrm")>; +def: InstRW<[SBWriteResGroup72], (instregex "VMASKMOVPSrm")>; + +def SBWriteResGroup73 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup73], (instregex "PBLENDVBrr0")>; +def: InstRW<[SBWriteResGroup73], (instregex "VPBLENDVBrm")>; + +def SBWriteResGroup74 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup74], (instregex "COMISDrm")>; +def: InstRW<[SBWriteResGroup74], (instregex "COMISSrm")>; +def: InstRW<[SBWriteResGroup74], (instregex "UCOMISDrm")>; +def: InstRW<[SBWriteResGroup74], (instregex "UCOMISSrm")>; +def: InstRW<[SBWriteResGroup74], (instregex "VCOMISDrm")>; +def: InstRW<[SBWriteResGroup74], (instregex "VCOMISSrm")>; +def: InstRW<[SBWriteResGroup74], (instregex "VUCOMISDrm")>; +def: InstRW<[SBWriteResGroup74], (instregex "VUCOMISSrm")>; + +def SBWriteResGroup75 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles 
= [1,1,1]; +} +def: InstRW<[SBWriteResGroup75], (instregex "PTESTrm")>; +def: InstRW<[SBWriteResGroup75], (instregex "VPTESTrm")>; + +def SBWriteResGroup76 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup76], (instregex "PSLLDrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "PSLLQrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "PSLLWrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "PSRADrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "PSRAWrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "PSRLDrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "PSRLQrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "PSRLWrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPSLLDri")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPSLLQri")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPSLLWri")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPSRADrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPSRAWrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPSRLDrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPSRLQrm")>; +def: InstRW<[SBWriteResGroup76], (instregex "VPSRLWrm")>; + +def SBWriteResGroup77 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHADDSWrm64")>; +def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHADDWrm64")>; +def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHADDrm64")>; +def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHSUBDrm64")>; +def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHSUBSWrm64")>; +def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHSUBWrm64")>; + +def SBWriteResGroup78 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SBWriteResGroup78], (instregex "CMPXCHG64rm")>; +def: InstRW<[SBWriteResGroup78], (instregex 
"CMPXCHG8rm")>; + +def SBWriteResGroup79 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup79], (instregex "CMOVA64rm")>; +def: InstRW<[SBWriteResGroup79], (instregex "CMOVBE64rm")>; + +def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort015]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [2,3]; +} +def: InstRW<[SBWriteResGroup80], (instregex "CMPSB")>; +def: InstRW<[SBWriteResGroup80], (instregex "CMPSL")>; +def: InstRW<[SBWriteResGroup80], (instregex "CMPSQ")>; +def: InstRW<[SBWriteResGroup80], (instregex "CMPSW")>; + +def SBWriteResGroup81 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,2]; +} +def: InstRW<[SBWriteResGroup81], (instregex "FLDCW16m")>; + +def SBWriteResGroup82 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,2]; +} +def: InstRW<[SBWriteResGroup82], (instregex "ROL64mi")>; +def: InstRW<[SBWriteResGroup82], (instregex "ROL8mi")>; +def: InstRW<[SBWriteResGroup82], (instregex "ROR64mi")>; +def: InstRW<[SBWriteResGroup82], (instregex "ROR8mi")>; + +def SBWriteResGroup83 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,2]; +} +def: InstRW<[SBWriteResGroup83], (instregex "MOVSB")>; +def: InstRW<[SBWriteResGroup83], (instregex "MOVSL")>; +def: InstRW<[SBWriteResGroup83], (instregex "MOVSQ")>; +def: InstRW<[SBWriteResGroup83], (instregex "MOVSW")>; +def: InstRW<[SBWriteResGroup83], (instregex "XADD64rm")>; +def: InstRW<[SBWriteResGroup83], (instregex "XADD8rm")>; + +def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; +} +def: InstRW<[SBWriteResGroup84], (instregex "FARCALL64")>; + +def SBWriteResGroup85 : 
SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SBWriteResGroup85], (instregex "SHLD64mri8")>; +def: InstRW<[SBWriteResGroup85], (instregex "SHRD64mri8")>; + +def SBWriteResGroup86 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup86], (instregex "MMX_PMULUDQirm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMADDUBSWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMADDWDrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMULDQrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMULHRSWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMULHUWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMULHWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMULLDrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMULLWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PMULUDQrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "PSADBWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMADDUBSWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMADDWDrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMULDQrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMULHRSWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMULHUWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMULHWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMULLDrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMULLWrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPMULUDQrm")>; +def: InstRW<[SBWriteResGroup86], (instregex "VPSADBWrm")>; + +def SBWriteResGroup87 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup87], (instregex "ADDPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "ADDPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "ADDSDrm")>; +def: 
InstRW<[SBWriteResGroup87], (instregex "ADDSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "ADDSUBPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "ADDSUBPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "CMPPDrmi")>; +def: InstRW<[SBWriteResGroup87], (instregex "CMPPSrmi")>; +def: InstRW<[SBWriteResGroup87], (instregex "CMPSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "CVTDQ2PSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "CVTPS2DQrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "CVTSI2SD64rm")>; +def: InstRW<[SBWriteResGroup87], (instregex "CVTSI2SDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "CVTTPS2DQrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MAXPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MAXPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MAXSDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MAXSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MINPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MINPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MINSDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MINSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MMX_CVTPI2PSirm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MMX_CVTPS2PIirm")>; +def: InstRW<[SBWriteResGroup87], (instregex "MMX_CVTTPS2PIirm")>; +def: InstRW<[SBWriteResGroup87], (instregex "POPCNT64rm")>; +def: InstRW<[SBWriteResGroup87], (instregex "ROUNDPDm")>; +def: InstRW<[SBWriteResGroup87], (instregex "ROUNDPSm")>; +def: InstRW<[SBWriteResGroup87], (instregex "ROUNDSDm")>; +def: InstRW<[SBWriteResGroup87], (instregex "ROUNDSSm")>; +def: InstRW<[SBWriteResGroup87], (instregex "SUBPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "SUBPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "SUBSDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "SUBSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VADDPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VADDPSrm")>; +def: 
InstRW<[SBWriteResGroup87], (instregex "VADDSDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VADDSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VADDSUBPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VADDSUBPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCMPPDrmi")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCMPPSrmi")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCMPSDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCMPSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCVTDQ2PSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCVTPS2DQrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCVTSI2SD64rm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCVTSI2SDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VCVTTPS2DQrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VMAXPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VMAXPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VMAXSDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VMAXSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VMINPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VMINPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VMINSDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VMINSSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VROUNDPDm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VROUNDPSm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VROUNDSDm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VROUNDSSm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VSUBPDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VSUBPSrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VSUBSDrm")>; +def: InstRW<[SBWriteResGroup87], (instregex "VSUBSSrm")>; + +def SBWriteResGroup88 : SchedWriteRes<[SBPort23,SBPort0]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SBWriteResGroup88], (instregex "VBLENDVPDYrm")>; +def: InstRW<[SBWriteResGroup88], (instregex "VBLENDVPSYrm")>; 
+def: InstRW<[SBWriteResGroup88], (instregex "VMASKMOVPDrm")>; +def: InstRW<[SBWriteResGroup88], (instregex "VMASKMOVPSrm")>; + +def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup89], (instregex "DPPDrri")>; +def: InstRW<[SBWriteResGroup89], (instregex "VDPPDrri")>; + +def SBWriteResGroup90 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup90], (instregex "CVTSD2SI64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTSD2SIrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTSS2SI64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTSS2SIrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTTSD2SI64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTTSD2SIrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTTSS2SI64rm")>; +def: InstRW<[SBWriteResGroup90], (instregex "CVTTSS2SIrm")>; +def: InstRW<[SBWriteResGroup90], (instregex "MUL64m")>; + +def SBWriteResGroup91 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup91], (instregex "VPTESTYrm")>; + +def SBWriteResGroup92 : SchedWriteRes<[SBPort23,SBPort15]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SBWriteResGroup92], (instregex "PHADDDrm")>; +def: InstRW<[SBWriteResGroup92], (instregex "PHADDSWrm128")>; +def: InstRW<[SBWriteResGroup92], (instregex "PHADDWrm")>; +def: InstRW<[SBWriteResGroup92], (instregex "PHSUBDrm")>; +def: InstRW<[SBWriteResGroup92], (instregex "PHSUBSWrm128")>; +def: InstRW<[SBWriteResGroup92], (instregex "PHSUBWrm")>; +def: InstRW<[SBWriteResGroup92], (instregex "VPHADDDrm")>; +def: InstRW<[SBWriteResGroup92], (instregex "VPHADDSWrm128")>; +def: InstRW<[SBWriteResGroup92], (instregex "VPHADDWrm")>; +def: 
InstRW<[SBWriteResGroup92], (instregex "VPHSUBDrm")>; +def: InstRW<[SBWriteResGroup92], (instregex "VPHSUBSWrm128")>; +def: InstRW<[SBWriteResGroup92], (instregex "VPHSUBWrm")>; +def: InstRW<[SBWriteResGroup92], (instregex "SHL64mCL")>; +def: InstRW<[SBWriteResGroup92], (instregex "SHL8mCL")>; + +def SBWriteResGroup93 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { + let Latency = 9; + let NumMicroOps = 6; + let ResourceCycles = [1,2,3]; +} +def: InstRW<[SBWriteResGroup93], (instregex "ADC64mi8")>; +def: InstRW<[SBWriteResGroup93], (instregex "ADC8mi")>; +def: InstRW<[SBWriteResGroup93], (instregex "SBB64mi8")>; +def: InstRW<[SBWriteResGroup93], (instregex "SBB8mi")>; + +def SBWriteResGroup94 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { + let Latency = 9; + let NumMicroOps = 6; + let ResourceCycles = [1,2,2,1]; +} +def: InstRW<[SBWriteResGroup94], (instregex "ADC64mr")>; +def: InstRW<[SBWriteResGroup94], (instregex "ADC8mr")>; +def: InstRW<[SBWriteResGroup94], (instregex "SBB64mr")>; +def: InstRW<[SBWriteResGroup94], (instregex "SBB8mr")>; + +def SBWriteResGroup95 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> { + let Latency = 9; + let NumMicroOps = 6; + let ResourceCycles = [1,1,2,1,1]; +} +def: InstRW<[SBWriteResGroup95], (instregex "BT64mr")>; +def: InstRW<[SBWriteResGroup95], (instregex "BTC64mr")>; +def: InstRW<[SBWriteResGroup95], (instregex "BTR64mr")>; +def: InstRW<[SBWriteResGroup95], (instregex "BTS64mr")>; +def: InstRW<[SBWriteResGroup95], (instregex "VADDPDYrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VADDPSYrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VADDSUBPDYrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VADDSUBPSYrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VCMPPDYrmi")>; +def: InstRW<[SBWriteResGroup95], (instregex "VCMPPSYrmi")>; +def: InstRW<[SBWriteResGroup95], (instregex "VCVTDQ2PSYrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VCVTPS2DQYrm")>; +def: InstRW<[SBWriteResGroup95], 
(instregex "VCVTTPS2DQrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VMAXPDYrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VMAXPSYrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VMINPDrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VMINPSrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VROUNDPDm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VROUNDPSm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VSUBPDYrm")>; +def: InstRW<[SBWriteResGroup95], (instregex "VSUBPSYrm")>; + +def SBWriteResGroup96 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup96], (instregex "VCVTSD2SI64rm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VCVTSD2SI64rr")>; +def: InstRW<[SBWriteResGroup96], (instregex "VCVTSS2SI64rm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VCVTSS2SIrm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VCVTTSD2SI64rm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[SBWriteResGroup96], (instregex "VCVTTSS2SI64rm")>; +def: InstRW<[SBWriteResGroup96], (instregex "VCVTTSS2SIrm")>; + +def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup97], (instregex "CVTDQ2PDrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "CVTPD2DQrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "CVTPD2PSrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "CVTSD2SSrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "CVTSI2SS64rm")>; +def: InstRW<[SBWriteResGroup97], (instregex "CVTSI2SSrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "CVTTPD2DQrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "MMX_CVTPD2PIirm")>; +def: InstRW<[SBWriteResGroup97], (instregex "MMX_CVTPI2PDirm")>; +def: InstRW<[SBWriteResGroup97], (instregex "MMX_CVTTPD2PIirm")>; +def: InstRW<[SBWriteResGroup97], (instregex 
"VCVTDQ2PDYrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "VCVTDQ2PDrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "VCVTPD2DQrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "VCVTPD2PSrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "VCVTSD2SSrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "VCVTSI2SS64rm")>; +def: InstRW<[SBWriteResGroup97], (instregex "VCVTSI2SSrm")>; +def: InstRW<[SBWriteResGroup97], (instregex "VCVTTPD2DQrm")>; + +def SBWriteResGroup98 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup98], (instregex "MULPDrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "MULPSrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "MULSDrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "MULSSrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "PCMPGTQrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "PHMINPOSUWrm128")>; +def: InstRW<[SBWriteResGroup98], (instregex "RCPPSm")>; +def: InstRW<[SBWriteResGroup98], (instregex "RCPSSm")>; +def: InstRW<[SBWriteResGroup98], (instregex "RSQRTPSm")>; +def: InstRW<[SBWriteResGroup98], (instregex "RSQRTSSm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VMULPDrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VMULPSrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VMULSDrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VMULSSrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VPCMPGTQrm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VPHMINPOSUWrm128")>; +def: InstRW<[SBWriteResGroup98], (instregex "VRCPPSm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VRCPSSm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VRSQRTPSm")>; +def: InstRW<[SBWriteResGroup98], (instregex "VRSQRTSSm")>; + +def SBWriteResGroup99 : SchedWriteRes<[SBPort0]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SBWriteResGroup99], (instregex "PCMPISTRIrr")>; +def: 
InstRW<[SBWriteResGroup99], (instregex "PCMPISTRM128rr")>; +def: InstRW<[SBWriteResGroup99], (instregex "VPCMPISTRIrr")>; +def: InstRW<[SBWriteResGroup99], (instregex "VPCMPISTRM128rr")>; + +def SBWriteResGroup100 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup100], (instregex "VCVTPD2DQYrm")>; +def: InstRW<[SBWriteResGroup100], (instregex "VCVTPD2PSYrm")>; +def: InstRW<[SBWriteResGroup100], (instregex "VCVTTPD2DQYrm")>; + +def SBWriteResGroup101 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup101], (instregex "MPSADBWrmi")>; +def: InstRW<[SBWriteResGroup101], (instregex "VMPSADBWrmi")>; + +def SBWriteResGroup102 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup102], (instregex "HADDPDrm")>; +def: InstRW<[SBWriteResGroup102], (instregex "HADDPSrm")>; +def: InstRW<[SBWriteResGroup102], (instregex "HSUBPDrm")>; +def: InstRW<[SBWriteResGroup102], (instregex "HSUBPSrm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VHADDPDrm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VHADDPSrm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VHSUBPDrm")>; +def: InstRW<[SBWriteResGroup102], (instregex "VHSUBPSrm")>; + +def SBWriteResGroup103 : SchedWriteRes<[SBPort5]> { + let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup103], (instregex "AESIMCrr")>; +def: InstRW<[SBWriteResGroup103], (instregex "VAESIMCrr")>; +def: InstRW<[SBWriteResGroup103], (instregex "VMULPDYrm")>; +def: InstRW<[SBWriteResGroup103], (instregex "VMULPSYrm")>; + +def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup104], 
(instregex "DPPSrri")>; +def: InstRW<[SBWriteResGroup104], (instregex "VDPPSYrri")>; +def: InstRW<[SBWriteResGroup104], (instregex "VDPPSrri")>; + +def SBWriteResGroup105 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup105], (instregex "VHADDPDrm")>; +def: InstRW<[SBWriteResGroup105], (instregex "VHADDPSYrm")>; +def: InstRW<[SBWriteResGroup105], (instregex "VHSUBPDYrm")>; +def: InstRW<[SBWriteResGroup105], (instregex "VHSUBPSYrm")>; + +def SBWriteResGroup106 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup106], (instregex "AESDECLASTrm")>; +def: InstRW<[SBWriteResGroup106], (instregex "AESDECrm")>; +def: InstRW<[SBWriteResGroup106], (instregex "AESENCLASTrm")>; +def: InstRW<[SBWriteResGroup106], (instregex "AESENCrm")>; +def: InstRW<[SBWriteResGroup106], (instregex "VAESDECLASTrm")>; +def: InstRW<[SBWriteResGroup106], (instregex "VAESDECrm")>; +def: InstRW<[SBWriteResGroup106], (instregex "VAESENCLASTrm")>; +def: InstRW<[SBWriteResGroup106], (instregex "VAESENCrm")>; + +def SBWriteResGroup107 : SchedWriteRes<[SBPort0]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup107], (instregex "DIVPSrr")>; +def: InstRW<[SBWriteResGroup107], (instregex "DIVSSrr")>; +def: InstRW<[SBWriteResGroup107], (instregex "SQRTPSr")>; +def: InstRW<[SBWriteResGroup107], (instregex "SQRTSSr")>; +def: InstRW<[SBWriteResGroup107], (instregex "VDIVPSrr")>; +def: InstRW<[SBWriteResGroup107], (instregex "VDIVSSrr")>; +def: InstRW<[SBWriteResGroup107], (instregex "VSQRTPSr")>; + +def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 14; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup108], (instregex "VSQRTSSm")>; + +def SBWriteResGroup109 : 
SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup109], (instregex "VRCPPSm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VRSQRTPSYm")>; + +def SBWriteResGroup110 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { + let Latency = 15; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SBWriteResGroup110], (instregex "DPPDrmi")>; +def: InstRW<[SBWriteResGroup110], (instregex "VDPPDrmi")>; + +def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 17; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; +} +def: InstRW<[SBWriteResGroup111], (instregex "PCMPISTRIrm")>; +def: InstRW<[SBWriteResGroup111], (instregex "PCMPISTRM128rm")>; +def: InstRW<[SBWriteResGroup111], (instregex "VPCMPISTRIrm")>; +def: InstRW<[SBWriteResGroup111], (instregex "VPCMPISTRM128rm")>; + +def SBWriteResGroup112 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 18; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup112], (instregex "AESIMCrm")>; +def: InstRW<[SBWriteResGroup112], (instregex "VAESIMCrm")>; + +def SBWriteResGroup113 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 20; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup113], (instregex "DIVPSrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "DIVSSrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "SQRTPSm")>; +def: InstRW<[SBWriteResGroup113], (instregex "SQRTSSm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VDIVPSrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VDIVSSrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VSQRTPSm")>; + +def SBWriteResGroup114 : SchedWriteRes<[SBPort0]> { + let Latency = 21; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup114], (instregex "VSQRTSDr")>; + +def SBWriteResGroup115 : SchedWriteRes<[SBPort0,SBPort23]> { + let 
Latency = 21; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup115], (instregex "VSQRTSDm")>; + +def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> { + let Latency = 22; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup116], (instregex "DIVPDrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "DIVSDrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "SQRTPDr")>; +def: InstRW<[SBWriteResGroup116], (instregex "SQRTSDr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VDIVPDrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VDIVSDrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPDr")>; + +def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 28; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup117], (instregex "DIVPDrm")>; +def: InstRW<[SBWriteResGroup117], (instregex "DIVSDrm")>; +def: InstRW<[SBWriteResGroup117], (instregex "SQRTPDm")>; +def: InstRW<[SBWriteResGroup117], (instregex "SQRTSDm")>; +def: InstRW<[SBWriteResGroup117], (instregex "VDIVPDrm")>; +def: InstRW<[SBWriteResGroup117], (instregex "VDIVSDrm")>; +def: InstRW<[SBWriteResGroup117], (instregex "VSQRTPDm")>; + +def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort0]> { + let Latency = 29; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup118], (instregex "VDIVPSYrr")>; +def: InstRW<[SBWriteResGroup118], (instregex "VSQRTPSYr")>; + +def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 36; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup119], (instregex "VDIVPSYrm")>; +def: InstRW<[SBWriteResGroup119], (instregex "VSQRTPSYm")>; + +def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort0]> { + let Latency = 45; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup120], (instregex "VDIVPDYrr")>; +def: InstRW<[SBWriteResGroup120], 
(instregex "VSQRTPDYr")>; + +def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 52; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup121], (instregex "VDIVPDYrm")>; +def: InstRW<[SBWriteResGroup121], (instregex "VSQRTPDYm")>; + +def SBWriteResGroup122 : SchedWriteRes<[SBPort0]> { + let Latency = 114; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup122], (instregex "VSQRTSSr")>; + } // SchedModel Index: llvm/trunk/test/CodeGen/X86/avx-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll @@ -10,14 +10,14 @@ ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addpd: ; BTVER2: # BB#0: @@ -40,14 +40,14 @@ ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: 
[3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addps: ; BTVER2: # BB#0: @@ -70,14 +70,14 @@ ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # BB#0: @@ -101,14 +101,14 @@ ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsubps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # BB#0: @@ -131,17 +131,17 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; 
SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: @@ -172,17 +172,17 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: -; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: @@ -216,14 +216,14 @@ ; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; 
HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: @@ -255,14 +255,14 @@ ; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: @@ -291,17 +291,17 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_blendpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] +; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = 
ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: @@ -326,15 +326,15 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] -; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: @@ -356,15 +356,15 @@ define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; 
HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: @@ -387,15 +387,15 @@ define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: @@ -418,13 +418,13 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) { ; SANDY-LABEL: test_broadcastf128: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_broadcastf128: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [4:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [?:5.000000e-01] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_broadcastf128: ; BTVER2: # BB#0: @@ -443,13 +443,13 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) { ; SANDY-LABEL: test_broadcastsd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: 
[5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_broadcastsd_ymm: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_broadcastsd_ymm: ; BTVER2: # BB#0: @@ -469,13 +469,13 @@ define <4 x float> @test_broadcastss(float *%a0) { ; SANDY-LABEL: test_broadcastss: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_broadcastss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [4:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_broadcastss: ; BTVER2: # BB#0: @@ -496,12 +496,12 @@ ; SANDY-LABEL: test_broadcastss_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_broadcastss_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_broadcastss_ymm: ; BTVER2: # BB#0: @@ -521,17 +521,17 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: -; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vcmpeqpd (%rdi), 
%ymm0, %ymm2 # sched: [9:1.00] +; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vorpd %ymm2, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: @@ -559,17 +559,17 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: -; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm2 # sched: [9:1.00] +; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vorps %ymm2, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: @@ -598,16 +598,16 @@ ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: 
retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] +; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [6:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: @@ -632,19 +632,19 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [4:1.00] +; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:1.00] +; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [3:1.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: @@ -669,17 +669,17 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: 
vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [10:1.00] +; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: @@ -704,17 +704,17 @@ define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [9:1.00] +; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] +; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: @@ -741,15 +741,15 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00] -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: 
test_cvtps2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00] ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # BB#0: @@ -774,15 +774,15 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00] +; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [27:2.00] -; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [31:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:2.00] +; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_divpd: ; BTVER2: # BB#0: @@ -804,15 +804,15 @@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00] +; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [19:2.00] -; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [23:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: 
[21:2.00] +; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [21:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_divps: ; BTVER2: # BB#0: @@ -834,15 +834,15 @@ define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: -; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] ; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_dpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] -; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [18:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: @@ -866,16 +866,16 @@ ; SANDY-LABEL: test_extractf128: ; SANDY: # BB#0: ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_extractf128: ; HASWELL: # BB#0: ; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [4:1.00] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_extractf128: ; BTVER2: # BB#0: @@ -900,13 +900,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; 
SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: @@ -929,15 +929,15 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_haddps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: @@ -960,15 +960,15 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; 
HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: @@ -991,15 +991,15 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: @@ -1023,16 +1023,16 @@ ; SANDY-LABEL: test_insertf128: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_insertf128: ; HASWELL: # BB#0: ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_insertf128: ; BTVER2: # BB#0: @@ -1059,13 +1059,13 @@ define <32 x i8> @test_lddqu(i8* %a0) { ; SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: -; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [4:0.50] -; SANDY-NEXT: retq # 
sched: [5:1.00] +; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # BB#0: -; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [4:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # BB#0: @@ -1084,17 +1084,17 @@ define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { ; SANDY-LABEL: test_maskmovpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00] -; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] +; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [4:2.00] -; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [13:1.00] +; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:1.00] ; HASWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maskmovpd: ; BTVER2: # BB#0: @@ -1119,29 +1119,29 @@ define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) { ; SANDY-LABEL: test_maskmovpd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00] +; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) ; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: 
retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovpd_ymm: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [4:2.00] -; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [14:1.00] +; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [4:1.00] +; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) ; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maskmovpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] +; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) ; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maskmovpd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) ; ZNVER1-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %a1) @@ -1154,17 +1154,17 @@ define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) { ; SANDY-LABEL: test_maskmovps: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00] -; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00] +; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [4:2.00] -; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [13:1.00] +; HASWELL-NEXT: 
vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [2:2.00] +; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:1.00] ; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maskmovps: ; BTVER2: # BB#0: @@ -1189,29 +1189,29 @@ define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) { ; SANDY-LABEL: test_maskmovps_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] +; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) ; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovps_ymm: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [4:2.00] -; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [14:1.00] +; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] +; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) ; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maskmovps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] +; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) ; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maskmovps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] +; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) ; ZNVER1-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <8 x 
float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %a1) @@ -1225,14 +1225,14 @@ ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: @@ -1256,14 +1256,14 @@ ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: @@ -1288,13 +1288,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: @@ -1319,13 +1319,13 
@@ ; SANDY: # BB#0: ; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: @@ -1348,17 +1348,17 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movapd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [4:0.50] +; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movapd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movapd: ; BTVER2: # BB#0: @@ -1382,17 +1382,17 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [4:0.50] +; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; 
HASWELL-LABEL: test_movaps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movaps: ; BTVER2: # BB#0: @@ -1417,16 +1417,16 @@ ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50] +; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movddup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50] +; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: @@ -1451,15 +1451,15 @@ define i32 @test_movmskpd(<4 x double> %a0) { ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.33] +; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] +; HASWELL-NEXT: vzeroupper # 
sched: [4:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: @@ -1479,15 +1479,15 @@ define i32 @test_movmskps(<8 x float> %a0) { ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.33] +; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [5:1.00] +; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: @@ -1508,14 +1508,14 @@ ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: @@ -1537,14 +1537,14 @@ ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntps %ymm0, 
(%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: @@ -1566,16 +1566,16 @@ ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50] +; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50] +; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: @@ -1601,16 +1601,16 @@ ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50] +; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50] +; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; 
HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: @@ -1635,19 +1635,19 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movupd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: @@ -1671,19 +1671,19 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movups: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 
16(%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movups: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movups: ; BTVER2: # BB#0: @@ -1708,14 +1708,14 @@ ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: @@ -1738,14 +1738,14 @@ ; SANDY-LABEL: test_mulps: ; SANDY: # BB#0: ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulps (%rdi), 
%ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_mulps: ; BTVER2: # BB#0: @@ -1767,17 +1767,17 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: orpd: ; SANDY: # BB#0: -; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: orpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: orpd: ; BTVER2: # BB#0: @@ -1806,17 +1806,17 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_orps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: @@ 
-1846,16 +1846,16 @@ ; SANDY-LABEL: test_permilpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00] +; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00] +; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_permilpd: ; BTVER2: # BB#0: @@ -1880,17 +1880,17 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_permilpd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] +; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00] ; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilpd_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] ; HASWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_permilpd_ymm: ; BTVER2: # BB#0: @@ -1916,16 +1916,16 @@ ; SANDY-LABEL: test_permilps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00] +; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = 
mem[3,2,1,0] sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00] +; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_permilps: ; BTVER2: # BB#0: @@ -1950,17 +1950,17 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_permilps_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] +; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00] ; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilps_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] ; HASWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_permilps_ymm: ; BTVER2: # BB#0: @@ -1986,14 +1986,14 @@ ; SANDY-LABEL: test_permilvarpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilvarpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 
# sched: [1:1.00] -; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_permilvarpd: ; BTVER2: # BB#0: @@ -2018,13 +2018,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilvarpd_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_permilvarpd_ymm: ; BTVER2: # BB#0: @@ -2048,14 +2048,14 @@ ; SANDY-LABEL: test_permilvarps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilvarps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_permilvarps: ; BTVER2: # BB#0: @@ -2080,13 +2080,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilvarps_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # 
sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_permilvarps_ymm: ; BTVER2: # BB#0: @@ -2112,14 +2112,14 @@ ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # BB#0: @@ -2148,14 +2148,14 @@ ; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:2.00] -; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:2.00] +; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: @@ -2184,14 +2184,14 @@ ; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [7:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:2.00] -; HASWELL-NEXT: 
vroundps $7, (%rdi), %ymm1 # sched: [10:2.00] +; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [7:1.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: @@ -2217,17 +2217,17 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [9:1.00] +; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:3.00] +; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:3.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rsqrtps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00] +; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # BB#0: @@ -2254,16 +2254,16 @@ ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00] +; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00] +; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 
= ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: @@ -2289,14 +2289,14 @@ ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_shufps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: @@ -2318,17 +2318,17 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:3.00] +; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:3.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [32:2.00] -; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [28:2.00] +; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:2.00] +; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:2.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: 
[1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # BB#0: @@ -2354,17 +2354,17 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:3.00] +; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:3.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [23:2.00] -; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [19:2.00] +; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [21:2.00] +; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # BB#0: @@ -2391,14 +2391,14 @@ ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_subpd: ; BTVER2: # BB#0: @@ -2421,14 +2421,14 @@ ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: 
vsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_subps: ; BTVER2: # BB#0: @@ -2451,20 +2451,20 @@ ; SANDY-LABEL: test_testpd: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: setb %al # sched: [1:1.00] +; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: adcl $0, %eax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_testpd: ; BTVER2: # BB#0: @@ -2495,22 +2495,22 @@ ; SANDY-LABEL: test_testpd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] 
+; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testpd_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: setb %al # sched: [1:1.00] +; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: adcl $0, %eax # sched: [1:0.25] +; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_testpd_ymm: ; BTVER2: # BB#0: @@ -2542,20 +2542,20 @@ ; SANDY-LABEL: test_testps: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testps: ; HASWELL: # BB#0: ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: setb %al # sched: [1:1.00] +; HASWELL-NEXT: vtestps (%rdi), %xmm0 # 
sched: [1:1.00] +; HASWELL-NEXT: adcl $0, %eax # sched: [1:0.25] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_testps: ; BTVER2: # BB#0: @@ -2586,22 +2586,22 @@ ; SANDY-LABEL: test_testps_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testps_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: setb %al # sched: [1:0.50] -; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: setb %al # sched: [1:1.00] +; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: adcl $0, %eax # sched: [1:0.25] +; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_testps_ymm: ; BTVER2: # BB#0: @@ -2635,14 +2635,14 @@ ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; HASWELL-NEXT: vunpckhpd 
{{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # BB#0: @@ -2669,13 +2669,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpckhps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # BB#0: @@ -2698,16 +2698,16 @@ ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00] +; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_unpcklpd: ; 
BTVER2: # BB#0: @@ -2733,14 +2733,14 @@ ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # BB#0: @@ -2765,14 +2765,14 @@ ; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: @@ -2804,14 +2804,14 @@ ; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, 
%ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_xorps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: @@ -2841,12 +2841,12 @@ ; SANDY-LABEL: test_zeroall: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroall # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_zeroall: ; HASWELL: # BB#0: -; HASWELL-NEXT: vzeroall # sched: [1:0.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vzeroall # sched: [16:16.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_zeroall: ; BTVER2: # BB#0: @@ -2866,12 +2866,12 @@ ; SANDY-LABEL: test_zeroupper: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_zeroupper: ; HASWELL: # BB#0: -; HASWELL-NEXT: vzeroupper # sched: [1:0.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vzeroupper # sched: [4:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_zeroupper: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ llvm/trunk/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1619,10 +1619,10 @@ ; ; AVX512VL-LABEL: test_gather_mask: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX 
Compression encoding: [0xc5,0xfc,0x28,0xda] -; AVX512VL-NEXT: vgatherdps %ymm3, (%eax,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x88] ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] +; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04] +; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] +; AVX512VL-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89] ; AVX512VL-NEXT: vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a_i8 = bitcast float* %a to i8* Index: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll @@ -9,7 +9,7 @@ ; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pabsb: ; ZNVER1: # BB#0: @@ -29,9 +29,9 @@ ; HASWELL-LABEL: test_pabsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [5:0.50] +; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [1:0.50] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pabsd: ; ZNVER1: # BB#0: @@ -51,9 +51,9 @@ ; HASWELL-LABEL: test_pabsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [5:0.50] +; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: 
[2:1.00] ; ; ZNVER1-LABEL: test_pabsw: ; ZNVER1: # BB#0: @@ -74,7 +74,7 @@ ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_paddb: ; ZNVER1: # BB#0: @@ -92,7 +92,7 @@ ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_paddd: ; ZNVER1: # BB#0: @@ -109,8 +109,8 @@ ; HASWELL-LABEL: test_paddq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_paddq: ; ZNVER1: # BB#0: @@ -128,7 +128,7 @@ ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_paddw: ; ZNVER1: # BB#0: @@ -145,9 +145,9 @@ ; HASWELL-LABEL: test_pand: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pand: ; ZNVER1: # BB#0: @@ -166,9 +166,9 @@ ; HASWELL-LABEL: test_pandn: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:0.50] +; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50] 
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pandn: ; ZNVER1: # BB#0: @@ -190,7 +190,7 @@ ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pmulld: ; ZNVER1: # BB#0: @@ -207,8 +207,8 @@ ; HASWELL-LABEL: test_pmullw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pmullw: ; ZNVER1: # BB#0: @@ -225,9 +225,9 @@ ; HASWELL-LABEL: test_por: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_por: ; ZNVER1: # BB#0: @@ -246,8 +246,8 @@ ; HASWELL-LABEL: test_psubb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_psubb: ; ZNVER1: # BB#0: @@ -264,8 +264,8 @@ ; HASWELL-LABEL: test_psubd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; 
HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_psubd: ; ZNVER1: # BB#0: @@ -282,8 +282,8 @@ ; HASWELL-LABEL: test_psubq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_psubq: ; ZNVER1: # BB#0: @@ -300,8 +300,8 @@ ; HASWELL-LABEL: test_psubw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_psubw: ; ZNVER1: # BB#0: @@ -318,9 +318,9 @@ ; HASWELL-LABEL: test_pxor: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; ZNVER1-LABEL: test_pxor: ; ZNVER1: # BB#0: Index: llvm/trunk/test/CodeGen/X86/avx2-vector-shifts.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx2-vector-shifts.ll +++ llvm/trunk/test/CodeGen/X86/avx2-vector-shifts.ll @@ -381,6 +381,7 @@ ; X32-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ; X32-NEXT: vzeroupper ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: srl_trunc_and_v4i64: ; X64: ## BB#0: @@ -391,6 +392,7 @@ ; X64-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq +; X64-NEXT: ## -- End function %and = and <4 x i64> %y, %trunc = trunc <4 x i64> %and to <4 x i32> %sra = lshr <4 x i32> %x, %trunc @@ -412,6 +414,7 @@ ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0 
; X32-NEXT: vzeroupper ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: shl_8i16: ; X64: ## BB#0: @@ -423,6 +426,7 @@ ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq +; X64-NEXT: ## -- End function %shl = shl <8 x i16> %r, %a ret <8 x i16> %shl } @@ -434,13 +438,14 @@ ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 -; X32-NEXT: vpsrld $16, %ymm3, %ymm3 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpsrld $16, %ymm3, %ymm1 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0 -; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 +; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: shl_16i16: ; X64: ## BB#0: @@ -448,13 +453,14 @@ ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpsllvd %ymm3, %ymm4, %ymm3 -; X64-NEXT: vpsrld $16, %ymm3, %ymm3 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; 
X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpsrld $16, %ymm3, %ymm1 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0 -; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 +; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %shl = shl <16 x i16> %r, %a ret <16 x i16> %shl } @@ -474,6 +480,7 @@ ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: shl_32i8: ; X64: ## BB#0: @@ -489,6 +496,7 @@ ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %shl = shl <32 x i8> %r, %a ret <32 x i8> %shl } @@ -504,6 +512,7 @@ ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0 ; X32-NEXT: vzeroupper ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: ashr_8i16: ; X64: ## BB#0: @@ -515,6 +524,7 @@ ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq +; X64-NEXT: ## -- End function %ashr = ashr <8 x i16> %r, %a ret <8 x i16> %ashr } @@ -526,13 +536,14 @@ ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpsravd %ymm3, %ymm4, %ymm3 -; X32-NEXT: vpsrld $16, %ymm3, %ymm3 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = 
ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpsravd %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpsrld $16, %ymm3, %ymm1 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0 -; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 +; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: ashr_16i16: ; X64: ## BB#0: @@ -540,13 +551,14 @@ ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpsravd %ymm3, %ymm4, %ymm3 -; X64-NEXT: vpsrld $16, %ymm3, %ymm3 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X64-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpsravd %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpsrld $16, %ymm3, %ymm1 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0 -; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 +; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %ashr = ashr <16 x i16> %r, %a ret <16 x i16> %ashr } @@ -579,6 +591,7 @@ ; X32-NEXT: vpsrlw $8, %ymm0, %ymm0 ; X32-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: ashr_32i8: ; X64: ## BB#0: @@ -607,6 +620,7 @@ ; X64-NEXT: vpsrlw $8, %ymm0, %ymm0 ; X64-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %ashr = ashr <32 x i8> %r, %a ret <32 x i8> %ashr } @@ -622,6 +636,7 @@ ; X32-NEXT: ## kill: %XMM0 %XMM0 %YMM0 ; X32-NEXT: vzeroupper ; 
X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: lshr_8i16: ; X64: ## BB#0: @@ -633,6 +648,7 @@ ; X64-NEXT: ## kill: %XMM0 %XMM0 %YMM0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq +; X64-NEXT: ## -- End function %lshr = lshr <8 x i16> %r, %a ret <8 x i16> %lshr } @@ -644,13 +660,14 @@ ; X32-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X32-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X32-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 -; X32-NEXT: vpsrld $16, %ymm3, %ymm3 ; X32-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X32-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X32-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; X32-NEXT: vpsrld $16, %ymm3, %ymm1 ; X32-NEXT: vpsrld $16, %ymm0, %ymm0 -; X32-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 +; X32-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: lshr_16i16: ; X64: ## BB#0: @@ -658,13 +675,14 @@ ; X64-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] ; X64-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] ; X64-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3 -; X64-NEXT: vpsrld $16, %ymm3, %ymm3 ; X64-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] ; X64-NEXT: 
vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; X64-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; X64-NEXT: vpsrld $16, %ymm3, %ymm1 ; X64-NEXT: vpsrld $16, %ymm0, %ymm0 -; X64-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 +; X64-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %lshr = lshr <16 x i16> %r, %a ret <16 x i16> %lshr } @@ -685,6 +703,7 @@ ; X32-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X32-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X32-NEXT: retl +; X32-NEXT: ## -- End function ; ; X64-LABEL: lshr_32i8: ; X64: ## BB#0: @@ -701,6 +720,7 @@ ; X64-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; X64-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; X64-NEXT: retq +; X64-NEXT: ## -- End function %lshr = lshr <32 x i8> %r, %a ret <32 x i8> %lshr } Index: llvm/trunk/test/CodeGen/X86/avx512-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512-cmp.ll @@ -14,6 +14,7 @@ ; ALL-NEXT: LBB0_2: ## %l2 ; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; ALL-NEXT: retq +; ALL-NEXT: ## -- End function %tobool = fcmp une double %a, %b br i1 %tobool, label %l1, label %l2 @@ -36,6 +37,7 @@ ; ALL-NEXT: LBB1_2: ## %l2 ; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; ALL-NEXT: retq +; ALL-NEXT: ## -- End function %tobool = fcmp olt float %a, %b br i1 %tobool, label %l1, label %l2 @@ -124,11 +126,11 @@ define i32 @test8(i32 %a1, i32 %a2, i32 %a3) { ; ALL-LABEL: test8: ; ALL: ## BB#0: -; ALL-NEXT: notl %edi ; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000 ; ALL-NEXT: testl %edx, %edx ; ALL-NEXT: movl $1, %eax ; ALL-NEXT: cmovel %eax, %edx +; ALL-NEXT: notl %edi ; ALL-NEXT: orl %edi, %esi ; ALL-NEXT: cmovnel %edx, %eax ; ALL-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll =================================================================== --- 
llvm/trunk/test/CodeGen/X86/avx512-cvt.ll +++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll @@ -1545,19 +1545,19 @@ } define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { -; NOVL-LABEL: uitofp_2i1_float: -; NOVL: # BB#0: -; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; NOVL-NEXT: vpextrb $8, %xmm0, %eax -; NOVL-NEXT: andl $1, %eax -; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 -; NOVL-NEXT: vpextrb $0, %xmm0, %eax -; NOVL-NEXT: andl $1, %eax -; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 -; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; NOVL-NEXT: retq +; KNL-LABEL: uitofp_2i1_float: +; KNL: # BB#0: +; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: vpextrb $8, %xmm0, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: vpextrb $0, %xmm0, %ecx +; KNL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; KNL-NEXT: andl $1, %ecx +; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm1 +; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; KNL-NEXT: retq ; ; VL-LABEL: uitofp_2i1_float: ; VL: # BB#0: @@ -1567,6 +1567,34 @@ ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} ; VL-NEXT: vcvtudq2ps %xmm0, %xmm0 ; VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_2i1_float: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX512DQ-NEXT: vpextrb $8, %xmm0, %eax +; AVX512DQ-NEXT: andl $1, %eax +; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 +; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax +; AVX512DQ-NEXT: andl $1, %eax +; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: uitofp_2i1_float: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 +; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax +; AVX512BW-NEXT: andl $1, %eax +; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 +; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: andl $1, %eax +; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512BW-NEXT: retq %mask = icmp ult <2 x i32> %a, zeroinitializer %1 = uitofp <2 x i1> %mask to <2 x float> ret <2 x float> %1 Index: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll +++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll @@ -12,6 +12,7 @@ ; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test1: ; SKX: ## BB#0: @@ -21,6 +22,7 @@ ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; SKX-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %rrr = load float, float* %br %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1 %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14 @@ -36,6 +38,7 @@ ; KNL-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test2: ; SKX: ## BB#0: @@ -45,6 +48,7 @@ ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SKX-NEXT: vinsertf64x2 $3, %xmm0, %zmm2, %zmm0 ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %rrr = load double, double* %br %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1 %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6 @@ -58,6 +62,7 @@ ; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3] ; KNL-NEXT: 
vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test3: ; SKX: ## BB#0: @@ -65,6 +70,7 @@ ; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3] ; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %eee = extractelement <16 x float> %x, i32 4 %rrr2 = insertelement <16 x float> %x, float %eee, i32 1 ret <16 x float> %rrr2 @@ -78,6 +84,7 @@ ; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test4: ; SKX: ## BB#0: @@ -86,6 +93,7 @@ ; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0 ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %eee = extractelement <8 x i64> %x, i32 4 %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1 ret <8 x i64> %rrr2 @@ -96,11 +104,13 @@ ; KNL: ## BB#0: ; KNL-NEXT: vextractps $3, %xmm0, %eax ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test5: ; SKX: ## BB#0: ; SKX-NEXT: vextractps $3, %xmm0, %eax ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %ef = extractelement <4 x float> %x, i32 3 %ei = bitcast float %ef to i32 ret i32 %ei @@ -111,11 +121,13 @@ ; KNL: ## BB#0: ; KNL-NEXT: vextractps $3, %xmm0, (%rdi) ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test6: ; SKX: ## BB#0: ; SKX-NEXT: vextractps $3, %xmm0, (%rdi) ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %ef = extractelement <4 x float> %x, i32 3 store float %ef, float* %out, align 4 ret void @@ -135,6 +147,7 @@ ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test7: ; SKX: ## BB#0: @@ -150,6 +163,7 @@ ; SKX-NEXT: popq %rbp ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %e = extractelement <16 x float> %x, i32 %ind ret float %e } @@ -168,6 +182,7 @@ ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: 
retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test8: ; SKX: ## BB#0: @@ -183,6 +198,7 @@ ; SKX-NEXT: popq %rbp ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %e = extractelement <8 x double> %x, i32 %ind ret double %e } @@ -201,6 +217,7 @@ ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test9: ; SKX: ## BB#0: @@ -216,6 +233,7 @@ ; SKX-NEXT: popq %rbp ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %e = extractelement <8 x float> %x, i32 %ind ret float %e } @@ -234,6 +252,7 @@ ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test10: ; SKX: ## BB#0: @@ -249,6 +268,7 @@ ; SKX-NEXT: popq %rbp ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %e = extractelement <16 x i32> %x, i32 %ind ret i32 %e } @@ -1114,137 +1134,137 @@ ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: subq $32, %rsp -; KNL-NEXT: xorl %eax, %eax -; KNL-NEXT: cmpl %esi, %edi -; KNL-NEXT: setb %al ; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %edx -; KNL-NEXT: vmovd %edx, %xmm1 -; KNL-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vmovd %ecx, %xmm1 +; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 ; 
KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw 
$15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %edx -; KNL-NEXT: vmovd %edx, %xmm0 -; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $14, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: vmovd %eax, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: 
kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: cmpl %esi, %edi ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; 
KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 +; KNL-NEXT: setb %al ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 @@ -1299,8 +1319,8 @@ ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vpextrb $4, %xmm0, %ecx ; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: vpextrb $0, %xmm0, %ecx +; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] @@ -2124,8 +2144,8 @@ define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) { ; KNL-LABEL: test_extractelement_variable_v16i8: ; KNL: ## BB#0: -; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: andl $15, %edi ; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; KNL-NEXT: movb (%rdi,%rax), %al @@ -2156,8 +2176,8 @@ ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: subq $64, %rsp -; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: vmovaps %ymm0, (%rsp) +; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: andl $31, %edi ; KNL-NEXT: movq %rsp, %rax ; KNL-NEXT: movb (%rdi,%rax), %al @@ -2204,9 +2224,9 @@ ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-64, %rsp ; KNL-NEXT: subq $128, %rsp -; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovaps %ymm0, (%rsp) +; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: andl $63, %edi ; KNL-NEXT: movq %rsp, %rax ; KNL-NEXT: movb (%rdi,%rax), %al @@ -2295,12 +2315,12 @@ define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) { ; KNL-LABEL: test_extractelement_varible_v2i1: ; KNL: ## BB#0: -; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; 
KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: andl $1, %edi ; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax ; KNL-NEXT: andl $1, %eax @@ -2325,12 +2345,12 @@ define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) { ; KNL-LABEL: test_extractelement_varible_v4i1: ; KNL: ## BB#0: -; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) +; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: andl $3, %edi ; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax ; KNL-NEXT: andl $1, %eax Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -2880,7 +2880,6 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { ; CHECK-LABEL: test_mask_vextractf32x4: ; CHECK: ## BB#0: -; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1 ; CHECK-NEXT: kmovw %edi, %k0 ; CHECK-NEXT: kshiftlw $12, %k0, %k1 ; CHECK-NEXT: kshiftrw $15, %k1, %k1 @@ -2898,6 +2897,7 @@ ; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1 ; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 ; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq @@ -2941,7 +2941,6 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { ; CHECK-LABEL: test_maskz_vextracti32x4: ; CHECK: ## BB#0: -; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; CHECK-NEXT: kmovw %edi, %k0 ; CHECK-NEXT: kshiftlw $12, %k0, %k1 ; CHECK-NEXT: 
kshiftrw $15, %k1, %k1 @@ -2959,6 +2958,7 @@ ; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 ; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1 ; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 Index: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll +++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll @@ -1837,73 +1837,8 @@ ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: subq $32, %rsp -; KNL-NEXT: vmovups (%rdi), %zmm2 -; KNL-NEXT: vmovups 64(%rdi), %zmm3 -; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1 -; KNL-NEXT: kshiftlw $14, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $15, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vmovd %ecx, %xmm3 -; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $13, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $12, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $11, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $10, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $9, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $8, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $7, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; 
KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $6, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $5, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $4, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $3, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $2, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftlw $1, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 -; KNL-NEXT: kshiftrw $15, %k1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; KNL-NEXT: vmovups 64(%rdi), %zmm2 +; KNL-NEXT: vcmpltps %zmm1, %zmm2, %k2 ; KNL-NEXT: kshiftlw $14, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1967,138 +1902,203 @@ ; KNL-NEXT: kshiftrw $15, %k2, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 -; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z} -; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z} +; KNL-NEXT: vmovups (%rdi), %zmm3 +; KNL-NEXT: vcmpltps %zmm0, %zmm3, %k1 +; KNL-NEXT: kshiftlw $14, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $15, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vmovd %ecx, %xmm3 +; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $13, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; 
KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $12, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $11, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $10, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $9, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $8, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $7, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $6, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $5, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $4, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $3, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $2, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $1, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftrw $15, %k1, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z} ; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0 -; KNL-NEXT: kshiftlw $14, 
%k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kshiftlw $14, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax +; KNL-NEXT: kshiftlw $15, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %ecx ; KNL-NEXT: vmovd %ecx, %xmm4 ; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $13, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $13, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $12, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $12, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $11, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $11, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $10, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $10, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $9, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $9, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $8, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $8, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $7, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: 
kmovw %k1, %eax +; KNL-NEXT: kshiftlw $7, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $6, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $6, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $5, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $5, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $4, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $4, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $3, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $3, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $2, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $2, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $1, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $1, %k0, %k2 +; KNL-NEXT: kshiftrw $15, %k2, %k2 +; KNL-NEXT: kmovw %k2, %eax ; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 -; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0 +; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 +; KNL-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z} +; KNL-NEXT: vcmpltps %zmm4, %zmm0, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: 
kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm3 -; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; KNL-NEXT: vmovd %ecx, %xmm4 +; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $4, %k0, 
%k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 +; KNL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 ; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 @@ -2943,36 +2943,6 @@ ; ; KNL-LABEL: store_64i1: ; KNL: ## BB#0: -; KNL-NEXT: pushq %rbp -; KNL-NEXT: Lcfi9: -; KNL-NEXT: .cfi_def_cfa_offset 16 -; KNL-NEXT: pushq %r15 -; KNL-NEXT: Lcfi10: -; KNL-NEXT: .cfi_def_cfa_offset 24 -; KNL-NEXT: pushq %r14 -; KNL-NEXT: Lcfi11: -; KNL-NEXT: .cfi_def_cfa_offset 32 -; KNL-NEXT: pushq %r13 -; KNL-NEXT: Lcfi12: -; KNL-NEXT: .cfi_def_cfa_offset 40 -; KNL-NEXT: pushq %r12 -; KNL-NEXT: Lcfi13: -; KNL-NEXT: .cfi_def_cfa_offset 48 -; KNL-NEXT: pushq %rbx -; KNL-NEXT: Lcfi14: -; KNL-NEXT: .cfi_def_cfa_offset 56 -; KNL-NEXT: Lcfi15: -; KNL-NEXT: .cfi_offset %rbx, -56 -; KNL-NEXT: Lcfi16: -; KNL-NEXT: .cfi_offset %r12, -48 -; KNL-NEXT: Lcfi17: -; KNL-NEXT: .cfi_offset %r13, -40 -; KNL-NEXT: Lcfi18: -; KNL-NEXT: .cfi_offset %r14, -32 -; KNL-NEXT: Lcfi19: -; KNL-NEXT: .cfi_offset %r15, -24 -; KNL-NEXT: Lcfi20: -; KNL-NEXT: .cfi_offset %rbp, -16 ; KNL-NEXT: vpmovsxbd %xmm0, 
%zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 @@ -2984,281 +2954,275 @@ ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r8d +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r9d +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r10d +; KNL-NEXT: vmovd %ecx, %xmm3 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r11d +; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r14d +; KNL-NEXT: vpinsrb $2, %ecx, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r15d +; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r12d +; KNL-NEXT: vpinsrb $4, %ecx, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r13d +; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ebx +; KNL-NEXT: vpinsrb $6, %ecx, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ebp +; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $8, %ecx, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, 
%k1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %edx +; KNL-NEXT: vpinsrb $10, %ecx, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %esi +; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vmovd %r9d, %xmm3 -; KNL-NEXT: kmovw %k1, %r9d -; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 -; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 -; KNL-NEXT: vpslld $31, %zmm2, %zmm2 -; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 -; KNL-NEXT: kmovw %k0, 6(%rdi) -; KNL-NEXT: kshiftlw $14, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r8d -; KNL-NEXT: kshiftlw $15, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r10d -; KNL-NEXT: kshiftlw $13, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r9d -; KNL-NEXT: kshiftlw $12, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r11d -; KNL-NEXT: kshiftlw $11, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw 
%k0, %r14d -; KNL-NEXT: kshiftlw $10, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r15d -; KNL-NEXT: kshiftlw $9, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r12d -; KNL-NEXT: kshiftlw $8, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r13d -; KNL-NEXT: kshiftlw $7, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $6, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %esi -; KNL-NEXT: kshiftlw $5, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ebp -; KNL-NEXT: kshiftlw $4, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ebx -; KNL-NEXT: kshiftlw $3, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $2, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %edx -; KNL-NEXT: kshiftlw $1, %k2, %k0 +; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vmovd %r10d, %xmm2 -; KNL-NEXT: kmovw %k0, %r10d -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL-NEXT: kshiftrw $15, %k2, %k0 -; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 -; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm2 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 -; KNL-NEXT: 
vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, 4(%rdi) ; KNL-NEXT: kshiftlw $14, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r8d +; KNL-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2 +; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kshiftlw $15, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r10d +; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $13, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r9d +; KNL-NEXT: vmovd %eax, %xmm3 +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $12, %k1, %k0 +; KNL-NEXT: vpinsrb $1, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r11d +; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kshiftlw $11, %k1, %k0 +; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r14d +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $10, %k1, %k0 +; KNL-NEXT: vpinsrb $3, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r15d +; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kshiftlw $9, %k1, %k0 +; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r12d +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $8, %k1, %k0 +; KNL-NEXT: vpinsrb $5, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %r13d +; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kshiftlw $7, %k1, %k0 +; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $6, %k1, %k0 +; KNL-NEXT: vpinsrb $7, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %esi +; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kshiftlw $5, %k1, %k0 +; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ebp +; KNL-NEXT: 
kmovw %k0, %eax ; KNL-NEXT: kshiftlw $4, %k1, %k0 +; KNL-NEXT: vpinsrb $9, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ebx +; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kshiftlw $3, %k1, %k0 +; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $2, %k1, %k0 +; KNL-NEXT: vpinsrb $11, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kshiftlw $1, %k1, %k0 +; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vmovd %r10d, %xmm1 -; KNL-NEXT: kmovw %k0, %r10d -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 -; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL-NEXT: kmovw %k1, 2(%rdi) +; KNL-NEXT: vpmovsxbd %xmm2, %zmm1 +; KNL-NEXT: vpslld $31, %zmm1, %zmm1 +; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm2 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm1 +; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, 6(%rdi) +; KNL-NEXT: 
vpmovsxbd %xmm1, %zmm1 +; KNL-NEXT: vpslld $31, %zmm1, %zmm1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: kmovw %k1, 4(%rdi) ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r8d +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r9d +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r10d +; KNL-NEXT: vmovd %ecx, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r11d +; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r14d +; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r15d +; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r12d +; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %r13d +; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %edx +; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %esi +; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ebp +; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; 
KNL-NEXT: kmovw %k1, %ebx +; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vmovd %r9d, %xmm0 -; KNL-NEXT: kmovw %k1, %r9d -; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm0 +; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, 2(%rdi) +; KNL-NEXT: kshiftlw $14, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $15, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftlw $13, %k1, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vmovd %ecx, %xmm0 +; KNL-NEXT: kmovw %k0, %ecx +; 
KNL-NEXT: kshiftlw $12, %k1, %k0 +; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $11, %k1, %k0 +; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftlw $10, %k1, %k0 +; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $9, %k1, %k0 +; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftlw $8, %k1, %k0 +; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $7, %k1, %k0 +; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftlw $6, %k1, %k0 +; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $5, %k1, %k0 +; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftlw $4, %k1, %k0 +; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $3, %k1, %k0 +; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftlw $2, %k1, %k0 +; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $1, %k1, %k0 +; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftrw $15, %k1, %k0 +; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, 
%zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) -; KNL-NEXT: popq %rbx -; KNL-NEXT: popq %r12 -; KNL-NEXT: popq %r13 -; KNL-NEXT: popq %r14 -; KNL-NEXT: popq %r15 -; KNL-NEXT: popq %rbp ; KNL-NEXT: retq ; ; SKX-LABEL: store_64i1: Index: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll +++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll @@ -8,6 +8,7 @@ ; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask = fcmp ole <16 x float> %x, %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y ret <16 x float> %max @@ -19,6 +20,7 @@ ; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask = fcmp ole <8 x double> %x, %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y ret <8 x double> %max @@ -30,6 +32,7 @@ ; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %y = load <16 x i32>, <16 x i32>* %yp, align 4 %mask = icmp eq <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -42,6 +45,7 @@ ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask = icmp uge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y ret <16 x i32> %max @@ -53,6 +57,7 @@ ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask = icmp eq <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y ret <8 x i64> %max @@ -64,6 +69,7 @@ ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpblendmq %zmm2, %zmm1, 
%zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask = icmp ugt <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y ret <8 x i64> %max @@ -117,12 +123,14 @@ ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test9: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %mask = icmp eq <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y ret <8 x i32> %max @@ -137,12 +145,14 @@ ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test10: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %mask = fcmp oeq <8 x float> %x, %y %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y @@ -154,6 +164,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask = icmp ugt <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y ret <8 x i32> %max @@ -168,6 +179,7 @@ ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: %AX %AX %EAX ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test12: ; SKX: ## BB#0: @@ -178,6 +190,7 @@ ; SKX-NEXT: ## kill: %AX %AX %EAX ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %res = icmp eq <16 x i64> %a, %b %res1 = bitcast <16 x i1> %res to i16 ret i16 %res1 @@ -330,6 +343,7 @@ ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test12_v32i32: ; SKX: ## BB#0: @@ -339,6 +353,7 @@ ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %res = icmp eq <32 x i32> %a, %b %res1 = 
bitcast <32 x i1> %res to i32 ret i32 %res1 @@ -562,72 +577,72 @@ ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0 -; KNL-NEXT: vpmovsxwd %ymm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm1 +; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 +; KNL-NEXT: vpslld $31, %zmm1, %zmm1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm0 -; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; KNL-NEXT: vmovd %ecx, %xmm1 +; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; 
KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm0 +; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 @@ -642,6 +657,7 @@ ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test12_v64i16: ; SKX: ## BB#0: @@ -651,6 +667,7 @@ ; SKX-NEXT: kmovq %k0, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %res = icmp eq <64 x i16> %a, %b %res1 = bitcast <64 x i1> %res to i64 ret i64 %res1 @@ -704,6 +721,7 @@ ; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1 ; 
CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask = icmp sge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y ret <16 x i32> %max @@ -715,6 +733,7 @@ ; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sgt <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -727,6 +746,7 @@ ; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sle <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -739,6 +759,7 @@ ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp ule <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -752,6 +773,7 @@ ; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask1 = icmp eq <16 x i32> %x1, %y1 %mask0 = icmp eq <16 x i32> %x, %y %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer @@ -766,6 +788,7 @@ ; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask1 = icmp sge <8 x i64> %x1, %y1 %mask0 = icmp sle <8 x i64> %x, %y %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer @@ -780,6 +803,7 @@ ; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask1 = icmp sgt <8 x i64> %x1, %y1 %y = 
load <8 x i64>, <8 x i64>* %y.ptr, align 4 %mask0 = icmp sgt <8 x i64> %x, %y @@ -795,6 +819,7 @@ ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask1 = icmp sge <16 x i32> %x1, %y1 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask0 = icmp ule <16 x i32> %x, %y @@ -809,6 +834,7 @@ ; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer @@ -823,6 +849,7 @@ ; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer @@ -838,6 +865,7 @@ ; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask1 = icmp sge <16 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 @@ -855,6 +883,7 @@ ; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %mask1 = icmp sge <8 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 @@ -920,12 +949,14 @@ ; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test30: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End 
function %mask = fcmp oeq <4 x double> %x, %y %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y @@ -938,12 +969,14 @@ ; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test31: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %y = load <2 x double>, <2 x double>* %yp, align 4 %mask = fcmp olt <2 x double> %x, %y @@ -957,12 +990,14 @@ ; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test32: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %y = load <4 x double>, <4 x double>* %yp, align 4 %mask = fcmp ogt <4 x double> %y, %x @@ -976,6 +1011,7 @@ ; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %y = load <8 x double>, <8 x double>* %yp, align 4 %mask = fcmp olt <8 x double> %x, %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1 @@ -988,12 +1024,14 @@ ; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test34: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %y = load <4 x float>, <4 x float>* %yp, align 4 %mask = fcmp olt <4 x float> %x, %y %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1 @@ -1010,12 +1048,14 @@ ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test35: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %ymm0, 
%k1 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %y = load <8 x float>, <8 x float>* %yp, align 4 %mask = fcmp ogt <8 x float> %y, %x @@ -1029,6 +1069,7 @@ ; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %y = load <16 x float>, <16 x float>* %yp, align 4 %mask = fcmp olt <16 x float> %x, %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1 @@ -1041,6 +1082,7 @@ ; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %a = load double, double* %ptr %v = insertelement <8 x double> undef, double %a, i32 0 @@ -1058,12 +1100,14 @@ ; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test38: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %a = load double, double* %ptr %v = insertelement <4 x double> undef, double %a, i32 0 @@ -1081,12 +1125,14 @@ ; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test39: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %a = load double, double* %ptr %v = insertelement <2 x double> undef, double %a, i32 0 @@ -1104,6 +1150,7 @@ ; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function %a = load float, float* %ptr %v = insertelement <16 x float> undef, float %a, i32 0 @@ -1124,12 +1171,14 @@ ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 
; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test41: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %a = load float, float* %ptr %v = insertelement <8 x float> undef, float %a, i32 0 @@ -1147,12 +1196,14 @@ ; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test42: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %a = load float, float* %ptr %v = insertelement <4 x float> undef, float %a, i32 0 @@ -1172,6 +1223,7 @@ ; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: retq +; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test43: ; SKX: ## BB#0: @@ -1180,6 +1232,7 @@ ; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq +; SKX-NEXT: ## -- End function %a = load double, double* %ptr %v = insertelement <8 x double> undef, double %a, i32 0 Index: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -1685,8 +1685,6 @@ ; AVX512F-32-NEXT: .cfi_offset %esi, -12 ; AVX512F-32-NEXT: .Lcfi9: ; AVX512F-32-NEXT: .cfi_offset %ebx, -8 -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 -; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $5, %al @@ -1707,39 +1705,39 @@ ; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX512F-32-NEXT: kmovd %ecx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 ; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %edx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; 
AVX512F-32-NEXT: kmovd %ebx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %eax, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpsllq $40, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $6, %al @@ -1748,8 +1746,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; 
AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al @@ -1758,8 +1756,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %al @@ -1767,8 +1765,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: andb $2, %al @@ -1777,8 +1775,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %dl @@ -1789,8 +1787,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al @@ -1798,8 +1796,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: 
vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1809,8 +1807,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1820,8 +1818,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1831,8 +1829,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1842,8 +1840,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1852,8 +1850,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, 
%ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1864,8 +1862,8 @@ ; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %bl @@ -1877,8 +1875,8 @@ ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %dl @@ -1887,8 +1885,8 @@ ; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1898,8 +1896,8 @@ ; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1910,8 +1908,8 @@ ; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = 
zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1921,8 +1919,8 @@ ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: # kill: %AL %AL %EAX %EAX @@ -1932,8 +1930,8 @@ ; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1942,8 +1940,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, 
%ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1952,444 +1950,444 @@ ; AVX512F-32-NEXT: kmovd %edx, %k1 ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm3 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm3, %k0 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 +; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm4 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm5 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm4, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4] +; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm5 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm6 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm6, %ymm5, %ymm5 +; AVX512F-32-NEXT: vshufi64x2 
{{.*#+}} zmm5 = zmm5[0,1,2,3],zmm6[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $28, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm5 +; AVX512F-32-NEXT: vpbroadcastd %xmm5, %xmm5 +; AVX512F-32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm6 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm7 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm7, %ymm6, %ymm6 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm6 = zmm6[0,1,2,3],zmm7[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm6, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: movl %ecx, %esi ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-32-NEXT: 
vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm6 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm6 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm6[0,1,2] +; AVX512F-32-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm7 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm2, %ymm7, %ymm7 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %esi, %eax ; AVX512F-32-NEXT: shrl $30, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %esi, %eax ; AVX512F-32-NEXT: shrl $31, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm7 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: 
vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: 
vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $4, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, 
%ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $5, %al ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $6, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; 
AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $7, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: 
movb %ch, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = 
[255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; 
AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000 ; AVX512F-32-NEXT: shrl $12, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $13, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, 
%zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000 ; AVX512F-32-NEXT: shrl $14, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 
= [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000 ; AVX512F-32-NEXT: shrl $15, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %ebx ; AVX512F-32-NEXT: shrl $16, %ebx ; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %bl, %dl ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; AVX512F-32-NEXT: movb %bl, %al ; 
AVX512F-32-NEXT: andb $15, %al ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: shrb $2, %al ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; 
AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $4, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $5, %al ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, 
%ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $6, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: # kill: %BL %BL %EBX %EBX ; AVX512F-32-NEXT: shrb $7, %bl ; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $24, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, 
%ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: 
movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 ; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: kmovd %eax, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4] +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3 +; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: shrl $28, %eax +; 
AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm1, %ymm1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vpbroadcastd %xmm4, %xmm4 +; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2] +; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm3, %ymm4, %ymm3 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $30, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vpbroadcastw %xmm4, %xmm4 +; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm4, %ymm3 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $31, %eax ; AVX512F-32-NEXT: kshiftlq $1, 
%k0, %k0 @@ -2397,12 +2395,12 @@ ; AVX512F-32-NEXT: kmovd %eax, %k1 ; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 ; AVX512F-32-NEXT: korq %k1, %k0, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm6, %zmm5, %k0 {%k1} -; AVX512F-32-NEXT: vpcmpgtb %zmm5, %zmm6, %k2 {%k1} -; AVX512F-32-NEXT: vpcmpleb %zmm6, %zmm5, %k3 {%k1} -; AVX512F-32-NEXT: vpcmpneqb %zmm6, %zmm5, %k4 {%k1} -; AVX512F-32-NEXT: vpcmpleb %zmm5, %zmm6, %k5 {%k1} -; AVX512F-32-NEXT: vpcmpgtb %zmm6, %zmm5, %k1 {%k1} +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 {%k1} +; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k3 {%k1} +; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k4 {%k1} +; AVX512F-32-NEXT: vpcmpleb %zmm0, %zmm1, %k5 {%k1} +; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k1 {%k1} ; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2571,8 +2569,6 @@ ; AVX512F-32-NEXT: .cfi_offset %esi, -12 ; AVX512F-32-NEXT: .Lcfi15: ; AVX512F-32-NEXT: .cfi_offset %ebx, -8 -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 -; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $5, %al @@ -2593,39 +2589,39 @@ ; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX512F-32-NEXT: kmovd %ecx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; 
AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 ; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %edx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %ebx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 ; 
AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %eax, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpsllq $40, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $6, %al @@ -2634,8 +2630,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al @@ -2644,8 +2640,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; 
AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %al @@ -2653,8 +2649,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: andb $2, %al @@ -2663,8 +2659,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %dl @@ -2675,8 +2671,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; 
AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al @@ -2684,8 +2680,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2695,8 +2691,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = 
[255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2706,8 +2702,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2717,8 +2713,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2728,8 +2724,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq 
{{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2738,8 +2734,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2750,8 +2746,8 @@ ; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %bl @@ -2763,8 +2759,8 @@ ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %dl @@ -2773,8 +2769,8 @@ ; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2784,8 +2780,8 @@ ; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: 
vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2796,8 +2792,8 @@ ; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2807,8 +2803,8 @@ ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, 
%ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: # kill: %AL %AL %EAX %EAX @@ -2818,8 +2814,8 @@ ; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2828,8 +2824,8 @@ ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2838,444 +2834,444 @@ ; AVX512F-32-NEXT: kmovd %edx, %k1 ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; 
AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm3 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm3, %k0 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 +; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm4 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm5 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm4, %k0 
; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4] +; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm5 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm6 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm6, %ymm5, %ymm5 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm6[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $28, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm5 +; AVX512F-32-NEXT: vpbroadcastd %xmm5, %xmm5 +; AVX512F-32-NEXT: 
vinserti128 $1, %xmm5, %ymm0, %ymm6 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm7 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm7, %ymm6, %ymm6 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm6 = zmm6[0,1,2,3],zmm7[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm6, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: movl %ecx, %esi ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm6 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm6 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm6[0,1,2] +; AVX512F-32-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm7 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm2, %ymm7, %ymm7 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %esi, %eax ; AVX512F-32-NEXT: shrl $30, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; 
AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %esi, %eax ; AVX512F-32-NEXT: shrl $31, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm7 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllw $8, 
%xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = 
[255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $4, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $5, %al ; AVX512F-32-NEXT: andb $1, %al ; 
AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $6, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; 
AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $7, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, 
%ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = 
[255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; 
AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000 ; AVX512F-32-NEXT: shrl $12, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $13, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000 ; AVX512F-32-NEXT: shrl $14, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000 ; AVX512F-32-NEXT: shrl $15, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] -; AVX512F-32-NEXT: 
vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %ebx ; AVX512F-32-NEXT: shrl $16, %ebx ; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, 
%ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %bl, %dl ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: andb $15, %al ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: shrb $2, %al ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, 
%zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $4, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: 
vpbroadcastd %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $5, %al ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: 
vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $6, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: # kill: %BL %BL %EBX %EBX ; AVX512F-32-NEXT: shrb $7, %bl ; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $24, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl ; 
AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 +; AVX512F-32-NEXT: vpmovm2b %k0, 
%zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 ; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: kmovd %eax, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4] +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3 +; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: shrl $28, %eax +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm1, %ymm1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vpbroadcastd %xmm4, %xmm4 +; AVX512F-32-NEXT: vinserti128 $1, 
%xmm4, %ymm0, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm4 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2] +; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm3, %ymm4, %ymm3 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $30, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vpbroadcastw %xmm4, %xmm4 +; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm4, %ymm3 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $31, %eax ; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0 @@ -3283,12 +3279,12 @@ ; AVX512F-32-NEXT: kmovd %eax, %k1 ; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 ; AVX512F-32-NEXT: korq %k1, %k0, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm6, %zmm5, %k0 {%k1} -; AVX512F-32-NEXT: vpcmpltub %zmm6, %zmm5, %k2 {%k1} -; AVX512F-32-NEXT: vpcmpleub %zmm6, %zmm5, %k3 {%k1} -; AVX512F-32-NEXT: vpcmpneqb %zmm6, %zmm5, %k4 {%k1} -; AVX512F-32-NEXT: vpcmpnltub %zmm6, %zmm5, %k5 {%k1} -; AVX512F-32-NEXT: vpcmpnleub %zmm6, %zmm5, %k1 {%k1} +; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k2 {%k1} +; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k3 {%k1} +; 
AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k4 {%k1} +; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k5 {%k1} +; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k1 {%k1} ; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx Index: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -2695,32 +2695,32 @@ define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { ; CHECK-LABEL: test_cmp_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] -; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] -; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] ; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] -; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xc0,0x02] -; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] -; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] -; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: 
[0xc4,0xe3,0x79,0x22,0xc0,0x02] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] +; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0] +; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01] +; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02] ; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] -; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] -; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] -; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] -; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] -; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] -; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0] -; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] +; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03] +; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vmovd %eax, %xmm1 ## 
EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8] +; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0] +; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca] +; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 @@ -2750,23 +2750,23 @@ ; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0] ; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] ; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] -; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] -; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] -; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] ; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] ; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression 
encoding: [0xc5,0xf9,0x6e,0xc0] -; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01] -; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] +; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] +; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] ; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] -; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] -; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] +; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02] +; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03] ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) @@ -2793,32 +2793,32 @@ define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { ; CHECK-LABEL: test_ucmp_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] -; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] -; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] -; CHECK-NEXT: kmovd 
%k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] ; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] -; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] -; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] -; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] -; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] +; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0] +; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01] +; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02] ; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] -; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] -; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] -; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] -; CHECK-NEXT: ## xmm1 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] -; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] -; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0] -; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] +; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03] +; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] +; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8] +; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0] +; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca] +; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1] +; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 @@ -2848,23 +2848,23 @@ ; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] ; CHECK-NEXT: kmovd %k0, %r9d 
## encoding: [0xc5,0x7b,0x93,0xc8] ; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] -; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] -; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] -; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] ; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] ; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] -; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01] -; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] +; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] +; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] +; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] +; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] ; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] -; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: 
[0xc4,0xc3,0x71,0x22,0xca,0x02] -; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] +; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02] +; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03] ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) Index: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -453,10 +453,10 @@ ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4 -; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5 ; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) +; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4 ; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: andb $1, %al Index: llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll +++ llvm/trunk/test/CodeGen/X86/extractelement-legalization-store-ordering.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" @@ -6,31 +7,32 @@ ; into loads, off the 
stack or a previous store. ; Be very explicit about the ordering/stack offsets. -; CHECK-LABEL: test_extractelement_legalization_storereuse: -; CHECK: # BB#0 -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl 16(%esp), %eax -; CHECK-NEXT: movl 24(%esp), %ecx -; CHECK-NEXT: movl 20(%esp), %edx -; CHECK-NEXT: paddd (%edx), %xmm0 -; CHECK-NEXT: movdqa %xmm0, (%edx) -; CHECK-NEXT: movl (%edx), %esi -; CHECK-NEXT: movl 4(%edx), %edi -; CHECK-NEXT: shll $4, %ecx -; CHECK-NEXT: movl 8(%edx), %ebx -; CHECK-NEXT: movl 12(%edx), %edx -; CHECK-NEXT: movl %esi, 12(%eax,%ecx) -; CHECK-NEXT: movl %edi, (%eax,%ecx) -; CHECK-NEXT: movl %ebx, 8(%eax,%ecx) -; CHECK-NEXT: movl %edx, 4(%eax,%ecx) -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi -; CHECK-NEXT: popl %ebx -; CHECK-NEXT: retl define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 { +; CHECK-LABEL: _test_extractelement_legalization_storereuse: ## @test_extractelement_legalization_storereuse +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: paddd (%ecx), %xmm0 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movdqa %xmm0, (%ecx) +; CHECK-NEXT: movl (%ecx), %esi +; CHECK-NEXT: movl 4(%ecx), %edi +; CHECK-NEXT: shll $4, %edx +; CHECK-NEXT: movl 8(%ecx), %ebx +; CHECK-NEXT: movl 12(%ecx), %ecx +; CHECK-NEXT: movl %esi, 12(%eax,%edx) +; CHECK-NEXT: movl %edi, (%eax,%edx) +; CHECK-NEXT: movl %ebx, 8(%eax,%edx) +; CHECK-NEXT: movl %ecx, 4(%eax,%edx) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl +; CHECK-NEXT: ## -- End function entry: %0 = bitcast i32* %y to <4 x i32>* %1 = load <4 x i32>, <4 x i32>* %0, align 16 Index: llvm/trunk/test/CodeGen/X86/fp128-i128.ll 
=================================================================== --- llvm/trunk/test/CodeGen/X86/fp128-i128.ll +++ llvm/trunk/test/CodeGen/X86/fp128-i128.ll @@ -50,8 +50,8 @@ ; CHECK-NEXT: andq %rdi, %rcx ; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 ; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: orq %rcx, %rdx ; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: orq %rcx, %rdx ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: jmp foo # TAILCALL Index: llvm/trunk/test/CodeGen/X86/gather-addresses.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/gather-addresses.ll +++ llvm/trunk/test/CodeGen/X86/gather-addresses.ll @@ -16,11 +16,10 @@ ; LIN: sarq $32, %r[[REG2]] ; LIN: movslq %e[[REG4]], %r[[REG3:.+]] ; LIN: sarq $32, %r[[REG4]] -; LIN: movsd (%rdi,%r[[REG1]],8), %xmm0 -; LIN: movhpd (%rdi,%r[[REG2]],8), %xmm0 -; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 -; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 - +; LIN: movsd (%rdi,%rsi,8), %xmm1 +; LIN: movhpd (%rdi,%rax,8), %xmm1 +; LIN: movdqa (%rsi), %xmm0 +; LIN: movq %rdi, %xmm1 ; WIN: movdqa (%rdx), %xmm0 ; WIN: pand (%r8), %xmm0 ; WIN: pextrq $1, %xmm0, %r[[REG4:.+]] @@ -29,10 +28,10 @@ ; WIN: sarq $32, %r[[REG2]] ; WIN: movslq %e[[REG4]], %r[[REG3:.+]] ; WIN: sarq $32, %r[[REG4]] -; WIN: movsd (%rcx,%r[[REG1]],8), %xmm0 -; WIN: movhpd (%rcx,%r[[REG2]],8), %xmm0 -; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 -; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 +; WIN: movsd (%rcx,%r9,8), %xmm1 +; WIN: movhpd (%rcx,%rax,8), %xmm1 +; WIN: movdqa (%rdx), %xmm0 +; WIN: movq %rdx, %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>, <4 x i32>* %i Index: llvm/trunk/test/CodeGen/X86/half.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/half.ll +++ llvm/trunk/test/CodeGen/X86/half.ll @@ -1,266 
+1,834 @@ -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false -fixup-byte-word-insts=1 \ -; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL -check-prefix=BWON -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false -fixup-byte-word-insts=0 \ -; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL -check-prefix=BWOFF -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false -fixup-byte-word-insts=1 \ -; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C -check-prefix=BWON -; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false -fixup-byte-word-insts=0 \ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -fixup-byte-word-insts=1 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,NOF16-BWINSTS +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -fixup-byte-word-insts=0 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF,NOF16-NOBWINSTS +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -fixup-byte-word-insts=1 \ +; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,CHECK-F16C +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 \ ; RUN: | FileCheck %s -check-prefix=CHECK-I686 -define void @test_load_store(half* %in, half* %out) { -; CHECK-LABEL: test_load_store: -; BWON: movzwl (%rdi), %eax -; BWOFF: movw (%rdi), %ax -; CHECK: movw %ax, (%rsi) +define void @test_load_store(half* %in, half* %out) #0 { +; BWON-LABEL: test_load_store: +; BWON: # BB#0: +; BWON-NEXT: movzwl (%rdi), %eax +; BWON-NEXT: movw %ax, (%rsi) +; BWON-NEXT: retq +; +; BWOFF-LABEL: test_load_store: +; BWOFF: # 
BB#0: +; BWOFF-NEXT: movw (%rdi), %ax +; BWOFF-NEXT: movw %ax, (%rsi) +; BWOFF-NEXT: retq +; +; CHECK-I686-LABEL: test_load_store: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-I686-NEXT: movw (%ecx), %cx +; CHECK-I686-NEXT: movw %cx, (%eax) +; CHECK-I686-NEXT: retl %val = load half, half* %in store half %val, half* %out ret void } -define i16 @test_bitcast_from_half(half* %addr) { -; CHECK-LABEL: test_bitcast_from_half: -; BWON: movzwl (%rdi), %eax -; BWOFF: movw (%rdi), %ax +define i16 @test_bitcast_from_half(half* %addr) #0 { +; BWON-LABEL: test_bitcast_from_half: +; BWON: # BB#0: +; BWON-NEXT: movzwl (%rdi), %eax +; BWON-NEXT: retq +; +; BWOFF-LABEL: test_bitcast_from_half: +; BWOFF: # BB#0: +; BWOFF-NEXT: movw (%rdi), %ax +; BWOFF-NEXT: retq +; +; CHECK-I686-LABEL: test_bitcast_from_half: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-I686-NEXT: movw (%eax), %ax +; CHECK-I686-NEXT: retl %val = load half, half* %addr %val_int = bitcast half %val to i16 ret i16 %val_int } -define void @test_bitcast_to_half(half* %addr, i16 %in) { +define void @test_bitcast_to_half(half* %addr, i16 %in) #0 { ; CHECK-LABEL: test_bitcast_to_half: -; CHECK: movw %si, (%rdi) +; CHECK: # BB#0: +; CHECK-NEXT: movw %si, (%rdi) +; CHECK-NEXT: retq +; +; CHECK-I686-LABEL: test_bitcast_to_half: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-I686-NEXT: movw %ax, (%ecx) +; CHECK-I686-NEXT: retl %val_fp = bitcast i16 %in to half store half %val_fp, half* %addr ret void } -define float @test_extend32(half* %addr) { -; CHECK-LABEL: test_extend32: - -; CHECK-LIBCALL: jmp __gnu_h2f_ieee -; CHECK-F16C: vcvtph2ps +define float @test_extend32(half* %addr) #0 { +; CHECK-LIBCALL-LABEL: test_extend32: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi +; CHECK-LIBCALL-NEXT: jmp __gnu_h2f_ieee 
# TAILCALL +; +; CHECK-F16C-LABEL: test_extend32: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: movswl (%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_extend32: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: subl $12, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-I686-NEXT: movzwl (%eax), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: addl $12, %esp +; CHECK-I686-NEXT: retl %val16 = load half, half* %addr %val32 = fpext half %val16 to float ret float %val32 } -define double @test_extend64(half* %addr) { -; CHECK-LABEL: test_extend64: - -; CHECK-LIBCALL: callq __gnu_h2f_ieee -; CHECK-LIBCALL: cvtss2sd -; CHECK-F16C: vcvtph2ps -; CHECK-F16C: vcvtss2sd +define double @test_extend64(half* %addr) #0 { +; CHECK-LIBCALL-LABEL: test_extend64: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rax +; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-LIBCALL-NEXT: popq %rax +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_extend64: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: movswl (%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_extend64: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: subl $12, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-I686-NEXT: movzwl (%eax), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: addl $12, %esp +; CHECK-I686-NEXT: retl %val16 = load half, half* %addr %val32 = fpext half %val16 to double ret double %val32 } -define void @test_trunc32(float %in, half* %addr) { -; CHECK-LABEL: test_trunc32: - -; CHECK-LIBCALL: callq __gnu_f2h_ieee -; CHECK-F16C: vcvtps2ph +define void 
@test_trunc32(float %in, half* %addr) #0 { +; CHECK-LIBCALL-LABEL: test_trunc32: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rbx +; CHECK-LIBCALL-NEXT: movq %rdi, %rbx +; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee +; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) +; CHECK-LIBCALL-NEXT: popq %rbx +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_trunc32: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; CHECK-F16C-NEXT: vmovd %xmm0, %eax +; CHECK-F16C-NEXT: movw %ax, (%rdi) +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_trunc32: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: pushl %esi +; CHECK-I686-NEXT: subl $8, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: movss %xmm0, (%esp) +; CHECK-I686-NEXT: calll __gnu_f2h_ieee +; CHECK-I686-NEXT: movw %ax, (%esi) +; CHECK-I686-NEXT: addl $8, %esp +; CHECK-I686-NEXT: popl %esi +; CHECK-I686-NEXT: retl %val16 = fptrunc float %in to half store half %val16, half* %addr ret void } -define void @test_trunc64(double %in, half* %addr) { +define void @test_trunc64(double %in, half* %addr) #0 { ; CHECK-LABEL: test_trunc64: - -; CHECK-LIBCALL: callq __truncdfhf2 -; CHECK-F16C: callq __truncdfhf2 +; CHECK: # BB#0: +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: callq __truncdfhf2 +; CHECK-NEXT: movw %ax, (%rbx) +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: retq +; +; CHECK-I686-LABEL: test_trunc64: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: pushl %esi +; CHECK-I686-NEXT: subl $8, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-I686-NEXT: movsd %xmm0, (%esp) +; CHECK-I686-NEXT: calll __truncdfhf2 +; CHECK-I686-NEXT: movw %ax, (%esi) +; CHECK-I686-NEXT: addl $8, %esp +; CHECK-I686-NEXT: popl %esi +; CHECK-I686-NEXT: retl %val16 = fptrunc double %in to half store half %val16, half* %addr ret void } define i64 @test_fptosi_i64(half* 
%p) #0 { -; CHECK-LABEL: test_fptosi_i64: - -; CHECK-LIBCALL-NEXT: pushq %rax -; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax -; CHECK-LIBCALL-NEXT: popq %rcx -; CHECK-LIBCALL-NEXT: retq - -; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax -; CHECK-F16C-NEXT: retq +; CHECK-LIBCALL-LABEL: test_fptosi_i64: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rax +; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax +; CHECK-LIBCALL-NEXT: popq %rcx +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_fptosi_i64: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: movswl (%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: vcvttss2si %xmm0, %rax +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_fptosi_i64: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: subl $12, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-I686-NEXT: movzwl (%eax), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstps (%esp) +; CHECK-I686-NEXT: calll __fixsfdi +; CHECK-I686-NEXT: addl $12, %esp +; CHECK-I686-NEXT: retl %a = load half, half* %p, align 2 %r = fptosi half %a to i64 ret i64 %r } define void @test_sitofp_i64(i64 %a, half* %p) #0 { -; CHECK-LABEL: test_sitofp_i64: - -; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]] -; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]] -; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 -; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee -; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]]) -; CHECK_LIBCALL-NEXT: popq [[ADDR]] -; CHECK_LIBCALL-NEXT: retq - -; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]] -; CHECK-F16C-NEXT: 
vcvtps2ph $4, [[REG0]], [[REG0]] -; CHECK-F16C-NEXT: vmovd [[REG0]], %eax -; CHECK-F16C-NEXT: movw %ax, (%rsi) -; CHECK-F16C-NEXT: retq +; CHECK-LIBCALL-LABEL: test_sitofp_i64: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rbx +; CHECK-LIBCALL-NEXT: movq %rsi, %rbx +; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 +; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee +; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) +; CHECK-LIBCALL-NEXT: popq %rbx +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_sitofp_i64: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; CHECK-F16C-NEXT: vmovd %xmm0, %eax +; CHECK-F16C-NEXT: movw %ax, (%rsi) +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_sitofp_i64: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: pushl %esi +; CHECK-I686-NEXT: subl $24, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: movss %xmm0, (%esp) +; CHECK-I686-NEXT: calll __gnu_f2h_ieee +; CHECK-I686-NEXT: movw %ax, (%esi) +; CHECK-I686-NEXT: addl $24, %esp +; CHECK-I686-NEXT: popl %esi +; CHECK-I686-NEXT: retl %r = sitofp i64 %a to half store half %r, half* %p ret void } define i64 @test_fptoui_i64(half* %p) #0 { -; CHECK-LABEL: test_fptoui_i64: - -; FP_TO_UINT is expanded using FP_TO_SINT -; CHECK-LIBCALL-NEXT: pushq %rax -; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]] -; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]] -; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]] -; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]] -; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]] -; 
CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]] -; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]] -; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0 -; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]] -; CHECK-LIBCALL-NEXT: popq %rcx -; CHECK-LIBCALL-NEXT: retq - -; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]] -; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]] -; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]] -; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax -; CHECK-F16C-NEXT: retq +; CHECK-LIBCALL-LABEL: test_fptoui_i64: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rax +; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm2 +; CHECK-LIBCALL-NEXT: subss %xmm1, %xmm2 +; CHECK-LIBCALL-NEXT: cvttss2si %xmm2, %rcx +; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000 +; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax +; CHECK-LIBCALL-NEXT: xorq %rcx, %rdx +; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0 +; CHECK-LIBCALL-NEXT: cmovaeq %rdx, %rax +; CHECK-LIBCALL-NEXT: popq %rcx +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_fptoui_i64: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: movswl (%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-F16C-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; CHECK-F16C-NEXT: vcvttss2si %xmm2, %rcx +; CHECK-F16C-NEXT: 
movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000 +; CHECK-F16C-NEXT: vcvttss2si %xmm0, %rax +; CHECK-F16C-NEXT: xorq %rcx, %rdx +; CHECK-F16C-NEXT: vucomiss %xmm1, %xmm0 +; CHECK-F16C-NEXT: cmovaeq %rdx, %rax +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_fptoui_i64: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: subl $12, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-I686-NEXT: movzwl (%eax), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstps (%esp) +; CHECK-I686-NEXT: calll __fixunssfdi +; CHECK-I686-NEXT: addl $12, %esp +; CHECK-I686-NEXT: retl %a = load half, half* %p, align 2 %r = fptoui half %a to i64 ret i64 %r } define void @test_uitofp_i64(i64 %a, half* %p) #0 { -; CHECK-LABEL: test_uitofp_i64: -; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]] -; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]] -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]] - -; simple conversion to float if non-negative -; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]] -; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]] - -; convert using shift+or if negative -; CHECK-NEXT: [[LABEL1]]: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shrq %rax -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: orq %rax, [[REG2:%[a-z0-9]+]] -; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]] -; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]] -; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]] -; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1:[%a-z0-9]+]] - -; convert float to half -; CHECK-NEXT: [[LABEL2]]: -; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee -; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]]) -; CHECK-LIBCALL-NEXT: popq [[ADDR]] -; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]] -; CHECK-F16C-NEXT: vmovd [[REG4]], %eax -; CHECK-F16C-NEXT: movw %ax, (%rsi) -; CHECK-NEXT: retq - +; CHECK-LIBCALL-LABEL: 
test_uitofp_i64: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rbx +; CHECK-LIBCALL-NEXT: movq %rsi, %rbx +; CHECK-LIBCALL-NEXT: testq %rdi, %rdi +; CHECK-LIBCALL-NEXT: js .LBB10_1 +; CHECK-LIBCALL-NEXT: # BB#2: +; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 +; CHECK-LIBCALL-NEXT: jmp .LBB10_3 +; CHECK-LIBCALL-NEXT: .LBB10_1: +; CHECK-LIBCALL-NEXT: movq %rdi, %rax +; CHECK-LIBCALL-NEXT: shrq %rax +; CHECK-LIBCALL-NEXT: andl $1, %edi +; CHECK-LIBCALL-NEXT: orq %rax, %rdi +; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 +; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0 +; CHECK-LIBCALL-NEXT: .LBB10_3: +; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee +; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) +; CHECK-LIBCALL-NEXT: popq %rbx +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_uitofp_i64: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: testq %rdi, %rdi +; CHECK-F16C-NEXT: js .LBB10_1 +; CHECK-F16C-NEXT: # BB#2: +; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 +; CHECK-F16C-NEXT: jmp .LBB10_3 +; CHECK-F16C-NEXT: .LBB10_1: +; CHECK-F16C-NEXT: movq %rdi, %rax +; CHECK-F16C-NEXT: shrq %rax +; CHECK-F16C-NEXT: andl $1, %edi +; CHECK-F16C-NEXT: orq %rax, %rdi +; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 +; CHECK-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; CHECK-F16C-NEXT: .LBB10_3: +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; CHECK-F16C-NEXT: vmovd %xmm0, %eax +; CHECK-F16C-NEXT: movw %ax, (%rsi) +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_uitofp_i64: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: pushl %esi +; CHECK-I686-NEXT: subl $24, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: xorl %eax, %eax +; CHECK-I686-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: setns %al +; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; CHECK-I686-NEXT: fstps (%esp) +; CHECK-I686-NEXT: calll __gnu_f2h_ieee +; 
CHECK-I686-NEXT: movw %ax, (%esi) +; CHECK-I686-NEXT: addl $24, %esp +; CHECK-I686-NEXT: popl %esi +; CHECK-I686-NEXT: retl %r = uitofp i64 %a to half store half %r, half* %p ret void } define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { -; CHECK-LABEL: test_extend32_vec4: - -; CHECK-LIBCALL: callq __gnu_h2f_ieee -; CHECK-LIBCALL: callq __gnu_h2f_ieee -; CHECK-LIBCALL: callq __gnu_h2f_ieee -; CHECK-LIBCALL: callq __gnu_h2f_ieee -; CHECK-F16C: vcvtph2ps -; CHECK-F16C: vcvtph2ps -; CHECK-F16C: vcvtph2ps -; CHECK-F16C: vcvtph2ps +; CHECK-LIBCALL-LABEL: test_extend32_vec4: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rbx +; CHECK-LIBCALL-NEXT: subq $48, %rsp +; CHECK-LIBCALL-NEXT: movq %rdi, %rbx +; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; CHECK-LIBCALL-NEXT: movzwl 4(%rbx), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; CHECK-LIBCALL-NEXT: movzwl (%rbx), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload +; CHECK-LIBCALL-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-LIBCALL-NEXT: insertps $32, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0,1],mem[0],xmm1[3] +; CHECK-LIBCALL-NEXT: insertps $48, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload +; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0,1,2],mem[0] +; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0 +; CHECK-LIBCALL-NEXT: addq $48, %rsp +; CHECK-LIBCALL-NEXT: popq %rbx +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_extend32_vec4: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: movswl 6(%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: 
vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: movswl 4(%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm1 +; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 +; CHECK-F16C-NEXT: movswl (%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm2 +; CHECK-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 +; CHECK-F16C-NEXT: movswl 2(%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm3 +; CHECK-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 +; CHECK-F16C-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] +; CHECK-F16C-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] +; CHECK-F16C-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_extend32_vec4: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: pushl %esi +; CHECK-I686-NEXT: subl $56, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-I686-NEXT: movzwl 2(%esi), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill +; CHECK-I686-NEXT: movzwl 4(%esi), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill +; CHECK-I686-NEXT: movzwl 6(%esi), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: movzwl (%esi), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload +; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload +; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; 
CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; CHECK-I686-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-I686-NEXT: addl $56, %esp +; CHECK-I686-NEXT: popl %esi +; CHECK-I686-NEXT: retl %a = load <4 x half>, <4 x half>* %p, align 8 %b = fpext <4 x half> %a to <4 x float> ret <4 x float> %b } define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 { -; CHECK-LABEL: test_extend64_vec4 - -; CHECK-LIBCALL: callq __gnu_h2f_ieee -; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee -; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee -; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee -; CHECK-LIBCALL-DAG: cvtss2sd -; CHECK-LIBCALL-DAG: cvtss2sd -; CHECK-LIBCALL-DAG: cvtss2sd -; CHECK-LIBCALL: cvtss2sd -; CHECK-F16C: vcvtph2ps -; CHECK-F16C-DAG: vcvtph2ps -; CHECK-F16C-DAG: vcvtph2ps -; CHECK-F16C-DAG: vcvtph2ps -; CHECK-F16C-DAG: vcvtss2sd -; CHECK-F16C-DAG: vcvtss2sd -; CHECK-F16C-DAG: vcvtss2sd -; CHECK-F16C: vcvtss2sd +; CHECK-LIBCALL-LABEL: test_extend64_vec4: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rbx +; CHECK-LIBCALL-NEXT: subq $16, %rsp +; CHECK-LIBCALL-NEXT: movq %rdi, %rbx +; CHECK-LIBCALL-NEXT: movzwl 4(%rbx), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-LIBCALL-NEXT: movzwl (%rbx), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1 +; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload +; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 +; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} 
xmm0 = xmm0[0],xmm1[0] +; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload +; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm2 +; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload +; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero +; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm1 +; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-LIBCALL-NEXT: addq $16, %rsp +; CHECK-LIBCALL-NEXT: popq %rbx +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_extend64_vec4: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: movswl (%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: movswl 2(%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm1 +; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 +; CHECK-F16C-NEXT: movswl 4(%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm2 +; CHECK-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 +; CHECK-F16C-NEXT: movswl 6(%rdi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm3 +; CHECK-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 +; CHECK-F16C-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 +; CHECK-F16C-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 +; CHECK-F16C-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; CHECK-F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; CHECK-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; CHECK-F16C-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_extend64_vec4: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: pushl %esi +; CHECK-I686-NEXT: subl $88, %esp +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi +; CHECK-I686-NEXT: movzwl 6(%esi), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill +; CHECK-I686-NEXT: movzwl 4(%esi), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstpt 
{{[0-9]+}}(%esp) # 10-byte Folded Spill +; CHECK-I686-NEXT: movzwl 2(%esi), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill +; CHECK-I686-NEXT: movzwl (%esi), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload +; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload +; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload +; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-I686-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] +; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-I686-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] +; CHECK-I686-NEXT: addl $88, %esp +; CHECK-I686-NEXT: popl %esi +; CHECK-I686-NEXT: retl %a = load <4 x half>, <4 x half>* %p, align 8 %b = fpext <4 x half> %a to <4 x double> ret <4 x double> %b } -define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) { -; CHECK-LABEL: test_trunc32_vec4: - -; CHECK-LIBCALL: callq __gnu_f2h_ieee -; CHECK-LIBCALL: callq __gnu_f2h_ieee -; CHECK-LIBCALL: callq __gnu_f2h_ieee -; CHECK-LIBCALL: callq __gnu_f2h_ieee -; CHECK-F16C: vcvtps2ph -; CHECK-F16C: vcvtps2ph -; CHECK-F16C: vcvtps2ph -; CHECK-F16C: vcvtps2ph -; CHECK: movw -; CHECK: movw -; CHECK: movw -; CHECK: movw +define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 { +; NOF16-BWINSTS-LABEL: test_trunc32_vec4: +; NOF16-BWINSTS: # BB#0: +; NOF16-BWINSTS-NEXT: pushq %rbp +; NOF16-BWINSTS-NEXT: pushq %r15 +; NOF16-BWINSTS-NEXT: pushq %r14 +; NOF16-BWINSTS-NEXT: pushq %rbx +; NOF16-BWINSTS-NEXT: subq $24, %rsp +; NOF16-BWINSTS-NEXT: movq %rdi, %rbx +; NOF16-BWINSTS-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; NOF16-BWINSTS-NEXT: movshdup 
{{.*#+}} xmm0 = xmm0[1,1,3,3] +; NOF16-BWINSTS-NEXT: callq __gnu_f2h_ieee +; NOF16-BWINSTS-NEXT: movl %eax, %r14d +; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; NOF16-BWINSTS-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; NOF16-BWINSTS-NEXT: callq __gnu_f2h_ieee +; NOF16-BWINSTS-NEXT: movl %eax, %r15d +; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; NOF16-BWINSTS-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; NOF16-BWINSTS-NEXT: callq __gnu_f2h_ieee +; NOF16-BWINSTS-NEXT: movl %eax, %ebp +; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; NOF16-BWINSTS-NEXT: callq __gnu_f2h_ieee +; NOF16-BWINSTS-NEXT: movw %ax, (%rbx) +; NOF16-BWINSTS-NEXT: movw %bp, 6(%rbx) +; NOF16-BWINSTS-NEXT: movw %r15w, 4(%rbx) +; NOF16-BWINSTS-NEXT: movw %r14w, 2(%rbx) +; NOF16-BWINSTS-NEXT: addq $24, %rsp +; NOF16-BWINSTS-NEXT: popq %rbx +; NOF16-BWINSTS-NEXT: popq %r14 +; NOF16-BWINSTS-NEXT: popq %r15 +; NOF16-BWINSTS-NEXT: popq %rbp +; NOF16-BWINSTS-NEXT: retq +; +; BWOFF-LABEL: test_trunc32_vec4: +; BWOFF: # BB#0: +; BWOFF-NEXT: pushq %rbp +; BWOFF-NEXT: pushq %r15 +; BWOFF-NEXT: pushq %r14 +; BWOFF-NEXT: pushq %rbx +; BWOFF-NEXT: subq $24, %rsp +; BWOFF-NEXT: movq %rdi, %rbx +; BWOFF-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; BWOFF-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; BWOFF-NEXT: callq __gnu_f2h_ieee +; BWOFF-NEXT: movw %ax, %r14w +; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; BWOFF-NEXT: callq __gnu_f2h_ieee +; BWOFF-NEXT: movw %ax, %r15w +; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; BWOFF-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; BWOFF-NEXT: callq __gnu_f2h_ieee +; BWOFF-NEXT: movw %ax, %bp +; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; BWOFF-NEXT: callq __gnu_f2h_ieee +; BWOFF-NEXT: movw %ax, (%rbx) +; BWOFF-NEXT: movw %bp, 6(%rbx) +; BWOFF-NEXT: movw %r15w, 4(%rbx) +; BWOFF-NEXT: movw %r14w, 2(%rbx) +; BWOFF-NEXT: addq $24, %rsp +; BWOFF-NEXT: popq 
%rbx +; BWOFF-NEXT: popq %r14 +; BWOFF-NEXT: popq %r15 +; BWOFF-NEXT: popq %rbp +; BWOFF-NEXT: retq +; +; CHECK-F16C-LABEL: test_trunc32_vec4: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; CHECK-F16C-NEXT: vmovd %xmm1, %eax +; CHECK-F16C-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; CHECK-F16C-NEXT: vmovd %xmm1, %ecx +; CHECK-F16C-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; CHECK-F16C-NEXT: vmovd %xmm1, %edx +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; CHECK-F16C-NEXT: vmovd %xmm0, %esi +; CHECK-F16C-NEXT: movw %si, (%rdi) +; CHECK-F16C-NEXT: movw %dx, 6(%rdi) +; CHECK-F16C-NEXT: movw %cx, 4(%rdi) +; CHECK-F16C-NEXT: movw %ax, 2(%rdi) +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_trunc32_vec4: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: pushl %ebp +; CHECK-I686-NEXT: pushl %ebx +; CHECK-I686-NEXT: pushl %edi +; CHECK-I686-NEXT: pushl %esi +; CHECK-I686-NEXT: subl $44, %esp +; CHECK-I686-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) # 16-byte Spill +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-I686-NEXT: movaps %xmm0, %xmm1 +; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; CHECK-I686-NEXT: movss %xmm1, (%esp) +; CHECK-I686-NEXT: calll __gnu_f2h_ieee +; CHECK-I686-NEXT: movw %ax, %si +; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload +; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-I686-NEXT: movss %xmm0, (%esp) +; CHECK-I686-NEXT: calll __gnu_f2h_ieee +; CHECK-I686-NEXT: movw %ax, %di +; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload +; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; CHECK-I686-NEXT: movss %xmm0, (%esp) +; CHECK-I686-NEXT: calll __gnu_f2h_ieee +; CHECK-I686-NEXT: movw %ax, %bx +; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload +; CHECK-I686-NEXT: movss %xmm0, (%esp) +; 
CHECK-I686-NEXT: calll __gnu_f2h_ieee +; CHECK-I686-NEXT: movw %ax, (%ebp) +; CHECK-I686-NEXT: movw %bx, 6(%ebp) +; CHECK-I686-NEXT: movw %di, 4(%ebp) +; CHECK-I686-NEXT: movw %si, 2(%ebp) +; CHECK-I686-NEXT: addl $44, %esp +; CHECK-I686-NEXT: popl %esi +; CHECK-I686-NEXT: popl %edi +; CHECK-I686-NEXT: popl %ebx +; CHECK-I686-NEXT: popl %ebp +; CHECK-I686-NEXT: retl %v = fptrunc <4 x float> %a to <4 x half> store <4 x half> %v, <4 x half>* %p ret void } -define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) { -; CHECK-LABEL: test_trunc64_vec4: -; CHECK: callq __truncdfhf2 -; CHECK: callq __truncdfhf2 -; CHECK: callq __truncdfhf2 -; CHECK: callq __truncdfhf2 -; CHECK: movw -; CHECK: movw -; CHECK: movw -; CHECK: movw +define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 { +; NOF16-BWINSTS-LABEL: test_trunc64_vec4: +; NOF16-BWINSTS: # BB#0: +; NOF16-BWINSTS-NEXT: pushq %rbp +; NOF16-BWINSTS-NEXT: pushq %r15 +; NOF16-BWINSTS-NEXT: pushq %r14 +; NOF16-BWINSTS-NEXT: pushq %rbx +; NOF16-BWINSTS-NEXT: subq $40, %rsp +; NOF16-BWINSTS-NEXT: movq %rdi, %rbx +; NOF16-BWINSTS-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; NOF16-BWINSTS-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; NOF16-BWINSTS-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; NOF16-BWINSTS-NEXT: callq __truncdfhf2 +; NOF16-BWINSTS-NEXT: movl %eax, %r14d +; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; NOF16-BWINSTS-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; NOF16-BWINSTS-NEXT: callq __truncdfhf2 +; NOF16-BWINSTS-NEXT: movl %eax, %r15d +; NOF16-BWINSTS-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; NOF16-BWINSTS-NEXT: callq __truncdfhf2 +; NOF16-BWINSTS-NEXT: movl %eax, %ebp +; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; NOF16-BWINSTS-NEXT: callq __truncdfhf2 +; NOF16-BWINSTS-NEXT: movw %ax, 4(%rbx) +; NOF16-BWINSTS-NEXT: movw %bp, (%rbx) +; NOF16-BWINSTS-NEXT: movw %r15w, 6(%rbx) +; NOF16-BWINSTS-NEXT: movw %r14w, 2(%rbx) +; 
NOF16-BWINSTS-NEXT: addq $40, %rsp +; NOF16-BWINSTS-NEXT: popq %rbx +; NOF16-BWINSTS-NEXT: popq %r14 +; NOF16-BWINSTS-NEXT: popq %r15 +; NOF16-BWINSTS-NEXT: popq %rbp +; NOF16-BWINSTS-NEXT: retq +; +; BWOFF-LABEL: test_trunc64_vec4: +; BWOFF: # BB#0: +; BWOFF-NEXT: pushq %rbp +; BWOFF-NEXT: pushq %r15 +; BWOFF-NEXT: pushq %r14 +; BWOFF-NEXT: pushq %rbx +; BWOFF-NEXT: subq $40, %rsp +; BWOFF-NEXT: movq %rdi, %rbx +; BWOFF-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; BWOFF-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; BWOFF-NEXT: callq __truncdfhf2 +; BWOFF-NEXT: movw %ax, %r14w +; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; BWOFF-NEXT: callq __truncdfhf2 +; BWOFF-NEXT: movw %ax, %r15w +; BWOFF-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; BWOFF-NEXT: callq __truncdfhf2 +; BWOFF-NEXT: movw %ax, %bp +; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; BWOFF-NEXT: callq __truncdfhf2 +; BWOFF-NEXT: movw %ax, 4(%rbx) +; BWOFF-NEXT: movw %bp, (%rbx) +; BWOFF-NEXT: movw %r15w, 6(%rbx) +; BWOFF-NEXT: movw %r14w, 2(%rbx) +; BWOFF-NEXT: addq $40, %rsp +; BWOFF-NEXT: popq %rbx +; BWOFF-NEXT: popq %r14 +; BWOFF-NEXT: popq %r15 +; BWOFF-NEXT: popq %rbp +; BWOFF-NEXT: retq +; +; CHECK-F16C-LABEL: test_trunc64_vec4: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: pushq %rbp +; CHECK-F16C-NEXT: pushq %r15 +; CHECK-F16C-NEXT: pushq %r14 +; CHECK-F16C-NEXT: pushq %rbx +; CHECK-F16C-NEXT: subq $88, %rsp +; CHECK-F16C-NEXT: movq %rdi, %rbx +; CHECK-F16C-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill +; CHECK-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; CHECK-F16C-NEXT: vzeroupper +; CHECK-F16C-NEXT: callq __truncdfhf2 +; CHECK-F16C-NEXT: movl %eax, %r14d +; CHECK-F16C-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; CHECK-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-F16C-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; 
CHECK-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; CHECK-F16C-NEXT: vzeroupper +; CHECK-F16C-NEXT: callq __truncdfhf2 +; CHECK-F16C-NEXT: movl %eax, %r15d +; CHECK-F16C-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; CHECK-F16C-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; CHECK-F16C-NEXT: vzeroupper +; CHECK-F16C-NEXT: callq __truncdfhf2 +; CHECK-F16C-NEXT: movl %eax, %ebp +; CHECK-F16C-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-F16C-NEXT: callq __truncdfhf2 +; CHECK-F16C-NEXT: movw %ax, 4(%rbx) +; CHECK-F16C-NEXT: movw %bp, (%rbx) +; CHECK-F16C-NEXT: movw %r15w, 6(%rbx) +; CHECK-F16C-NEXT: movw %r14w, 2(%rbx) +; CHECK-F16C-NEXT: addq $88, %rsp +; CHECK-F16C-NEXT: popq %rbx +; CHECK-F16C-NEXT: popq %r14 +; CHECK-F16C-NEXT: popq %r15 +; CHECK-F16C-NEXT: popq %rbp +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_trunc64_vec4: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: pushl %ebp +; CHECK-I686-NEXT: pushl %ebx +; CHECK-I686-NEXT: pushl %edi +; CHECK-I686-NEXT: pushl %esi +; CHECK-I686-NEXT: subl $60, %esp +; CHECK-I686-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) # 16-byte Spill +; CHECK-I686-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) # 16-byte Spill +; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; CHECK-I686-NEXT: movlps %xmm0, (%esp) +; CHECK-I686-NEXT: calll __truncdfhf2 +; CHECK-I686-NEXT: movw %ax, %si +; CHECK-I686-NEXT: movapd {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload +; CHECK-I686-NEXT: movhpd %xmm0, (%esp) +; CHECK-I686-NEXT: calll __truncdfhf2 +; CHECK-I686-NEXT: movw %ax, %di +; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload +; CHECK-I686-NEXT: movlps %xmm0, (%esp) +; CHECK-I686-NEXT: calll __truncdfhf2 +; CHECK-I686-NEXT: movw %ax, %bx +; CHECK-I686-NEXT: movapd {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload +; CHECK-I686-NEXT: movhpd %xmm0, (%esp) +; CHECK-I686-NEXT: calll __truncdfhf2 +; CHECK-I686-NEXT: movw %ax, 6(%ebp) +; CHECK-I686-NEXT: movw %bx, 4(%ebp) +; CHECK-I686-NEXT: movw %di, 2(%ebp) +; CHECK-I686-NEXT: movw 
%si, (%ebp) +; CHECK-I686-NEXT: addl $60, %esp +; CHECK-I686-NEXT: popl %esi +; CHECK-I686-NEXT: popl %edi +; CHECK-I686-NEXT: popl %ebx +; CHECK-I686-NEXT: popl %ebp +; CHECK-I686-NEXT: retl %v = fptrunc <4 x double> %a to <4 x half> store <4 x half> %v, <4 x half>* %p ret void @@ -268,44 +836,99 @@ declare float @test_floatret(); -; On i686, if SSE2 is available, the return value from test_floatret is loaded -; to f80 and then rounded to f32. The DAG combiner should not combine this -; fp_round and the subsequent fptrunc from float to half. define half @test_f80trunc_nodagcombine() #0 { -; CHECK-LABEL: test_f80trunc_nodagcombine: -; CHECK-I686-NOT: calll __truncxfhf2 +; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rax +; CHECK-LIBCALL-NEXT: callq test_floatret +; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee +; CHECK-LIBCALL-NEXT: movzwl %ax, %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: popq %rax +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_f80trunc_nodagcombine: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: pushq %rax +; CHECK-F16C-NEXT: callq test_floatret +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: popq %rax +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_f80trunc_nodagcombine: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: subl $12, %esp +; CHECK-I686-NEXT: calll test_floatret +; CHECK-I686-NEXT: fstps (%esp) +; CHECK-I686-NEXT: calll __gnu_f2h_ieee +; CHECK-I686-NEXT: movzwl %ax, %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: addl $12, %esp +; CHECK-I686-NEXT: retl %1 = call float @test_floatret() %2 = fptrunc float %1 to half ret half %2 } -; CHECK-LABEL: test_sitofp_fadd_i32: -; CHECK-LIBCALL-NEXT: pushq %rbx -; CHECK-LIBCALL-NEXT: subq $16, %rsp -; CHECK-LIBCALL-NEXT: movl %edi, %ebx -; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi -; 
CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movss %xmm0, 12(%rsp) -; CHECK-LIBCALL-NEXT: cvtsi2ssl %ebx, %xmm0 -; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee -; CHECK-LIBCALL-NEXT: movzwl %ax, %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: addss 12(%rsp), %xmm0 -; CHECK-LIBCALL-NEXT: addq $16, %rsp -; CHECK-LIBCALL-NEXT: popq %rbx -; CHECK-LIBCALL-NEXT: retq - -; CHECK-F16C-NEXT: movswl (%rsi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm1, %xmm1 -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 -; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; CHECK-F16C-NEXT: retq + define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { +; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32: +; CHECK-LIBCALL: # BB#0: +; CHECK-LIBCALL-NEXT: pushq %rbx +; CHECK-LIBCALL-NEXT: subq $16, %rsp +; CHECK-LIBCALL-NEXT: movl %edi, %ebx +; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill +; CHECK-LIBCALL-NEXT: cvtsi2ssl %ebx, %xmm0 +; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee +; CHECK-LIBCALL-NEXT: movzwl %ax, %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: addss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload +; CHECK-LIBCALL-NEXT: addq $16, %rsp +; CHECK-LIBCALL-NEXT: popq %rbx +; CHECK-LIBCALL-NEXT: retq +; +; CHECK-F16C-LABEL: test_sitofp_fadd_i32: +; CHECK-F16C: # BB#0: +; CHECK-F16C-NEXT: movswl (%rsi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm1, %xmm1 +; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 +; CHECK-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; CHECK-F16C-NEXT: retq +; +; CHECK-I686-LABEL: test_sitofp_fadd_i32: +; CHECK-I686: # BB#0: +; CHECK-I686-NEXT: subl $28, %esp +; 
CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-I686-NEXT: movzwl (%eax), %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp) # 4-byte Spill +; CHECK-I686-NEXT: xorps %xmm0, %xmm0 +; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 +; CHECK-I686-NEXT: movss %xmm0, (%esp) +; CHECK-I686-NEXT: calll __gnu_f2h_ieee +; CHECK-I686-NEXT: movzwl %ax, %eax +; CHECK-I686-NEXT: movl %eax, (%esp) +; CHECK-I686-NEXT: calll __gnu_h2f_ieee +; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # 4-byte Reload +; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0 +; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-I686-NEXT: addl $28, %esp +; CHECK-I686-NEXT: retl %tmp0 = load half, half* %b %tmp1 = sitofp i32 %a to half %tmp2 = fadd half %tmp0, %tmp1 Index: llvm/trunk/test/CodeGen/X86/illegal-bitfield-loadstore.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ llvm/trunk/test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -112,23 +112,23 @@ define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; CHECK-LABEL: i56_insert_bit: ; CHECK: # BB#0: -; CHECK-NEXT: movzbl %sil, %eax -; CHECK-NEXT: movzwl 4(%rdi), %ecx -; CHECK-NEXT: movzbl 6(%rdi), %edx -; CHECK-NEXT: movl (%rdi), %esi -; CHECK-NEXT: movb %dl, 6(%rdi) -; CHECK-NEXT: # kill: %EDX %EDX %RDX %RDX -; CHECK-NEXT: shll $16, %edx -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: shlq $32, %rdx -; CHECK-NEXT: orq %rdx, %rsi -; CHECK-NEXT: shlq $13, %rax -; CHECK-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF -; CHECK-NEXT: andq %rsi, %rcx -; CHECK-NEXT: orq %rax, %rcx -; CHECK-NEXT: movl %ecx, (%rdi) -; 
CHECK-NEXT: shrq $32, %rcx -; CHECK-NEXT: movw %cx, 4(%rdi) +; CHECK-NEXT: movzwl 4(%rdi), %eax +; CHECK-NEXT: movzbl 6(%rdi), %ecx +; CHECK-NEXT: movl (%rdi), %edx +; CHECK-NEXT: movb %cl, 6(%rdi) +; CHECK-NEXT: movzbl %sil, %esi +; CHECK-NEXT: # kill: %ECX %ECX %RCX %RCX +; CHECK-NEXT: shll $16, %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: shlq $32, %rcx +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: shlq $13, %rsi +; CHECK-NEXT: movabsq $72057594037919743, %rax # imm = 0xFFFFFFFFFFDFFF +; CHECK-NEXT: andq %rdx, %rax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: shrq $32, %rax +; CHECK-NEXT: movw %ax, 4(%rdi) ; CHECK-NEXT: retq %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1 Index: llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll +++ llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll @@ -17,7 +17,7 @@ ; X64-HSW-LABEL: test_mul_by_1: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_1: ; X64-JAG: # BB#0: @@ -32,7 +32,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_1: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_1: ; JAG-NOOPT: # BB#0: @@ -63,7 +63,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_2: ; X64-JAG: # BB#0: @@ -81,7 +81,7 @@ ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: 
test_mul_by_2: ; JAG-NOOPT: # BB#0: @@ -114,7 +114,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_3: ; X64-JAG: # BB#0: @@ -131,7 +131,7 @@ ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_3: ; JAG-NOOPT: # BB#0: @@ -165,7 +165,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_4: ; X64-JAG: # BB#0: @@ -183,7 +183,7 @@ ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_4: ; JAG-NOOPT: # BB#0: @@ -216,7 +216,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_5: ; X64-JAG: # BB#0: @@ -233,7 +233,7 @@ ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_5: ; JAG-NOOPT: # BB#0: @@ -269,7 +269,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_6: ; X64-JAG: # BB#0: @@ 
-285,8 +285,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_6: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_6: ; JAG-NOOPT: # BB#0: @@ -321,7 +321,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_7: ; X64-JAG: # BB#0: @@ -337,8 +337,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_7: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_7: ; JAG-NOOPT: # BB#0: @@ -371,7 +371,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_8: ; X64-JAG: # BB#0: @@ -389,7 +389,7 @@ ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_8: ; JAG-NOOPT: # BB#0: @@ -422,7 +422,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_9: ; X64-JAG: # BB#0: @@ -439,7 +439,7 @@ ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: 
[2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_9: ; JAG-NOOPT: # BB#0: @@ -475,7 +475,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_10: ; X64-JAG: # BB#0: @@ -491,8 +491,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_10: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_10: ; JAG-NOOPT: # BB#0: @@ -527,7 +527,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_11: ; X64-JAG: # BB#0: @@ -543,8 +543,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_11: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_11: ; JAG-NOOPT: # BB#0: @@ -575,9 +575,9 @@ ; X64-HSW-LABEL: test_mul_by_12: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI -; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-HSW-NEXT: shll $2, %edi # sched: [1:1.00] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_12: ; X64-JAG: # BB#0: @@ -593,8 +593,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_12: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] +; 
HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_12: ; JAG-NOOPT: # BB#0: @@ -629,7 +629,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_13: ; X64-JAG: # BB#0: @@ -645,8 +645,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_13: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_13: ; JAG-NOOPT: # BB#0: @@ -681,7 +681,7 @@ ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG: # BB#0: @@ -698,8 +698,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_14: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_14: ; JAG-NOOPT: # BB#0: @@ -732,7 +732,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_15: ; X64-JAG: # BB#0: @@ -748,8 +748,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_15: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; 
; JAG-NOOPT-LABEL: test_mul_by_15: ; JAG-NOOPT: # BB#0: @@ -780,9 +780,9 @@ ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] +; X64-HSW-NEXT: shll $4, %edi # sched: [1:1.00] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # BB#0: @@ -798,9 +798,9 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] +; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # BB#0: @@ -836,9 +836,9 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:1.00] ; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_17: ; X64-JAG: # BB#0: @@ -855,8 +855,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_17: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_17: ; JAG-NOOPT: # BB#0: @@ -892,7 +892,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_18: ; X64-JAG: # BB#0: @@ -908,8 +908,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_18: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: 
retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_18: ; JAG-NOOPT: # BB#0: @@ -944,9 +944,9 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $2, %eax # sched: [1:1.00] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG: # BB#0: @@ -963,8 +963,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_19: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_19: ; JAG-NOOPT: # BB#0: @@ -995,9 +995,9 @@ ; X64-HSW-LABEL: test_mul_by_20: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI -; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] +; X64-HSW-NEXT: shll $2, %edi # sched: [1:1.00] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_20: ; X64-JAG: # BB#0: @@ -1013,8 +1013,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_20: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_20: ; JAG-NOOPT: # BB#0: @@ -1049,7 +1049,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_21: ; X64-JAG: # BB#0: @@ 
-1065,8 +1065,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_21: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_21: ; JAG-NOOPT: # BB#0: @@ -1101,7 +1101,7 @@ ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG: # BB#0: @@ -1118,8 +1118,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_22: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_22: ; JAG-NOOPT: # BB#0: @@ -1152,9 +1152,9 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $3, %eax # sched: [1:1.00] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_23: ; X64-JAG: # BB#0: @@ -1171,8 +1171,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_23: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_23: ; JAG-NOOPT: # BB#0: @@ -1203,9 +1203,9 @@ ; X64-HSW-LABEL: test_mul_by_24: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI -; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50] +; X64-HSW-NEXT: shll $3, %edi # sched: [1:1.00] ; X64-HSW-NEXT: leal 
(%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_24: ; X64-JAG: # BB#0: @@ -1221,8 +1221,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_24: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_24: ; JAG-NOOPT: # BB#0: @@ -1257,7 +1257,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_25: ; X64-JAG: # BB#0: @@ -1273,8 +1273,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_25: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_25: ; JAG-NOOPT: # BB#0: @@ -1311,7 +1311,7 @@ ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_26: ; X64-JAG: # BB#0: @@ -1328,8 +1328,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_26: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_26: ; JAG-NOOPT: # BB#0: @@ -1362,7 +1362,7 @@ ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: 
[1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_27: ; X64-JAG: # BB#0: @@ -1378,8 +1378,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_27: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_27: ; JAG-NOOPT: # BB#0: @@ -1416,7 +1416,7 @@ ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_28: ; X64-JAG: # BB#0: @@ -1433,8 +1433,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_28: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_28: ; JAG-NOOPT: # BB#0: @@ -1471,7 +1471,7 @@ ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # BB#0: @@ -1489,8 +1489,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_29: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_29: ; JAG-NOOPT: # BB#0: @@ -1523,10 +1523,10 @@ ; X64-HSW-LABEL: test_mul_by_30: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll 
$5, %eax # sched: [1:1.00] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_30: ; X64-JAG: # BB#0: @@ -1543,8 +1543,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_30: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_30: ; JAG-NOOPT: # BB#0: @@ -1576,9 +1576,9 @@ ; X64-HSW-LABEL: test_mul_by_31: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:1.00] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_31: ; X64-JAG: # BB#0: @@ -1594,8 +1594,8 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_31: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_31: ; JAG-NOOPT: # BB#0: @@ -1626,9 +1626,9 @@ ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] +; X64-HSW-NEXT: shll $5, %edi # sched: [1:1.00] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # BB#0: @@ -1644,9 +1644,9 @@ ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] +; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: 
retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # BB#0: @@ -1686,8 +1686,8 @@ ; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25] -; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: imull %ecx, %eax # sched: [3:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_spec: ; X64-JAG: # BB#0: @@ -1712,8 +1712,8 @@ ; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25] ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_spec: ; JAG-NOOPT: # BB#0: Index: llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll +++ llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll @@ -18,7 +18,7 @@ ; X64-HSW-LABEL: test_mul_by_1: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_1: ; X64-JAG: # BB#0: @@ -34,7 +34,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_1: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_1: ; JAG-NOOPT: # BB#0: @@ -66,7 +66,7 @@ ; X64-HSW-LABEL: test_mul_by_2: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_2: ; X64-JAG: # BB#0: @@ -84,7 +84,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_2: ; HSW-NOOPT: # 
BB#0: ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_2: ; JAG-NOOPT: # BB#0: @@ -116,7 +116,7 @@ ; X64-HSW-LABEL: test_mul_by_3: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_3: ; X64-JAG: # BB#0: @@ -134,7 +134,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_3: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_3: ; JAG-NOOPT: # BB#0: @@ -166,7 +166,7 @@ ; X64-HSW-LABEL: test_mul_by_4: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_4: ; X64-JAG: # BB#0: @@ -184,7 +184,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_4: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_4: ; JAG-NOOPT: # BB#0: @@ -216,7 +216,7 @@ ; X64-HSW-LABEL: test_mul_by_5: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_5: ; X64-JAG: # BB#0: @@ -234,7 +234,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_5: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_5: ; JAG-NOOPT: # BB#0: @@ -268,7 +268,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: 
[1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_6: ; X64-JAG: # BB#0: @@ -287,7 +287,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_6: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_6: ; JAG-NOOPT: # BB#0: @@ -323,7 +323,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_7: ; X64-JAG: # BB#0: @@ -342,7 +342,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_7: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_7: ; JAG-NOOPT: # BB#0: @@ -375,7 +375,7 @@ ; X64-HSW-LABEL: test_mul_by_8: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_8: ; X64-JAG: # BB#0: @@ -393,7 +393,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_8: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_8: ; JAG-NOOPT: # BB#0: @@ -425,7 +425,7 @@ ; X64-HSW-LABEL: test_mul_by_9: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_9: ; X64-JAG: # BB#0: @@ -443,7 +443,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_9: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_9: ; JAG-NOOPT: # BB#0: @@ 
-477,7 +477,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_10: ; X64-JAG: # BB#0: @@ -496,7 +496,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_10: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_10: ; JAG-NOOPT: # BB#0: @@ -532,7 +532,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_11: ; X64-JAG: # BB#0: @@ -551,7 +551,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_11: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_11: ; JAG-NOOPT: # BB#0: @@ -585,7 +585,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_12: ; X64-JAG: # BB#0: @@ -604,7 +604,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_12: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_12: ; JAG-NOOPT: # BB#0: @@ -640,7 +640,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_13: ; X64-JAG: # BB#0: @@ -659,7 +659,7 @@ ; 
HSW-NOOPT-LABEL: test_mul_by_13: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_13: ; JAG-NOOPT: # BB#0: @@ -696,7 +696,7 @@ ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG: # BB#0: @@ -716,7 +716,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_14: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_14: ; JAG-NOOPT: # BB#0: @@ -751,7 +751,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_15: ; X64-JAG: # BB#0: @@ -770,7 +770,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_15: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_15: ; JAG-NOOPT: # BB#0: @@ -804,7 +804,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # BB#0: @@ -824,7 +824,7 @@ ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # BB#0: @@ -864,7 +864,7 @@ ; 
X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_17: ; X64-JAG: # BB#0: @@ -884,7 +884,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_17: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_17: ; JAG-NOOPT: # BB#0: @@ -920,7 +920,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_18: ; X64-JAG: # BB#0: @@ -939,7 +939,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_18: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_18: ; JAG-NOOPT: # BB#0: @@ -977,7 +977,7 @@ ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG: # BB#0: @@ -997,7 +997,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_19: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_19: ; JAG-NOOPT: # BB#0: @@ -1031,7 +1031,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_20: ; X64-JAG: # BB#0: @@ 
-1050,7 +1050,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_20: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_20: ; JAG-NOOPT: # BB#0: @@ -1086,7 +1086,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_21: ; X64-JAG: # BB#0: @@ -1105,7 +1105,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_21: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_21: ; JAG-NOOPT: # BB#0: @@ -1142,7 +1142,7 @@ ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG: # BB#0: @@ -1162,7 +1162,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_22: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_22: ; JAG-NOOPT: # BB#0: @@ -1199,7 +1199,7 @@ ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_23: ; X64-JAG: # BB#0: @@ -1219,7 +1219,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_23: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; 
JAG-NOOPT-LABEL: test_mul_by_23: ; JAG-NOOPT: # BB#0: @@ -1253,7 +1253,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_24: ; X64-JAG: # BB#0: @@ -1272,7 +1272,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_24: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_24: ; JAG-NOOPT: # BB#0: @@ -1308,7 +1308,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_25: ; X64-JAG: # BB#0: @@ -1327,7 +1327,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_25: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_25: ; JAG-NOOPT: # BB#0: @@ -1365,7 +1365,7 @@ ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_26: ; X64-JAG: # BB#0: @@ -1385,7 +1385,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_26: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_26: ; JAG-NOOPT: # BB#0: @@ -1420,7 +1420,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; 
X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_27: ; X64-JAG: # BB#0: @@ -1439,7 +1439,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_27: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_27: ; JAG-NOOPT: # BB#0: @@ -1477,7 +1477,7 @@ ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_28: ; X64-JAG: # BB#0: @@ -1497,7 +1497,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_28: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_28: ; JAG-NOOPT: # BB#0: @@ -1536,7 +1536,7 @@ ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # BB#0: @@ -1557,7 +1557,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_29: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_29: ; JAG-NOOPT: # BB#0: @@ -1596,7 +1596,7 @@ ; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_30: ; X64-JAG: # BB#0: @@ -1617,7 +1617,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_30: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: 
[3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_30: ; JAG-NOOPT: # BB#0: @@ -1654,7 +1654,7 @@ ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_31: ; X64-JAG: # BB#0: @@ -1674,7 +1674,7 @@ ; HSW-NOOPT-LABEL: test_mul_by_31: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_31: ; JAG-NOOPT: # BB#0: @@ -1709,7 +1709,7 @@ ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # BB#0: @@ -1729,7 +1729,7 @@ ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # BB#0: @@ -1793,7 +1793,7 @@ ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25] ; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00] -; X64-HSW-NEXT: retq # sched: [1:1.00] +; X64-HSW-NEXT: retq # sched: [2:1.00] ; ; X64-JAG-LABEL: test_mul_spec: ; X64-JAG: # BB#0: @@ -1841,7 +1841,7 @@ ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25] ; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [1:1.00] +; HSW-NOOPT-NEXT: retq # sched: [2:1.00] ; ; JAG-NOOPT-LABEL: test_mul_spec: ; JAG-NOOPT: # BB#0: Index: llvm/trunk/test/CodeGen/X86/pr32329.ll 
=================================================================== --- llvm/trunk/test/CodeGen/X86/pr32329.ll +++ llvm/trunk/test/CodeGen/X86/pr32329.ll @@ -59,8 +59,8 @@ ; X86-NEXT: cmovnel %ecx, %esi ; X86-NEXT: cmpl %edx, %edi ; X86-NEXT: movl %ebp, var_50+4 -; X86-NEXT: movl %esi, var_50 ; X86-NEXT: setge var_205 +; X86-NEXT: movl %esi, var_50 ; X86-NEXT: imull %eax, %ebx ; X86-NEXT: movb %bl, var_218 ; X86-NEXT: popl %esi Index: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll +++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll @@ -45,15 +45,15 @@ ; ; SANDY-LABEL: f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_no_estimate: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_no_estimate: ; HASWELL-NO-FMA: # BB#0: @@ -63,9 +63,9 @@ ; ; AVX512-LABEL: f32_no_estimate: ; AVX512: # BB#0: -; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00] +; AVX512-NEXT: retq # sched: 
[2:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -113,18 +113,18 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_one_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_one_step: ; HASWELL-NO-FMA: # BB#0: @@ -139,9 +139,9 @@ ; AVX512-LABEL: f32_one_step: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: retq # sched: [2:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -207,7 +207,7 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss 
%xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -215,18 +215,18 @@ ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_two_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 -; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 -; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_two_step: ; HASWELL-NO-FMA: # BB#0: @@ -245,13 +245,13 @@ ; AVX512-LABEL: f32_two_step: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 -; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 -; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # 
sched: [5:0.50] +; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: retq # sched: [2:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -284,15 +284,15 @@ ; ; SANDY-LABEL: v4f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] -; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_no_estimate: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] -; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_no_estimate: ; HASWELL-NO-FMA: # BB#0: @@ -302,9 +302,9 @@ ; ; AVX512-LABEL: v4f32_no_estimate: ; AVX512: # BB#0: -; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] -; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [?:5.000000e-01] +; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00] +; AVX512-NEXT: retq # sched: [2:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -350,21 +350,21 @@ ; ; SANDY-LABEL: v4f32_one_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = 
[1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_one_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step: ; HASWELL-NO-FMA: # BB#0: @@ -379,17 +379,17 @@ ; KNL-LABEL: v4f32_one_step: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v4f32_one_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -453,9 +453,9 
@@ ; ; SANDY-LABEL: v4f32_two_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -463,18 +463,18 @@ ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_two_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 -; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_two_step: ; HASWELL-NO-FMA: # BB#0: @@ -493,24 +493,24 @@ ; KNL-LABEL: v4f32_two_step: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; 
KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] ; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 -; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v4f32_two_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -546,15 +546,15 @@ ; ; SANDY-LABEL: v8f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] -; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # 
sched: [29:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_no_estimate: ; HASWELL: # BB#0: ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] -; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_no_estimate: ; HASWELL-NO-FMA: # BB#0: @@ -565,8 +565,8 @@ ; AVX512-LABEL: v8f32_no_estimate: ; AVX512: # BB#0: ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] -; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00] +; AVX512-NEXT: retq # sched: [2:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -621,19 +621,19 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_one_step: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 
# sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step: ; HASWELL-NO-FMA: # BB#0: @@ -647,18 +647,18 @@ ; ; KNL-LABEL: v8f32_one_step: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v8f32_one_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -737,7 +737,7 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -745,18 +745,18 @@ ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # 
sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_two_step: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 -; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_two_step: ; HASWELL-NO-FMA: # BB#0: @@ -774,25 +774,25 @@ ; ; KNL-LABEL: v8f32_two_step: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 -; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v8f32_two_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 -; 
SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] +; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } Index: llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll +++ llvm/trunk/test/CodeGen/X86/recip-fastmath2.ll @@ -39,26 +39,26 @@ ; SANDY-LABEL: f32_no_step_2: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_no_step_2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_no_step_2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; AVX512-LABEL: f32_no_step_2: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # 
sched: [5:0.50] +; AVX512-NEXT: retq # sched: [2:1.00] %div = fdiv fast float 1234.0, %x ret float %div } @@ -110,39 +110,39 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_one_step_2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_one_step_2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: 
vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; AVX512-LABEL: f32_one_step_2: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 -; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: retq # sched: [2:1.00] %div = fdiv fast float 3456.0, %x ret float %div } @@ -198,43 +198,43 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_one_step_2_divs: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] +; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, 
%xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] ; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; AVX512-LABEL: f32_one_step_2_divs: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 -; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] +; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: retq # sched: [2:1.00] %div = fdiv fast float 3456.0, %x %div2 = fdiv fast float %div, %x ret float %div2 @@ -305,7 +305,7 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; 
SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -313,26 +313,26 @@ ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_two_step_2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 -; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 -; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_two_step_2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} 
xmm3 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -340,20 +340,20 @@ ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; AVX512-LABEL: f32_two_step_2: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 -; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 -; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 -; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; AVX512-NEXT: retq # sched: [1:1.00] +; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; AVX512-NEXT: retq # sched: [2:1.00] %div = fdiv fast float 6789.0, %x ret float %div } @@ -403,51 +403,51 @@ ; ; SANDY-LABEL: v4f32_one_step2: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps 
%xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_one_step2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps 
{{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; KNL-LABEL: v4f32_one_step2: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v4f32_one_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -501,56 +501,56 @@ ; ; SANDY-LABEL: v4f32_one_step_2_divs: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # 
sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_one_step_2_divs: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] ; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; KNL-LABEL: v4f32_one_step_2_divs: ; KNL: # BB#0: ; 
KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] ; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v4f32_one_step_2_divs: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] ; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <4 x float> , %x %div2 = fdiv fast <4 x float> %div, %x ret <4 x float> %div2 @@ -619,9 +619,9 @@ ; ; SANDY-LABEL: v4f32_two_step2: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -629,26 +629,26 @@ ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: 
vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_two_step2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 -; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_two_step2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50] +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [?:5.000000e-01] ; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -656,32 +656,32 @@ ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, 
%xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; KNL-LABEL: v4f32_two_step2: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] ; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 -; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v4f32_two_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: 
[5:0.50] +; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -741,49 +741,49 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_one_step2: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step2: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: 
vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; KNL-LABEL: v8f32_one_step2: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v8f32_one_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -848,54 +848,54 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; 
SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:2.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_one_step_2_divs: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] -; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50] +; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; 
HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; KNL-LABEL: v8f32_one_step_2_divs: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] -; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50] +; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v8f32_one_step_2_divs: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] -; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50] +; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast 
<8 x float> , %x %div2 = fdiv fast <8 x float> %div, %x ret <8 x float> %div2 @@ -980,7 +980,7 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -988,59 +988,59 @@ ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_two_step2: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 -; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] 
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_two_step2: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; KNL-LABEL: v8f32_two_step2: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 -; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; KNL-NEXT: retq # sched: [1:1.00] +; 
KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v8f32_two_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] +; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -1070,27 +1070,27 @@ ; SANDY-LABEL: v8f32_no_step: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_no_step: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_no_step: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; KNL-LABEL: 
v8f32_no_step: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v8f32_no_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm0 -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -1125,32 +1125,32 @@ ; SANDY-LABEL: v8f32_no_step2: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_no_step2: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_no_step2: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] ; ; KNL-LABEL: v8f32_no_step2: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; KNL-NEXT: retq # sched: [1:1.00] +; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; KNL-NEXT: retq # sched: [2:1.00] ; ; SKX-LABEL: v8f32_no_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, 
%ymm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [1:1.00] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] +; SKX-NEXT: retq # sched: [2:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } Index: llvm/trunk/test/CodeGen/X86/sse-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse-schedule.ll @@ -31,14 +31,14 @@ ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addps: ; BTVER2: # BB#0: @@ -73,14 +73,14 @@ ; SANDY-LABEL: test_addss: ; SANDY: # BB#0: ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addss: ; BTVER2: # BB#0: @@ -122,15 +122,15 @@ ; ; SANDY-LABEL: test_andps: ; SANDY: # BB#0: -; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: 
vandps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: @@ -176,15 +176,15 @@ ; ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: -; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: @@ -228,16 +228,16 @@ ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 
+; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: @@ -277,13 +277,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmpss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cmpss: ; BTVER2: # BB#0: @@ -347,30 +347,30 @@ ; SANDY-LABEL: test_comiss: ; SANDY: # BB#0: ; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %cl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %dl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %dl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_comiss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-NEXT: setnp %al # sched: [1:1.00] +; HASWELL-NEXT: sete %cl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: setnp %al # sched: 
[1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-NEXT: setnp %al # sched: [1:1.00] +; HASWELL-NEXT: sete %dl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_comiss: ; BTVER2: # BB#0: @@ -417,17 +417,17 @@ ; ; SANDY-LABEL: test_cvtsi2ss: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsi2ss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] ; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtsi2ss: ; BTVER2: # BB#0: @@ -466,17 +466,17 @@ ; ; SANDY-LABEL: test_cvtsi2ssq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsi2ssq: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00] +; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddss %xmm1, 
%xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtsi2ssq: ; BTVER2: # BB#0: @@ -515,17 +515,17 @@ ; ; SANDY-LABEL: test_cvtss2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [7:1.00] +; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtss2si: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00] +; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00] ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtss2si: ; BTVER2: # BB#0: @@ -567,17 +567,17 @@ ; ; SANDY-LABEL: test_cvtss2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [7:1.00] +; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtss2siq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00] +; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00] ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtss2siq: ; BTVER2: # BB#0: @@ -619,17 +619,17 @@ ; ; SANDY-LABEL: test_cvttss2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [7:1.00] 
+; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttss2si: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00] +; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00] ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvttss2si: ; BTVER2: # BB#0: @@ -668,17 +668,17 @@ ; ; SANDY-LABEL: test_cvttss2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [7:1.00] +; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttss2siq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00] +; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00] ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvttss2siq: ; BTVER2: # BB#0: @@ -714,15 +714,15 @@ ; ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; 
HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00] +; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_divps: ; BTVER2: # BB#0: @@ -756,15 +756,15 @@ ; ; SANDY-LABEL: test_divss: ; SANDY: # BB#0: -; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00] +; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_divss: ; BTVER2: # BB#0: @@ -799,14 +799,14 @@ ; SANDY-LABEL: test_ldmxcsr: ; SANDY: # BB#0: ; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ldmxcsr: ; HASWELL: # BB#0: ; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [6:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_ldmxcsr: ; BTVER2: # BB#0: @@ -843,14 +843,14 @@ ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 
# sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: @@ -886,14 +886,14 @@ ; SANDY-LABEL: test_maxss: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maxss: ; BTVER2: # BB#0: @@ -929,14 +929,14 @@ ; SANDY-LABEL: test_minps: ; SANDY: # BB#0: ; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: @@ -972,14 +972,14 @@ ; SANDY-LABEL: test_minss: ; SANDY: # BB#0: ; SANDY-NEXT: vminss %xmm1, %xmm0, 
%xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_minss: ; BTVER2: # BB#0: @@ -1017,17 +1017,17 @@ ; ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movaps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movaps: ; BTVER2: # BB#0: @@ -1068,12 +1068,12 @@ ; SANDY-LABEL: test_movhlps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movhlps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movhlps: ; BTVER2: # BB#0: @@ -1111,17 +1111,17 @@ ; ; 
SANDY-LABEL: test_movhps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] +; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movhps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] +; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movhps: ; BTVER2: # BB#0: @@ -1164,13 +1164,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movlhps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movlhps: ; BTVER2: # BB#0: @@ -1206,17 +1206,17 @@ ; ; SANDY-LABEL: test_movlps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] +; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movlps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = 
mem[0],xmm1[1] sched: [5:1.00] +; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movlps: ; BTVER2: # BB#0: @@ -1254,13 +1254,13 @@ ; ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: @@ -1295,13 +1295,13 @@ ; ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: @@ -1335,17 +1335,17 @@ ; ; SANDY-LABEL: test_movss_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movss_mem: ; HASWELL: # 
BB#0: -; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movss_mem: ; BTVER2: # BB#0: @@ -1383,13 +1383,13 @@ ; ; SANDY-LABEL: test_movss_reg: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movss_reg: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movss_reg: ; BTVER2: # BB#0: @@ -1423,17 +1423,17 @@ ; ; SANDY-LABEL: test_movups: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movups: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movups: ; BTVER2: # BB#0: @@ -1469,14 +1469,14 @@ ; SANDY-LABEL: 
test_mulps: ; SANDY: # BB#0: ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_mulps: ; BTVER2: # BB#0: @@ -1511,14 +1511,14 @@ ; SANDY-LABEL: test_mulss: ; SANDY: # BB#0: ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_mulss: ; BTVER2: # BB#0: @@ -1560,15 +1560,15 @@ ; ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: -; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_orps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] 
+; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: @@ -1609,13 +1609,13 @@ ; ; SANDY-LABEL: test_prefetchnta: ; SANDY: # BB#0: -; SANDY-NEXT: prefetchnta (%rdi) # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_prefetchnta: ; HASWELL: # BB#0: -; HASWELL-NEXT: prefetchnta (%rdi) # sched: [4:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: prefetchnta (%rdi) # sched: [?:5.000000e-01] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_prefetchnta: ; BTVER2: # BB#0: @@ -1652,17 +1652,17 @@ ; ; SANDY-LABEL: test_rcpps: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [7:3.00] +; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00] +; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # BB#0: @@ -1708,18 +1708,18 @@ ; SANDY-LABEL: test_rcpss: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rcpss: ; HASWELL: # BB#0: ; 
HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_rcpss: ; BTVER2: # BB#0: @@ -1765,16 +1765,16 @@ ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: ; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rsqrtps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00] +; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # BB#0: @@ -1819,19 +1819,19 @@ ; ; SANDY-LABEL: test_rsqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rsqrtss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: 
vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_rsqrtss: ; BTVER2: # BB#0: @@ -1875,12 +1875,12 @@ ; SANDY-LABEL: test_sfence: ; SANDY: # BB#0: ; SANDY-NEXT: sfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sfence: ; HASWELL: # BB#0: -; HASWELL-NEXT: sfence # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: sfence # sched: [1:0.33] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_sfence: ; BTVER2: # BB#0: @@ -1917,14 +1917,14 @@ ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_shufps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: @@ -1962,17 +1962,17 @@ ; ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # 
sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00] -; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] +; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # BB#0: @@ -2017,19 +2017,19 @@ ; ; SANDY-LABEL: test_sqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] -; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00] +; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00] -; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] -; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] +; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_sqrtss: ; BTVER2: # BB#0: @@ -2067,15 +2067,15 @@ ; ; SANDY-LABEL: test_stmxcsr: ; SANDY: # BB#0: -; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: 
[5:1.00] +; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_stmxcsr: ; HASWELL: # BB#0: -; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] -; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [?:5.000000e-01] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_stmxcsr: ; BTVER2: # BB#0: @@ -2112,14 +2112,14 @@ ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_subps: ; BTVER2: # BB#0: @@ -2154,14 +2154,14 @@ ; SANDY-LABEL: test_subss: ; SANDY: # BB#0: ; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_subss: ; BTVER2: # BB#0: @@ -2220,30 +2220,30 @@ ; SANDY-LABEL: test_ucomiss: ; SANDY: # 
BB#0: ; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %cl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %dl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %dl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ucomiss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-NEXT: setnp %al # sched: [1:1.00] +; HASWELL-NEXT: sete %cl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-NEXT: setnp %al # sched: [1:1.00] +; HASWELL-NEXT: sete %dl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_ucomiss: ; BTVER2: # BB#0: @@ -2292,14 +2292,14 @@ ; SANDY-LABEL: test_unpckhps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: 
test_unpckhps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # BB#0: @@ -2338,14 +2338,14 @@ ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # BB#0: @@ -2387,15 +2387,15 @@ ; ; SANDY-LABEL: test_xorps: ; SANDY: # BB#0: -; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_xorps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] 
; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/sse2-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse2-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse2-schedule.ll @@ -31,14 +31,14 @@ ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addpd: ; BTVER2: # BB#0: @@ -73,14 +73,14 @@ ; SANDY-LABEL: test_addsd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addsd: ; BTVER2: # BB#0: @@ -117,17 +117,17 @@ ; ; SANDY-LABEL: test_andpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: 
[3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: @@ -170,17 +170,17 @@ ; ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: @@ -226,16 +226,16 @@ ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, 
%xmm0 # sched: [7:1.00] +; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: @@ -275,13 +275,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmpsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cmpsd: ; BTVER2: # BB#0: @@ -345,30 +345,30 @@ ; SANDY-LABEL: test_comisd: ; SANDY: # BB#0: ; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %cl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %dl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %dl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_comisd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-NEXT: setnp %al # sched: [1:1.00] +; HASWELL-NEXT: sete %cl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; 
HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-NEXT: setnp %al # sched: [1:1.00] +; HASWELL-NEXT: sete %dl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_comisd: ; BTVER2: # BB#0: @@ -416,16 +416,16 @@ ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] +; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [4:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: @@ -467,17 +467,17 @@ ; ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] +; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [3:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, 
%xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: @@ -517,17 +517,17 @@ ; ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] +; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: @@ -568,17 +568,17 @@ ; ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] +; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: @@ -620,16 +620,16 @@ ; SANDY-LABEL: test_cvtps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; 
SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00] +; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [3:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # BB#0: @@ -670,17 +670,17 @@ ; ; SANDY-LABEL: test_cvtps2pd: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtps2pd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtps2pd: ; BTVER2: # BB#0: @@ -724,14 +724,14 @@ ; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [3:1.00] ; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsd2si: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00] +; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [3:1.00] +; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [7:1.00] ; 
HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtsd2si: ; BTVER2: # BB#0: @@ -773,17 +773,17 @@ ; ; SANDY-LABEL: test_cvtsd2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [3:1.00] -; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [7:1.00] +; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsd2siq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00] +; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [4:1.00] ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtsd2siq: ; BTVER2: # BB#0: @@ -830,18 +830,18 @@ ; SANDY-LABEL: test_cvtsd2ss: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] +; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsd2ss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] -; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] +; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; 
HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtsd2ss: ; BTVER2: # BB#0: @@ -882,16 +882,16 @@ ; SANDY-LABEL: test_cvtsi2sd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsi2sd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] ; HASWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtsi2sd: ; BTVER2: # BB#0: @@ -931,16 +931,16 @@ ; SANDY-LABEL: test_cvtsi2sdq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsi2sdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] ; HASWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtsi2sdq: ; BTVER2: # BB#0: @@ -985,19 +985,19 @@ ; ; SANDY-LABEL: test_cvtss2sd: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00] +; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: 
[1:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtss2sd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvtss2sd: ; BTVER2: # BB#0: @@ -1038,17 +1038,17 @@ ; ; SANDY-LABEL: test_cvttpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttpd2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] +; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvttpd2dq: ; BTVER2: # BB#0: @@ -1091,16 +1091,16 @@ ; SANDY-LABEL: test_cvttps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttps2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00] +; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [3:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvttps2dq: ; BTVER2: # BB#0: @@ -1139,17 +1139,17 @@ ; ; SANDY-LABEL: test_cvttsd2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [3:1.00] +; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] ; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttsd2si: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00] +; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [7:1.00] ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvttsd2si: ; BTVER2: # BB#0: @@ -1188,17 +1188,17 @@ ; ; SANDY-LABEL: test_cvttsd2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [3:1.00] -; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [7:1.00] +; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttsd2siq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00] +; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [4:1.00] ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: 
retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_cvttsd2siq: ; BTVER2: # BB#0: @@ -1234,15 +1234,15 @@ ; ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] +; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:4.00] +; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_divpd: ; BTVER2: # BB#0: @@ -1276,15 +1276,15 @@ ; ; SANDY-LABEL: test_divsd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] +; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divsd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:4.00] +; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_divsd: ; BTVER2: # BB#0: @@ -1322,12 +1322,12 @@ ; SANDY-LABEL: test_lfence: ; SANDY: # BB#0: ; SANDY-NEXT: lfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_lfence: ; HASWELL: # BB#0: 
-; HASWELL-NEXT: lfence # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: lfence # sched: [2:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_lfence: ; BTVER2: # BB#0: @@ -1363,12 +1363,12 @@ ; SANDY-LABEL: test_mfence: ; SANDY: # BB#0: ; SANDY-NEXT: mfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mfence: ; HASWELL: # BB#0: -; HASWELL-NEXT: mfence # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: mfence # sched: [2:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_mfence: ; BTVER2: # BB#0: @@ -1402,12 +1402,12 @@ ; SANDY-LABEL: test_maskmovdqu: ; SANDY: # BB#0: ; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovdqu: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [14:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maskmovdqu: ; BTVER2: # BB#0: @@ -1440,14 +1440,14 @@ ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: @@ -1483,14 +1483,14 @@ ; SANDY-LABEL: test_maxsd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; 
SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_maxsd: ; BTVER2: # BB#0: @@ -1526,14 +1526,14 @@ ; SANDY-LABEL: test_minpd: ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: @@ -1569,14 +1569,14 @@ ; SANDY-LABEL: test_minsd: ; SANDY: # BB#0: ; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_minsd: ; BTVER2: # BB#0: @@ -1614,17 +1614,17 @@ ; ; SANDY-LABEL: test_movapd: ; 
SANDY: # BB#0: -; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movapd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movapd: ; BTVER2: # BB#0: @@ -1662,17 +1662,17 @@ ; ; SANDY-LABEL: test_movdqa: ; SANDY: # BB#0: -; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movdqa: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movdqa: ; BTVER2: # BB#0: @@ -1710,17 +1710,17 @@ ; ; SANDY-LABEL: test_movdqu: ; SANDY: # BB#0: -; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: 
[1:0.50] -; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movdqu: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movdqu: ; BTVER2: # BB#0: @@ -1768,22 +1768,22 @@ ; SANDY-LABEL: test_movd: ; SANDY: # BB#0: ; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:0.33] -; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovd %xmm0, %eax # sched: [1:0.33] -; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] +; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:0.25] +; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [?:1.000000e+00] 
+; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movd: ; BTVER2: # BB#0: @@ -1838,23 +1838,23 @@ ; ; SANDY-LABEL: test_movd_64: ; SANDY: # BB#0: -; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:0.33] -; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50] +; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] +; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, %rax # sched: [1:0.33] -; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] +; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movd_64: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50] +; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movd_64: ; BTVER2: # BB#0: @@ -1900,17 +1900,17 @@ ; ; SANDY-LABEL: test_movhpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] +; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movhpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovhpd 
{{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] +; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movhpd: ; BTVER2: # BB#0: @@ -1951,17 +1951,17 @@ ; ; SANDY-LABEL: test_movlpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] +; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movlpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] +; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movlpd: ; BTVER2: # BB#0: @@ -1998,13 +1998,13 @@ ; ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: @@ -2039,14 +2039,14 @@ ; SANDY-LABEL: test_movntdqa: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddq %xmm0, %xmm0, 
%xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: @@ -2080,14 +2080,14 @@ ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: @@ -2123,17 +2123,17 @@ ; ; SANDY-LABEL: test_movq_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] +; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movq_mem: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] +; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovq %xmm0, 
(%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movq_mem: ; BTVER2: # BB#0: @@ -2174,13 +2174,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movq_reg: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movq_reg: ; BTVER2: # BB#0: @@ -2216,17 +2216,17 @@ ; ; SANDY-LABEL: test_movsd_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50] +; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsd_mem: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50] +; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movsd_mem: ; BTVER2: # BB#0: @@ -2266,12 +2266,12 @@ ; SANDY-LABEL: test_movsd_reg: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsd_reg: ; HASWELL: # 
BB#0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movsd_reg: ; BTVER2: # BB#0: @@ -2305,17 +2305,17 @@ ; ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movupd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [?:1.000000e+00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: @@ -2351,14 +2351,14 @@ ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: @@ -2393,14 +2393,14 @@ ; SANDY-LABEL: test_mulsd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: 
[9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_mulsd: ; BTVER2: # BB#0: @@ -2437,17 +2437,17 @@ ; ; SANDY-LABEL: test_orpd: ; SANDY: # BB#0: -; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_orpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_orpd: ; BTVER2: # BB#0: @@ -2496,14 +2496,14 @@ ; SANDY-LABEL: test_packssdw: ; SANDY: # BB#0: ; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packssdw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: 
[1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_packssdw: ; BTVER2: # BB#0: @@ -2548,14 +2548,14 @@ ; SANDY-LABEL: test_packsswb: ; SANDY: # BB#0: ; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packsswb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_packsswb: ; BTVER2: # BB#0: @@ -2600,14 +2600,14 @@ ; SANDY-LABEL: test_packuswb: ; SANDY: # BB#0: ; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packuswb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_packuswb: ; BTVER2: # BB#0: @@ -2648,14 +2648,14 @@ ; SANDY-LABEL: test_paddb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddb 
(%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_paddb: ; BTVER2: # BB#0: @@ -2694,14 +2694,14 @@ ; SANDY-LABEL: test_paddd: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_paddd: ; BTVER2: # BB#0: @@ -2736,14 +2736,14 @@ ; SANDY-LABEL: test_paddq: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_paddq: ; BTVER2: # BB#0: @@ -2782,14 +2782,14 @@ ; SANDY-LABEL: test_paddsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddsb: ; HASWELL: # BB#0: ; 
HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_paddsb: ; BTVER2: # BB#0: @@ -2829,14 +2829,14 @@ ; SANDY-LABEL: test_paddsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_paddsw: ; BTVER2: # BB#0: @@ -2876,14 +2876,14 @@ ; SANDY-LABEL: test_paddusb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddusb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_paddusb: ; BTVER2: # BB#0: @@ -2923,14 +2923,14 @@ ; SANDY-LABEL: test_paddusw: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddusw (%rdi), %xmm0, 
%xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddusw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_paddusw: ; BTVER2: # BB#0: @@ -2970,14 +2970,14 @@ ; SANDY-LABEL: test_paddw: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_paddw: ; BTVER2: # BB#0: @@ -3015,16 +3015,16 @@ ; SANDY-LABEL: test_pand: ; SANDY: # BB#0: ; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pand: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pand: ; BTVER2: # BB#0: @@ -3070,16 +3070,16 @@ ; SANDY-LABEL: test_pandn: ; SANDY: # BB#0: ; 
SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pandn: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50] +; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pandn: ; BTVER2: # BB#0: @@ -3122,14 +3122,14 @@ ; SANDY-LABEL: test_pavgb: ; SANDY: # BB#0: ; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pavgb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pavgb: ; BTVER2: # BB#0: @@ -3169,14 +3169,14 @@ ; SANDY-LABEL: test_pavgw: ; SANDY: # BB#0: ; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pavgw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpavgw 
(%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pavgw: ; BTVER2: # BB#0: @@ -3217,16 +3217,16 @@ ; SANDY-LABEL: test_pcmpeqb: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpeqb: ; BTVER2: # BB#0: @@ -3269,16 +3269,16 @@ ; SANDY-LABEL: test_pcmpeqd: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpeqd: ; BTVER2: # BB#0: @@ -3321,16 +3321,16 @@ ; SANDY-LABEL: test_pcmpeqw: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpeqw 
(%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpeqw: ; BTVER2: # BB#0: @@ -3374,16 +3374,16 @@ ; SANDY-LABEL: test_pcmpgtb: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpgtb: ; BTVER2: # BB#0: @@ -3427,16 +3427,16 @@ ; SANDY-LABEL: test_pcmpgtd: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] 
+; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpgtd: ; BTVER2: # BB#0: @@ -3480,16 +3480,16 @@ ; SANDY-LABEL: test_pcmpgtw: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpgtw: ; BTVER2: # BB#0: @@ -3526,15 +3526,15 @@ ; ; SANDY-LABEL: test_pextrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50] +; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: # kill: %AX %AX %EAX -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:1.00] +; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: # kill: %AX %AX %EAX -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # BB#0: @@ -3570,15 +3570,15 @@ ; ; SANDY-LABEL: test_pinsrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # 
sched: [2:1.00] +; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pinsrw: ; BTVER2: # BB#0: @@ -3620,15 +3620,15 @@ ; ; SANDY-LABEL: test_pmaddwd: ; SANDY: # BB#0: -; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaddwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmaddwd: ; BTVER2: # BB#0: @@ -3669,14 +3669,14 @@ ; SANDY-LABEL: test_pmaxsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmaxsw: ; BTVER2: # BB#0: @@ -3716,14 +3716,14 @@ 
; SANDY-LABEL: test_pmaxub: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxub: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmaxub: ; BTVER2: # BB#0: @@ -3763,14 +3763,14 @@ ; SANDY-LABEL: test_pminsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pminsw: ; BTVER2: # BB#0: @@ -3810,14 +3810,14 @@ ; SANDY-LABEL: test_pminub: ; SANDY: # BB#0: ; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminub: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; 
HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pminub: ; BTVER2: # BB#0: @@ -3851,13 +3851,13 @@ ; ; SANDY-LABEL: test_pmovmskb: ; SANDY: # BB#0: -; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovmskb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovmskb: ; BTVER2: # BB#0: @@ -3891,13 +3891,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhuw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmulhuw: ; BTVER2: # BB#0: @@ -3932,15 +3932,15 @@ ; ; SANDY-LABEL: test_pmulhw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmulhw: ; BTVER2: # BB#0: @@ -3975,15 +3975,15 @@ ; ; SANDY-LABEL: test_pmullw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmullw 
%xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmullw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmullw: ; BTVER2: # BB#0: @@ -4027,13 +4027,13 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmuludq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmuludq: ; BTVER2: # BB#0: @@ -4073,16 +4073,16 @@ ; SANDY-LABEL: test_por: ; SANDY: # BB#0: ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_por: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_por: ; 
BTVER2: # BB#0: @@ -4126,15 +4126,15 @@ ; ; SANDY-LABEL: test_psadbw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psadbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psadbw: ; BTVER2: # BB#0: @@ -4176,16 +4176,16 @@ ; SANDY-LABEL: test_pshufd: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] -; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:0.50] +; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00] +; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pshufd: ; BTVER2: # BB#0: @@ -4227,16 +4227,16 @@ ; SANDY-LABEL: test_pshufhw: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:0.50] +; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # 
sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufhw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:1.00] +; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pshufhw: ; BTVER2: # BB#0: @@ -4278,16 +4278,16 @@ ; SANDY-LABEL: test_pshuflw: ; SANDY: # BB#0: ; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:0.50] +; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshuflw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:1.00] +; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pshuflw: ; BTVER2: # BB#0: @@ -4328,15 +4328,15 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pslld: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [4:1.00] 
+; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pslld: ; BTVER2: # BB#0: @@ -4378,12 +4378,12 @@ ; SANDY-LABEL: test_pslldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pslldq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pslldq: ; BTVER2: # BB#0: @@ -4419,15 +4419,15 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psllq: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psllq: ; BTVER2: # BB#0: @@ -4470,15 +4470,15 @@ ; SANDY: # BB#0: ; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: 
test_psllw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] +; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psllw: ; BTVER2: # BB#0: @@ -4519,17 +4519,17 @@ ; ; SANDY-LABEL: test_psrad: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrad: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [2:1.00] ; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psrad: ; BTVER2: # BB#0: @@ -4570,17 +4570,17 @@ ; ; SANDY-LABEL: test_psraw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psraw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: 
[2:1.00] -; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] ; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psraw: ; BTVER2: # BB#0: @@ -4621,17 +4621,17 @@ ; ; SANDY-LABEL: test_psrld: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrld: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [2:1.00] ; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psrld: ; BTVER2: # BB#0: @@ -4673,12 +4673,12 @@ ; SANDY-LABEL: test_psrldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrldq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psrldq: ; BTVER2: # BB#0: @@ -4712,17 +4712,17 @@ ; ; SANDY-LABEL: test_psrlq: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: 
vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [2:1.00] ; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psrlq: ; BTVER2: # BB#0: @@ -4763,17 +4763,17 @@ ; ; SANDY-LABEL: test_psrlw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] ; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psrlw: ; BTVER2: # BB#0: @@ -4816,14 +4816,14 @@ ; SANDY-LABEL: test_psubb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubb: 
; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psubb: ; BTVER2: # BB#0: @@ -4862,14 +4862,14 @@ ; SANDY-LABEL: test_psubd: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psubd: ; BTVER2: # BB#0: @@ -4904,14 +4904,14 @@ ; SANDY-LABEL: test_psubq: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psubq: ; BTVER2: # BB#0: @@ -4950,14 +4950,14 @@ ; SANDY-LABEL: test_psubsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: 
[7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psubsb: ; BTVER2: # BB#0: @@ -4997,14 +4997,14 @@ ; SANDY-LABEL: test_psubsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psubsw: ; BTVER2: # BB#0: @@ -5044,14 +5044,14 @@ ; SANDY-LABEL: test_psubusb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubusb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psubusb: ; BTVER2: # BB#0: @@ -5091,14 +5091,14 @@ ; SANDY-LABEL: test_psubusw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 
# sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubusw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psubusw: ; BTVER2: # BB#0: @@ -5138,14 +5138,14 @@ ; SANDY-LABEL: test_psubw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psubw: ; BTVER2: # BB#0: @@ -5184,14 +5184,14 @@ ; SANDY-LABEL: test_punpckhbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhbw: ; HASWELL: # 
BB#0: ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_punpckhbw: ; BTVER2: # BB#0: @@ -5231,16 +5231,16 @@ ; SANDY-LABEL: test_punpckhdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:0.50] +; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:1.00] +; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_punpckhdq: ; BTVER2: # BB#0: @@ -5280,16 +5280,16 @@ ; SANDY-LABEL: test_punpckhqdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:0.50] +; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: 
[7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhqdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00] +; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_punpckhqdq: ; BTVER2: # BB#0: @@ -5330,14 +5330,14 @@ ; SANDY-LABEL: test_punpckhwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_punpckhwd: ; BTVER2: # BB#0: @@ -5376,14 +5376,14 @@ ; SANDY-LABEL: test_punpcklbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_punpcklbw: ; BTVER2: # BB#0: @@ -5423,16 +5423,16 @@ ; SANDY-LABEL: test_punpckldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:0.50] +; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckldq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:1.00] +; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: 
retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_punpckldq: ; BTVER2: # BB#0: @@ -5472,16 +5472,16 @@ ; SANDY-LABEL: test_punpcklqdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:0.50] +; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklqdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] +; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_punpcklqdq: ; BTVER2: # BB#0: @@ -5522,14 +5522,14 @@ ; SANDY-LABEL: test_punpcklwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] +; 
HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_punpcklwd: ; BTVER2: # BB#0: @@ -5567,16 +5567,16 @@ ; SANDY-LABEL: test_pxor: ; SANDY: # BB#0: ; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pxor: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pxor: ; BTVER2: # BB#0: @@ -5616,16 +5616,16 @@ ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00] +; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00] +; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: @@ -5665,17 +5665,17 @@ ; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00] 
+; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [28:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00] -; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] +; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # BB#0: @@ -5720,19 +5720,19 @@ ; ; SANDY-LABEL: test_sqrtsd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00] -; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50] -; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] +; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtsd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00] -; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50] -; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] +; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_sqrtsd: ; BTVER2: # BB#0: @@ -5771,14 +5771,14 @@ ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd 
(%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_subpd: ; BTVER2: # BB#0: @@ -5813,14 +5813,14 @@ ; SANDY-LABEL: test_subsd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_subsd: ; BTVER2: # BB#0: @@ -5879,30 +5879,30 @@ ; SANDY-LABEL: test_ucomisd: ; SANDY: # BB#0: ; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %cl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %dl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %dl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # 
sched: [1:1.00] ; ; HASWELL-LABEL: test_ucomisd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-NEXT: setnp %al # sched: [1:1.00] +; HASWELL-NEXT: sete %cl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-NEXT: setnp %al # sched: [1:1.00] +; HASWELL-NEXT: sete %dl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_ucomisd: ; BTVER2: # BB#0: @@ -5950,16 +5950,16 @@ ; SANDY-LABEL: test_unpckhpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00] +; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00] +; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # BB#0: @@ -6005,16 +6005,16 @@ ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} 
xmm1 = xmm0[0],mem[0] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00] +; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # BB#0: @@ -6053,17 +6053,17 @@ ; ; SANDY-LABEL: test_xorpd: ; SANDY: # BB#0: -; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/sse3-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse3-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse3-schedule.ll @@ -31,14 +31,14 @@ ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsubpd (%rdi), 
%xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # BB#0: @@ -74,14 +74,14 @@ ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsubps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # BB#0: @@ -116,15 +116,15 @@ ; ; SANDY-LABEL: test_haddpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: @@ -159,15 +159,15 @@ ; ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: -; 
SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_haddps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: @@ -202,15 +202,15 @@ ; ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: @@ -245,15 +245,15 @@ ; ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubps (%rdi), %xmm0, 
%xmm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: @@ -287,13 +287,13 @@ ; ; SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: -; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # BB#0: -; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # BB#0: @@ -330,16 +330,16 @@ ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50] +; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movddup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50] +; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: @@ -380,16 +380,16 @@ ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50] +; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; 
SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50] +; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: @@ -430,16 +430,16 @@ ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50] +; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50] +; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [?:5.000000e-01] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll @@ -25,17 +25,17 @@ ; ; SANDY-LABEL: test_blendpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; 
SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: @@ -65,15 +65,15 @@ ; ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: @@ -107,15 +107,15 @@ ; ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, 
%xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: @@ -150,15 +150,15 @@ ; ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: @@ -187,15 +187,15 @@ ; ; SANDY-LABEL: test_dppd: ; SANDY: # BB#0: -; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_dppd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_dppd: ; BTVER2: # BB#0: @@ -224,15 +224,15 @@ ; ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: -; SANDY-NEXT: vdpps $7, 
%xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] ; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_dpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] -; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: @@ -262,14 +262,14 @@ ; SANDY-LABEL: test_insertps: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_insertps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_insertps: ; BTVER2: # BB#0: @@ -296,13 +296,13 @@ ; ; SANDY-LABEL: test_movntdqa: ; SANDY: # BB#0: -; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: @@ -328,15 
+328,15 @@ ; ; SANDY-LABEL: test_mpsadbw: ; SANDY: # BB#0: -; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mpsadbw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] +; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_mpsadbw: ; BTVER2: # BB#0: @@ -367,14 +367,14 @@ ; SANDY-LABEL: test_packusdw: ; SANDY: # BB#0: ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packusdw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_packusdw: ; BTVER2: # BB#0: @@ -411,14 +411,14 @@ ; SANDY-LABEL: test_pblendvb: ; SANDY: # BB#0: ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pblendvb: ; HASWELL: # BB#0: ; HASWELL-NEXT: 
vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pblendvb: ; BTVER2: # BB#0: @@ -448,14 +448,14 @@ ; SANDY-LABEL: test_pblendw: ; SANDY: # BB#0: ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pblendw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pblendw: ; BTVER2: # BB#0: @@ -484,14 +484,14 @@ ; SANDY-LABEL: test_pcmpeqq: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpeqq: ; BTVER2: # BB#0: @@ -521,15 
+521,15 @@ ; ; SANDY-LABEL: test_pextrb: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50] +; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrb: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pextrb: ; BTVER2: # BB#0: @@ -558,15 +558,15 @@ ; ; SANDY-LABEL: test_pextrd: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50] +; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pextrd: ; BTVER2: # BB#0: @@ -594,15 +594,15 @@ ; ; SANDY-LABEL: test_pextrq: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50] +; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrq: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; 
HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] +; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pextrq: ; BTVER2: # BB#0: @@ -630,15 +630,15 @@ ; ; SANDY-LABEL: test_pextrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50] +; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # BB#0: @@ -667,15 +667,15 @@ ; ; SANDY-LABEL: test_phminposuw: ; SANDY: # BB#0: -; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phminposuw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_phminposuw: ; BTVER2: # BB#0: @@ -704,15 +704,15 @@ ; ; SANDY-LABEL: test_pinsrb: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: 
[7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrb: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pinsrb: ; BTVER2: # BB#0: @@ -740,15 +740,15 @@ ; ; SANDY-LABEL: test_pinsrd: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] +; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pinsrd: ; BTVER2: # BB#0: @@ -778,17 +778,17 @@ ; ; SANDY-LABEL: test_pinsrq: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrq: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:1.00] +; 
HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] +; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pinsrq: ; BTVER2: # BB#0: @@ -819,14 +819,14 @@ ; SANDY-LABEL: test_pmaxsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmaxsb: ; BTVER2: # BB#0: @@ -856,14 +856,14 @@ ; SANDY-LABEL: test_pmaxsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmaxsd: ; BTVER2: # BB#0: @@ -893,14 +893,14 @@ ; SANDY-LABEL: test_pmaxud: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: 
[7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxud: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmaxud: ; BTVER2: # BB#0: @@ -930,14 +930,14 @@ ; SANDY-LABEL: test_pmaxuw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxuw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmaxuw: ; BTVER2: # BB#0: @@ -967,14 +967,14 @@ ; SANDY-LABEL: test_pminsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pminsb: ; BTVER2: # BB#0: @@ -1004,14 +1004,14 @@ ; SANDY-LABEL: test_pminsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: 
[5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pminsd: ; BTVER2: # BB#0: @@ -1041,14 +1041,14 @@ ; SANDY-LABEL: test_pminud: ; SANDY: # BB#0: ; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminud: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pminud: ; BTVER2: # BB#0: @@ -1078,14 +1078,14 @@ ; SANDY-LABEL: test_pminuw: ; SANDY: # BB#0: ; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminuw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pminuw: ; BTVER2: # BB#0: @@ -1118,16 +1118,16 @@ ; SANDY-LABEL: test_pmovsxbw: ; SANDY: # BB#0: ; SANDY-NEXT: 
vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovsxbw: ; BTVER2: # BB#0: @@ -1162,16 +1162,16 @@ ; SANDY-LABEL: test_pmovsxbd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxbd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovsxbd: ; BTVER2: # BB#0: @@ -1206,16 +1206,16 @@ ; SANDY-LABEL: test_pmovsxbq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxbq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; 
HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovsxbq: ; BTVER2: # BB#0: @@ -1250,16 +1250,16 @@ ; SANDY-LABEL: test_pmovsxdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovsxdq: ; BTVER2: # BB#0: @@ -1294,16 +1294,16 @@ ; SANDY-LABEL: test_pmovsxwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovsxwd: ; BTVER2: # BB#0: @@ -1338,16 +1338,16 @@ ; SANDY-LABEL: test_pmovsxwq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxwq %xmm0, 
%xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxwq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovsxwq: ; BTVER2: # BB#0: @@ -1382,16 +1382,16 @@ ; SANDY-LABEL: test_pmovzxbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] +; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; 
HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovzxbw: ; BTVER2: # BB#0: @@ -1426,16 +1426,16 @@ ; SANDY-LABEL: test_pmovzxbd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxbd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] +; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovzxbd: ; BTVER2: # BB#0: @@ -1470,16 +1470,16 @@ ; SANDY-LABEL: test_pmovzxbq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # 
sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxbq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] +; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovzxbq: ; BTVER2: # BB#0: @@ -1514,16 +1514,16 @@ ; SANDY-LABEL: test_pmovzxdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:1.00] +; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovzxdq: ; BTVER2: # BB#0: @@ -1558,16 +1558,16 @@ ; SANDY-LABEL: test_pmovzxwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] +; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovzxwd: ; BTVER2: # BB#0: @@ -1602,16 +1602,16 @@ ; SANDY-LABEL: test_pmovzxwq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxwq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00] +; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmovzxwq: ; BTVER2: # BB#0: @@ -1642,15 +1642,15 @@ ; ; SANDY-LABEL: test_pmuldq: ; SANDY: # BB#0: -; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmuldq %xmm1, 
%xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmuldq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmuldq: ; BTVER2: # BB#0: @@ -1680,15 +1680,15 @@ ; ; SANDY-LABEL: test_pmulld: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulld: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmulld: ; BTVER2: # BB#0: @@ -1724,23 +1724,23 @@ ; ; SANDY-LABEL: test_ptest: ; SANDY: # BB#0: -; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [5:0.50] -; SANDY-NEXT: setb %cl # sched: [1:0.33] +; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-NEXT: setb %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ptest: ; HASWELL: # BB#0: ; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: setb %al # sched: [1:0.50] +; HASWELL-NEXT: setb %al # sched: [1:1.00] ; 
HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: setb %cl # sched: [1:0.50] +; HASWELL-NEXT: setb %cl # sched: [1:1.00] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_ptest: ; BTVER2: # BB#0: @@ -1778,16 +1778,16 @@ ; SANDY-LABEL: test_roundpd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [10:2.00] +; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:2.00] +; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: @@ -1822,16 +1822,16 @@ ; SANDY-LABEL: test_roundps: ; SANDY: # BB#0: ; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [10:2.00] +; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:2.00] +; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; 
HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: @@ -1867,16 +1867,16 @@ ; SANDY-LABEL: test_roundsd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundsd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00] -; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:2.00] +; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_roundsd: ; BTVER2: # BB#0: @@ -1912,16 +1912,16 @@ ; SANDY-LABEL: test_roundss: ; SANDY: # BB#0: ; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00] -; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] +; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_roundss: ; BTVER2: # BB#0: Index: 
llvm/trunk/test/CodeGen/X86/sse42-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/sse42-schedule.ll +++ llvm/trunk/test/CodeGen/X86/sse42-schedule.ll @@ -26,16 +26,16 @@ ; SANDY-LABEL: crc32_32_8: ; SANDY: # BB#0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_8: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # BB#0: @@ -68,16 +68,16 @@ ; SANDY-LABEL: crc32_32_16: ; SANDY: # BB#0: ; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # BB#0: @@ -112,14 +112,14 @@ ; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl 
%edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # BB#0: @@ -152,16 +152,16 @@ ; SANDY-LABEL: crc32_64_8: ; SANDY: # BB#0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # BB#0: @@ -196,14 +196,14 @@ ; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # BB#0: @@ -256,20 +256,20 @@ ; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] ; SANDY-NEXT: # kill: %ECX %ECX %RCX ; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpestri: ; HASWELL: # BB#0: ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [11:3.00] +; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] ; HASWELL-NEXT: movl %ecx, %esi # sched: 
[1:0.25] ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [11:3.00] +; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00] ; HASWELL-NEXT: # kill: %ECX %ECX %RCX ; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpestri: ; BTVER2: # BB#0: @@ -320,17 +320,17 @@ ; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] ; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] ; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpestrm: ; HASWELL: # BB#0: ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00] +; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [10:3.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpestrm: ; BTVER2: # BB#0: @@ -369,12 +369,12 @@ ; ; SANDY-LABEL: test_pcmpistri: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] ; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] ; SANDY-NEXT: # kill: %ECX %ECX %RCX ; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpistri: ; HASWELL: # BB#0: @@ -383,7 +383,7 @@ ; HASWELL-NEXT: vpcmpistri $7, (%rdi), 
%xmm0 # sched: [11:3.00] ; HASWELL-NEXT: # kill: %ECX %ECX %RCX ; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpistri: ; BTVER2: # BB#0: @@ -416,15 +416,15 @@ ; ; SANDY-LABEL: test_pcmpistrm: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpistrm: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] -; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpistrm: ; BTVER2: # BB#0: @@ -453,15 +453,15 @@ ; ; SANDY-LABEL: test_pcmpgtq: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pcmpgtq: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll +++ 
llvm/trunk/test/CodeGen/X86/ssse3-schedule.ll @@ -35,16 +35,16 @@ ; SANDY-LABEL: test_pabsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50] +; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50] ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pabsb: ; BTVER2: # BB#0: @@ -86,16 +86,16 @@ ; SANDY-LABEL: test_pabsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50] +; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50] ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pabsd: ; BTVER2: # BB#0: @@ -136,12 +136,12 @@ ; SANDY-LABEL: test_pabsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pabsw: ; BTVER2: # BB#0: @@ 
-182,14 +182,14 @@ ; SANDY-LABEL: test_palignr: ; SANDY: # BB#0: ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_palignr: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_palignr: ; BTVER2: # BB#0: @@ -223,15 +223,15 @@ ; ; SANDY-LABEL: test_phaddd: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # BB#0: @@ -274,15 +274,15 @@ ; ; SANDY-LABEL: test_phaddsw: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # 
sched: [3:1.50] +; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # BB#0: @@ -317,15 +317,15 @@ ; ; SANDY-LABEL: test_phaddw: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # BB#0: @@ -360,15 +360,15 @@ ; ; SANDY-LABEL: test_phsubd: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_phsubd: ; 
BTVER2: # BB#0: @@ -411,15 +411,15 @@ ; ; SANDY-LABEL: test_phsubsw: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # BB#0: @@ -454,15 +454,15 @@ ; ; SANDY-LABEL: test_phsubw: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # BB#0: @@ -497,15 +497,15 @@ ; ; SANDY-LABEL: test_pmaddubsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaddubsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: 
[5:1.00] -; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmaddubsw: ; BTVER2: # BB#0: @@ -538,13 +538,13 @@ ; ; SANDY-LABEL: test_pmulhrsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhrsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pmulhrsw: ; BTVER2: # BB#0: @@ -579,14 +579,14 @@ ; SANDY-LABEL: test_pshufb: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_pshufb: ; BTVER2: # BB#0: @@ -630,14 +630,14 @@ ; SANDY-LABEL: test_psignb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; 
HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psignb: ; BTVER2: # BB#0: @@ -681,14 +681,14 @@ ; SANDY-LABEL: test_psignd: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psignd: ; BTVER2: # BB#0: @@ -732,14 +732,14 @@ ; SANDY-LABEL: test_psignw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [1:1.00] +; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_psignw: ; BTVER2: # BB#0: Index: llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll +++ llvm/trunk/test/CodeGen/X86/vector-shift-ashr-512.ll @@ -201,14 +201,14 @@ ; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8 ; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0 +; 
AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] +; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9 +; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3 ; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9 ; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5 -; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3 ; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4 ; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3 ; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4 @@ -328,14 +328,14 @@ ; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8 ; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] +; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9 +; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3 ; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9 ; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQ-NEXT: 
vpackuswb %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5 -; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3 ; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4 ; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3 ; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4 Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -68,13 +68,13 @@ ; KNL-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1] ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3] ; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7] -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm1 -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm5 -; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7] -; KNL-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15] -; KNL-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 -; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15] +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm5 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; KNL-NEXT: vpblendw {{.*#+}} ymm1 = 
ymm0[0],ymm5[1],ymm0[2],ymm5[3],ymm0[4],ymm5[5],ymm0[6],ymm5[7],ymm0[8],ymm5[9],ymm0[10],ymm5[11],ymm0[12],ymm5[13],ymm0[14],ymm5[15] ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3 ; KNL-NEXT: vpbroadcastw %xmm3, %ymm3 ; KNL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]