Index: llvm/lib/Target/X86/X86SchedBroadwell.td =================================================================== --- llvm/lib/Target/X86/X86SchedBroadwell.td +++ llvm/lib/Target/X86/X86SchedBroadwell.td @@ -889,8 +889,7 @@ let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup47], (instregex "(V?)PCMPGTQ(Y?)rr", - "MUL_(FPrST0|FST0r|FrST0)")>; +def: InstRW<[BWWriteResGroup47], (instregex "MUL_(FPrST0|FST0r|FrST0)")>; def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> { let Latency = 5; @@ -1600,6 +1599,90 @@ def: InstRW<[WriteZero], (instrs CLC)>; + +// Instruction variants handled by the renamer. These might not need execution +// ports in certain conditions. +// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs", +// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and +// renaming". +// These can be investigated with llvm-exegesis, e.g. +// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- +// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- + +def BWWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def BWWriteZeroIdiom : SchedWriteVariant<[ + SchedVar, [BWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[BWWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def BWWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar, [BWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[BWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, + VXORPDrr)>; + +def BWWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar, [BWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[BWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>; + +def BWWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar, [BWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[BWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>; + +def BWWriteVZeroIdiomLogicY : SchedWriteVariant<[ + SchedVar, [BWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[BWWriteVZeroIdiomLogicY], 
(instrs VPXORYrr)>; + +def BWWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar, [BWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[BWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr, + PSUBDrr, VPSUBDrr, + PSUBQrr, VPSUBQrr, + PSUBWrr, VPSUBWrr, + PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +def BWWriteVZeroIdiomALUY : SchedWriteVariant<[ + SchedVar, [BWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[BWWriteVZeroIdiomALUY], (instrs VPSUBBYrr, + VPSUBDYrr, + VPSUBQYrr, + VPSUBWYrr, + VPCMPGTBYrr, + VPCMPGTDYrr, + VPCMPGTWYrr)>; + +def BWWritePCMPGTQ : SchedWriteRes<[BWPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def BWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ + SchedVar, [BWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[BWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, + VPCMPGTQYrr)>; + + // CMOVs that use both Z and C flag require an extra uop. def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> { let Latency = 2; Index: llvm/lib/Target/X86/X86SchedHaswell.td =================================================================== --- llvm/lib/Target/X86/X86SchedHaswell.td +++ llvm/lib/Target/X86/X86SchedHaswell.td @@ -1448,8 +1448,7 @@ let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr", - "MUL_(FPrST0|FST0r|FrST0)")>; +def: InstRW<[HWWriteResGroup89], (instregex "MUL_(FPrST0|FST0r|FrST0)")>; def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 11; @@ -1853,6 +1852,90 @@ def: InstRW<[WriteZero], (instrs CLC)>; + +// Instruction variants handled by the renamer. These might not need execution +// ports in certain conditions. +// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs", +// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and +// renaming". +// These can be investigated with llvm-exegesis, e.g. 
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- +// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- + +def HWWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def HWWriteZeroIdiom : SchedWriteVariant<[ + SchedVar, [HWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[HWWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def HWWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar, [HWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[HWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, + VXORPDrr)>; + +def HWWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar, [HWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[HWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>; + +def HWWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar, [HWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[HWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>; + +def HWWriteVZeroIdiomLogicY : SchedWriteVariant<[ + SchedVar, [HWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[HWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>; + +def HWWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar, [HWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[HWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr, + PSUBDrr, VPSUBDrr, + PSUBQrr, VPSUBQrr, + PSUBWrr, VPSUBWrr, + PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +def HWWriteVZeroIdiomALUY : SchedWriteVariant<[ + SchedVar, [HWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[HWWriteVZeroIdiomALUY], (instrs VPSUBBYrr, + VPSUBDYrr, + VPSUBQYrr, + VPSUBWYrr, + VPCMPGTBYrr, + VPCMPGTDYrr, + VPCMPGTWYrr)>; + +def HWWritePCMPGTQ : SchedWriteRes<[HWPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def HWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ + SchedVar, [HWWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[HWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, + VPCMPGTQYrr)>; + + // The 0x83 ADC/SBB 
opcodes have special support for immediate 0 to only require // a single uop. It does not apply to the GR8 encoding. And only applies to the // 8-bit immediate since using larger immediate for 0 would be silly. Index: llvm/lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- llvm/lib/Target/X86/X86SchedSandyBridge.td +++ llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -698,12 +698,6 @@ } def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>; -def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} - def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> { let Latency = 5; let NumMicroOps = 1; @@ -1134,6 +1128,12 @@ def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr)>; +def SBWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar, [SBWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>; + def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[ SchedVar, [SBWriteZeroLatency]>, SchedVar @@ -1152,9 +1152,15 @@ PCMPGTDrr, VPCMPGTDrr, PCMPGTWrr, VPCMPGTWrr)>; +def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ SchedVar, [SBWriteZeroLatency]>, - SchedVar + SchedVar ]>; def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>; Index: llvm/lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -659,8 +659,7 @@ let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup9], (instregex "(V?)PADD(B|D|Q|W)(Y?)rr", - "VPBLENDD(Y?)rri", - "(V?)PSUB(B|D|Q|W)(Y?)rr")>; + "VPBLENDD(Y?)rri")>; def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> { let Latency = 1; @@ -770,8 +769,7 @@ let ResourceCycles = [1]; } def: 
InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)", - "VPBROADCAST(B|W)rr", - "(V?)PCMPGTQ(Y?)rr")>; + "VPBROADCAST(B|W)rr")>; def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> { let Latency = 3; @@ -1742,6 +1740,101 @@ def: InstRW<[WriteZero], (instrs CLC)>; + +// Instruction variants handled by the renamer. These might not need execution +// ports in certain conditions. +// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs", +// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and +// renaming". +// These can be investigated with llvm-exegesis, e.g. +// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- +// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- + +def SKLWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def SKLWriteZeroIdiom : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def SKLWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, + VXORPDrr)>; + +def SKLWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>; + +def SKLWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>; + +def SKLWriteVZeroIdiomLogicY : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteVZeroIdiomLogicY], (instrs VPXORYrr)>; + +def SKLWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +def 
SKLWriteVZeroIdiomALUY : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr, + VPCMPGTDYrr, + VPCMPGTWYrr)>; + +def SKLWritePSUB : SchedWriteRes<[SKLPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def SKLWriteVZeroIdiomPSUB : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, + PSUBDrr, VPSUBDrr, + PSUBQrr, VPSUBQrr, + PSUBWrr, VPSUBWrr, + VPSUBBYrr, + VPSUBDYrr, + VPSUBQYrr, + VPSUBWYrr)>; + +def SKLWritePCMPGTQ : SchedWriteRes<[SKLPort5]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def SKLWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ + SchedVar, [SKLWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKLWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, + VPCMPGTQYrr)>; + + // CMOVs that use both Z and C flag require an extra uop. def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> { let Latency = 2; Index: llvm/lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -680,8 +680,7 @@ "VPBLENDMD(Z128|Z256)rr", "VPBLENDMQ(Z128|Z256)rr", "VPBLENDMW(Z128|Z256)rr", - "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr", - "(V?)PSUB(B|D|Q|W)rr", + "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk", "VPTERNLOGD(Z|Z128|Z256)rri", "VPTERNLOGQ(Z|Z128|Z256)rri")>; @@ -828,7 +827,6 @@ "VPCMPD(Z|Z128|Z256)rri", "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr", "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr", - "(V?)PCMPGTQ(Y?)rr", "VPCMPQ(Z|Z128|Z256)rri", "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri", "VPCMPW(Z|Z128|Z256)rri", @@ -2458,6 +2456,122 @@ def: InstRW<[WriteZero], (instrs CLC)>; + +// Instruction variants handled by the renamer. These might not need execution +// ports in certain conditions. 
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs", +// section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and +// renaming". +// These can be investigated with llvm-exegesis, e.g. +// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- +// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- + +def SKXWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def SKXWriteZeroIdiom : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def SKXWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, + XORPDrr, VXORPDrr, + VXORPSZ128rr, + VXORPDZ128rr)>; + +def SKXWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr, + VXORPSZ256rr, VXORPDZ256rr)>; + +def SKXWriteFZeroIdiomZ : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>; + +def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr, + VPXORDZ128rr, VPXORQZ128rr)>; + +def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr, + VPXORDZ256rr, VPXORQZ256rr)>; + +def SKXWriteVZeroIdiomLogicZ : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>; + +def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, 
VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr, + VPCMPGTDYrr, + VPCMPGTWYrr)>; + +def SKXWritePSUB : SchedWriteRes<[SKXPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; + +def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr, + PSUBDrr, VPSUBDrr, VPSUBDZ128rr, + PSUBQrr, VPSUBQrr, VPSUBQZ128rr, + PSUBWrr, VPSUBWrr, VPSUBWZ128rr, + VPSUBBYrr, VPSUBBZ256rr, + VPSUBDYrr, VPSUBDZ256rr, + VPSUBQYrr, VPSUBQZ256rr, + VPSUBWYrr, VPSUBWZ256rr, + VPSUBBZrr, + VPSUBDZrr, + VPSUBQZrr, + VPSUBWZrr)>; +def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ + SchedVar, [SKXWriteZeroLatency]>, + SchedVar +]>; +def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, + VPCMPGTQYrr)>; + + // CMOVs that use both Z and C flag require an extra uop. 
def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> { let Latency = 2; Index: llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/X86/Broadwell/zero-idioms.s @@ -0,0 +1,450 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -timeline -register-file-stats -iterations=1 < %s | FileCheck %s + +# On BDW, renamer-based zeroing does not work for: +# - 16 and 8-bit GPRs +# - MMX +# - ANDN variants + +subl %eax, %eax +subq %rax, %rax +xorl %eax, %eax +xorq %rax, %rax + +pcmpgtb %mm2, %mm2 +pcmpgtd %mm2, %mm2 +# pcmpgtq %mm2, %mm2 # invalid operand for instruction +pcmpgtw %mm2, %mm2 + +pcmpgtb %xmm2, %xmm2 +pcmpgtd %xmm2, %xmm2 +pcmpgtq %xmm2, %xmm2 +pcmpgtw %xmm2, %xmm2 + +vpcmpgtb %xmm3, %xmm3, %xmm3 +vpcmpgtd %xmm3, %xmm3, %xmm3 +vpcmpgtq %xmm3, %xmm3, %xmm3 +vpcmpgtw %xmm3, %xmm3, %xmm3 + +vpcmpgtb %xmm3, %xmm3, %xmm5 +vpcmpgtd %xmm3, %xmm3, %xmm5 +vpcmpgtq %xmm3, %xmm3, %xmm5 +vpcmpgtw %xmm3, %xmm3, %xmm5 + +psubb %mm2, %mm2 +psubd %mm2, %mm2 +psubq %mm2, %mm2 +psubw %mm2, %mm2 +psubb %xmm2, %xmm2 +psubd %xmm2, %xmm2 +psubq %xmm2, %xmm2 +psubw %xmm2, %xmm2 +vpsubb %xmm3, %xmm3, %xmm3 +vpsubd %xmm3, %xmm3, %xmm3 +vpsubq %xmm3, %xmm3, %xmm3 +vpsubw %xmm3, %xmm3, %xmm3 +vpsubb %ymm3, %ymm3, %ymm3 +vpsubd %ymm3, %ymm3, %ymm3 +vpsubq %ymm3, %ymm3, %ymm3 +vpsubw %ymm3, %ymm3, %ymm3 + +vpsubb %xmm3, %xmm3, %xmm5 +vpsubd %xmm3, %xmm3, %xmm5 +vpsubq %xmm3, %xmm3, %xmm5 +vpsubw %xmm3, %xmm3, %xmm5 +vpsubb %ymm3, %ymm3, %ymm5 +vpsubd %ymm3, %ymm3, %ymm5 +vpsubq %ymm3, %ymm3, %ymm5 +vpsubw %ymm3, %ymm3, %ymm5 + +andnps %xmm0, %xmm0 +andnpd %xmm1, %xmm1 +vandnps %xmm2, %xmm2, %xmm2 +vandnpd %xmm1, %xmm1, %xmm1 +vandnps %ymm2, %ymm2, %ymm2 +vandnpd %ymm1, %ymm1, %ymm1 +pandn %mm2, %mm2 +pandn %xmm2, %xmm2 +vpandn %xmm3, %xmm3, %xmm3 +vpandn %ymm3, %ymm3, %ymm3 + +vandnps 
%xmm2, %xmm2, %xmm5 +vandnpd %xmm1, %xmm1, %xmm5 +vpandn %xmm3, %xmm3, %xmm5 +vandnps %ymm2, %ymm2, %ymm5 +vandnpd %ymm1, %ymm1, %ymm5 +vpandn %ymm3, %ymm3, %ymm5 + +xorps %xmm0, %xmm0 +xorpd %xmm1, %xmm1 +vxorps %xmm2, %xmm2, %xmm2 +vxorpd %xmm1, %xmm1, %xmm1 +vxorps %ymm2, %ymm2, %ymm2 +vxorpd %ymm1, %ymm1, %ymm1 +pxor %mm2, %mm2 +pxor %xmm2, %xmm2 +vpxor %xmm3, %xmm3, %xmm3 +vpxor %ymm3, %ymm3, %ymm3 + +vxorps %xmm4, %xmm4, %xmm5 +vxorpd %xmm1, %xmm1, %xmm3 +vxorps %ymm4, %ymm4, %ymm5 +vxorpd %ymm1, %ymm1, %ymm3 +vpxor %xmm3, %xmm3, %xmm5 +vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 75 +# CHECK-NEXT: Total Cycles: 23 +# CHECK-NEXT: Total uOps: 75 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 3.26 +# CHECK-NEXT: IPC: 3.26 +# CHECK-NEXT: Block RThroughput: 18.8 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 0 0.25 subl %eax, %eax +# CHECK-NEXT: 1 0 0.25 subq %rax, %rax +# CHECK-NEXT: 1 0 0.25 xorl %eax, %eax +# CHECK-NEXT: 1 0 0.25 xorq %rax, %rax +# CHECK-NEXT: 1 1 0.50 pcmpgtb %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 pcmpgtd %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 pcmpgtw %mm2, %mm2 +# CHECK-NEXT: 1 0 0.25 pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 1 
0.50 psubb %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubd %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubq %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubw %mm2, %mm2 +# CHECK-NEXT: 1 0 0.25 psubb %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 psubd %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 psubq %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 psubw %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 1 1.00 andnps %xmm0, %xmm0 +# CHECK-NEXT: 1 1 1.00 andnpd %xmm1, %xmm1 +# CHECK-NEXT: 1 1 1.00 vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 1 1 1.00 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 1 1.00 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 1 1.00 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 1 1 0.33 pandn %mm2, %mm2 +# CHECK-NEXT: 1 1 0.33 pandn %xmm2, %xmm2 +# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 1 0.33 vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 1 1.00 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 1 1 1.00 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 1 1.00 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 1 1 1.00 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 1 1 0.33 vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 xorps %xmm0, %xmm0 +# CHECK-NEXT: 1 0 0.25 xorpd 
%xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.25 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.25 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 0 0.25 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 1 1 0.33 pxor %mm2, %mm2 +# CHECK-NEXT: 1 0 0.25 pxor %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 1 0 0.25 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 1 0 0.25 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 1 0 0.25 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Register File statistics: +# CHECK-NEXT: Total number of mappings created: 79 +# CHECK-NEXT: Max number of mappings used: 24 + +# CHECK: Resources: +# CHECK-NEXT: [0] - BWDivider +# CHECK-NEXT: [1] - BWFPDivider +# CHECK-NEXT: [2] - BWPort0 +# CHECK-NEXT: [3] - BWPort1 +# CHECK-NEXT: [4] - BWPort2 +# CHECK-NEXT: [5] - BWPort3 +# CHECK-NEXT: [6] - BWPort4 +# CHECK-NEXT: [7] - BWPort5 +# CHECK-NEXT: [8] - BWPort6 +# CHECK-NEXT: [9] - BWPort7 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# CHECK-NEXT: - - 4.00 6.00 - - - 14.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# CHECK-NEXT: - - - - - - - - - - subl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - subq %rax, %rax +# CHECK-NEXT: - - - - - - - - - - xorl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - xorq %rax, %rax +# CHECK-NEXT: - - - - - - - 1.00 - - pcmpgtb %mm2, %mm2 +# CHECK-NEXT: - - - 1.00 - - - - - - pcmpgtd %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - pcmpgtw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 
pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - 1.00 - - - - - - psubb %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - psubd %mm2, %mm2 +# CHECK-NEXT: - - - 1.00 - - - - - - psubq %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - psubw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - psubb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubd %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - 1.00 - - 
andnps %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - 1.00 - - andnpd %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - 1.00 - - - - - - - pandn %mm2, %mm2 +# CHECK-NEXT: - - - 1.00 - - - - - - pandn %xmm2, %xmm2 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - xorps %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - - - - xorpd %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - 1.00 - - - - - - - pxor %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - pxor %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# 
CHECK-NEXT: Index 0123456789 012 + +# CHECK: [0,0] DR . . . . . subl %eax, %eax +# CHECK-NEXT: [0,1] DR . . . . . subq %rax, %rax +# CHECK-NEXT: [0,2] DR . . . . . xorl %eax, %eax +# CHECK-NEXT: [0,3] DR . . . . . xorq %rax, %rax +# CHECK-NEXT: [0,4] .DeER. . . . . pcmpgtb %mm2, %mm2 +# CHECK-NEXT: [0,5] .D=eER . . . . pcmpgtd %mm2, %mm2 +# CHECK-NEXT: [0,6] .D==eER . . . . pcmpgtw %mm2, %mm2 +# CHECK-NEXT: [0,7] .D----R . . . . pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: [0,8] . D---R . . . . pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: [0,9] . D---R . . . . pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: [0,10] . D---R . . . . pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: [0,11] . D---R . . . . vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,12] . D--R . . . . vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,13] . D--R . . . . vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,14] . D--R . . . . vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,15] . D--R . . . . vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,16] . D-R . . . . vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,17] . D-R . . . . vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,18] . D-R . . . . vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,19] . DeER . . . . psubb %mm2, %mm2 +# CHECK-NEXT: [0,20] . DeER . . . . psubd %mm2, %mm2 +# CHECK-NEXT: [0,21] . D=eER. . . . psubq %mm2, %mm2 +# CHECK-NEXT: [0,22] . D==eER . . . psubw %mm2, %mm2 +# CHECK-NEXT: [0,23] . D----R . . . psubb %xmm2, %xmm2 +# CHECK-NEXT: [0,24] . .D---R . . . psubd %xmm2, %xmm2 +# CHECK-NEXT: [0,25] . .D---R . . . psubq %xmm2, %xmm2 +# CHECK-NEXT: [0,26] . .D---R . . . psubw %xmm2, %xmm2 +# CHECK-NEXT: [0,27] . .D---R . . . vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,28] . . D--R . . . vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,29] . . D--R . . . vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,30] . . D--R . . . vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,31] . . D--R . . . vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,32] . . D-R . . . 
vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,33] . . D-R . . . vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,34] . . D-R . . . vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,35] . . D-R . . . vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,36] . . DR . . . vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,37] . . DR . . . vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,38] . . DR . . . vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,39] . . DR . . . vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,40] . . DR . . . vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,41] . . DR . . . vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,42] . . DR . . . vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,43] . . DeER . . . andnps %xmm0, %xmm0 +# CHECK-NEXT: [0,44] . . .DeER. . . andnpd %xmm1, %xmm1 +# CHECK-NEXT: [0,45] . . .D=eER . . vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,46] . . .D==eER . . vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,47] . . .D===eER . . vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,48] . . . D===eER . . vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,49] . . . DeE---R . . pandn %mm2, %mm2 +# CHECK-NEXT: [0,50] . . . D===eER . . pandn %xmm2, %xmm2 +# CHECK-NEXT: [0,51] . . . DeE---R . . vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,52] . . . DeE--R . . vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,53] . . . D===eER. . vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: [0,54] . . . D====eER . vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: [0,55] . . . D=eE---R . vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,56] . . . D====eER. vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: [0,57] . . . D=====eER vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: [0,58] . . . DeE-----R vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,59] . . . D-------R xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,60] . . . D=E----R xorpd %xmm1, %xmm1 +# CHECK-NEXT: [0,61] . . . D=E----R vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,62] . . . D=E----R vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,63] . . . 
D=E----R vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,64] . . . .DE----R vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,65] . . . .DeE---R pxor %mm2, %mm2 +# CHECK-NEXT: [0,66] . . . .DE----R pxor %xmm2, %xmm2 +# CHECK-NEXT: [0,67] . . . .D-----R vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,68] . . . . D----R vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,69] . . . . D----R vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: [0,70] . . . . D----R vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: [0,71] . . . . D----R vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: [0,72] . . . . D---R vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: [0,73] . . . . D---R vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,74] . . . . D---R vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 subl %eax, %eax +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 subq %rax, %rax +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 xorl %eax, %eax +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 xorq %rax, %rax +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 pcmpgtb %mm2, %mm2 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 pcmpgtd %mm2, %mm2 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 pcmpgtw %mm2, %mm2 +# CHECK-NEXT: 7. 1 0.0 0.0 4.0 pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: 8. 1 0.0 0.0 3.0 pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: 9. 1 0.0 0.0 3.0 pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: 10. 1 0.0 0.0 3.0 pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: 11. 1 0.0 0.0 3.0 vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 12. 1 0.0 0.0 2.0 vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 13. 1 0.0 0.0 2.0 vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 14. 1 0.0 0.0 2.0 vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 15. 1 0.0 0.0 2.0 vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 16. 
1 0.0 0.0 1.0 vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 17. 1 0.0 0.0 1.0 vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 18. 1 0.0 0.0 1.0 vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 19. 1 1.0 0.0 0.0 psubb %mm2, %mm2 +# CHECK-NEXT: 20. 1 1.0 0.0 0.0 psubd %mm2, %mm2 +# CHECK-NEXT: 21. 1 2.0 0.0 0.0 psubq %mm2, %mm2 +# CHECK-NEXT: 22. 1 3.0 0.0 0.0 psubw %mm2, %mm2 +# CHECK-NEXT: 23. 1 0.0 0.0 4.0 psubb %xmm2, %xmm2 +# CHECK-NEXT: 24. 1 0.0 0.0 3.0 psubd %xmm2, %xmm2 +# CHECK-NEXT: 25. 1 0.0 0.0 3.0 psubq %xmm2, %xmm2 +# CHECK-NEXT: 26. 1 0.0 0.0 3.0 psubw %xmm2, %xmm2 +# CHECK-NEXT: 27. 1 0.0 0.0 3.0 vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 28. 1 0.0 0.0 2.0 vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 29. 1 0.0 0.0 2.0 vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 30. 1 0.0 0.0 2.0 vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 31. 1 0.0 0.0 2.0 vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 32. 1 0.0 0.0 1.0 vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 33. 1 0.0 0.0 1.0 vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 34. 1 0.0 0.0 1.0 vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 35. 1 0.0 0.0 1.0 vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 36. 1 0.0 0.0 0.0 vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 37. 1 0.0 0.0 0.0 vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 38. 1 0.0 0.0 0.0 vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 39. 1 0.0 0.0 0.0 vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 40. 1 0.0 0.0 0.0 vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 41. 1 0.0 0.0 0.0 vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 42. 1 0.0 0.0 0.0 vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 43. 1 1.0 1.0 0.0 andnps %xmm0, %xmm0 +# CHECK-NEXT: 44. 1 1.0 1.0 0.0 andnpd %xmm1, %xmm1 +# CHECK-NEXT: 45. 1 2.0 2.0 0.0 vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 46. 1 3.0 1.0 0.0 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 47. 1 4.0 1.0 0.0 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 48. 1 4.0 1.0 0.0 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 49. 1 1.0 1.0 3.0 pandn %mm2, %mm2 +# CHECK-NEXT: 50. 
1 4.0 0.0 0.0 pandn %xmm2, %xmm2 +# CHECK-NEXT: 51. 1 1.0 1.0 3.0 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 52. 1 1.0 0.0 2.0 vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 53. 1 4.0 0.0 0.0 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 54. 1 5.0 1.0 0.0 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 55. 1 2.0 0.0 3.0 vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 56. 1 5.0 2.0 0.0 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 57. 1 6.0 3.0 0.0 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 58. 1 1.0 0.0 5.0 vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 59. 1 0.0 0.0 7.0 xorps %xmm0, %xmm0 +# CHECK-NEXT: 60. 1 2.0 0.0 4.0 xorpd %xmm1, %xmm1 +# CHECK-NEXT: 61. 1 2.0 0.0 4.0 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 62. 1 2.0 0.0 4.0 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 63. 1 2.0 0.0 4.0 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 64. 1 1.0 0.0 4.0 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 65. 1 1.0 1.0 3.0 pxor %mm2, %mm2 +# CHECK-NEXT: 66. 1 1.0 0.0 4.0 pxor %xmm2, %xmm2 +# CHECK-NEXT: 67. 1 0.0 0.0 5.0 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 68. 1 0.0 0.0 4.0 vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 69. 1 0.0 0.0 4.0 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 70. 1 0.0 0.0 4.0 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 71. 1 0.0 0.0 4.0 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 72. 1 0.0 0.0 3.0 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 73. 1 0.0 0.0 3.0 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 74. 
1 0.0 0.0 3.0 vpxor %ymm3, %ymm3, %ymm5 Index: llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/X86/Haswell/zero-idioms.s @@ -0,0 +1,492 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -timeline -register-file-stats -iterations=1 < %s | FileCheck %s + +# On HSW, renamer-based zeroing does not work for: +# - 16 and 8-bit GPRs +# - MMX +# - ANDN variants + +subl %eax, %eax +subq %rax, %rax +xorl %eax, %eax +xorq %rax, %rax + +pcmpgtb %mm2, %mm2 +pcmpgtd %mm2, %mm2 +# pcmpgtq %mm2, %mm2 # invalid operand for instruction +pcmpgtw %mm2, %mm2 + +pcmpgtb %xmm2, %xmm2 +pcmpgtd %xmm2, %xmm2 +pcmpgtq %xmm2, %xmm2 +pcmpgtw %xmm2, %xmm2 + +vpcmpgtb %xmm3, %xmm3, %xmm3 +vpcmpgtd %xmm3, %xmm3, %xmm3 +vpcmpgtq %xmm3, %xmm3, %xmm3 +vpcmpgtw %xmm3, %xmm3, %xmm3 + +vpcmpgtb %xmm3, %xmm3, %xmm5 +vpcmpgtd %xmm3, %xmm3, %xmm5 +vpcmpgtq %xmm3, %xmm3, %xmm5 +vpcmpgtw %xmm3, %xmm3, %xmm5 + +vpcmpgtb %ymm3, %ymm3, %ymm3 +vpcmpgtd %ymm3, %ymm3, %ymm3 +vpcmpgtq %ymm3, %ymm3, %ymm3 +vpcmpgtw %ymm3, %ymm3, %ymm3 + +vpcmpgtb %ymm3, %ymm3, %ymm5 +vpcmpgtd %ymm3, %ymm3, %ymm5 +vpcmpgtq %ymm3, %ymm3, %ymm5 +vpcmpgtw %ymm3, %ymm3, %ymm5 + +psubb %mm2, %mm2 +psubd %mm2, %mm2 +psubq %mm2, %mm2 +psubw %mm2, %mm2 +psubb %xmm2, %xmm2 +psubd %xmm2, %xmm2 +psubq %xmm2, %xmm2 +psubw %xmm2, %xmm2 +vpsubb %xmm3, %xmm3, %xmm3 +vpsubd %xmm3, %xmm3, %xmm3 +vpsubq %xmm3, %xmm3, %xmm3 +vpsubw %xmm3, %xmm3, %xmm3 +vpsubb %ymm3, %ymm3, %ymm3 +vpsubd %ymm3, %ymm3, %ymm3 +vpsubq %ymm3, %ymm3, %ymm3 +vpsubw %ymm3, %ymm3, %ymm3 + +vpsubb %xmm3, %xmm3, %xmm5 +vpsubd %xmm3, %xmm3, %xmm5 +vpsubq %xmm3, %xmm3, %xmm5 +vpsubw %xmm3, %xmm3, %xmm5 +vpsubb %ymm3, %ymm3, %ymm5 +vpsubd %ymm3, %ymm3, %ymm5 +vpsubq %ymm3, %ymm3, %ymm5 +vpsubw %ymm3, %ymm3, %ymm5 + +andnps %xmm0, %xmm0 +andnpd %xmm1, %xmm1 +vandnps %xmm2, 
%xmm2, %xmm2 +vandnpd %xmm1, %xmm1, %xmm1 +vandnps %ymm2, %ymm2, %ymm2 +vandnpd %ymm1, %ymm1, %ymm1 +pandn %mm2, %mm2 +pandn %xmm2, %xmm2 +vpandn %xmm3, %xmm3, %xmm3 +vpandn %ymm3, %ymm3, %ymm3 + +vandnps %xmm2, %xmm2, %xmm5 +vandnpd %xmm1, %xmm1, %xmm5 +vpandn %xmm3, %xmm3, %xmm5 +vandnps %ymm2, %ymm2, %ymm5 +vandnpd %ymm1, %ymm1, %ymm5 +vpandn %ymm3, %ymm3, %ymm5 + +xorps %xmm0, %xmm0 +xorpd %xmm1, %xmm1 +vxorps %xmm2, %xmm2, %xmm2 +vxorpd %xmm1, %xmm1, %xmm1 +vxorps %ymm2, %ymm2, %ymm2 +vxorpd %ymm1, %ymm1, %ymm1 +pxor %mm2, %mm2 +pxor %xmm2, %xmm2 +vpxor %xmm3, %xmm3, %xmm3 +vpxor %ymm3, %ymm3, %ymm3 + +vxorps %xmm4, %xmm4, %xmm5 +vxorpd %xmm1, %xmm1, %xmm3 +vxorps %ymm4, %ymm4, %ymm5 +vxorpd %ymm1, %ymm1, %ymm3 +vpxor %xmm3, %xmm3, %xmm5 +vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 83 +# CHECK-NEXT: Total Cycles: 25 +# CHECK-NEXT: Total uOps: 83 + +# CHECK: Dispatch Width: 4 +# CHECK-NEXT: uOps Per Cycle: 3.32 +# CHECK-NEXT: IPC: 3.32 +# CHECK-NEXT: Block RThroughput: 20.8 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 0 0.25 subl %eax, %eax +# CHECK-NEXT: 1 0 0.25 subq %rax, %rax +# CHECK-NEXT: 1 0 0.25 xorl %eax, %eax +# CHECK-NEXT: 1 0 0.25 xorq %rax, %rax +# CHECK-NEXT: 1 1 0.50 pcmpgtb %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 pcmpgtd %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 pcmpgtw %mm2, %mm2 +# CHECK-NEXT: 1 0 0.25 pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 
vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 1 0.50 psubb %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubd %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubq %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubw %mm2, %mm2 +# CHECK-NEXT: 1 0 0.25 psubb %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 psubd %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 psubq %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 psubw %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 1 1.00 andnps %xmm0, %xmm0 +# CHECK-NEXT: 1 1 1.00 andnpd %xmm1, %xmm1 +# CHECK-NEXT: 1 1 1.00 vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 1 1 1.00 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 1 1.00 vandnps %ymm2, 
%ymm2, %ymm2 +# CHECK-NEXT: 1 1 1.00 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 1 1 0.33 pandn %mm2, %mm2 +# CHECK-NEXT: 1 1 0.33 pandn %xmm2, %xmm2 +# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 1 0.33 vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 1 1.00 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 1 1 1.00 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 1 1.00 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 1 1 1.00 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 1 1 0.33 vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.25 xorps %xmm0, %xmm0 +# CHECK-NEXT: 1 0 0.25 xorpd %xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.25 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.25 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 0 0.25 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 1 1 0.33 pxor %mm2, %mm2 +# CHECK-NEXT: 1 0 0.25 pxor %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.25 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.25 vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.25 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 1 0 0.25 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 1 0 0.25 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 1 0 0.25 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 1 0 0.25 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.25 vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Register File statistics: +# CHECK-NEXT: Total number of mappings created: 87 +# CHECK-NEXT: Max number of mappings used: 24 + +# CHECK: Resources: +# CHECK-NEXT: [0] - HWDivider +# CHECK-NEXT: [1] - HWFPDivider +# CHECK-NEXT: [2] - HWPort0 +# CHECK-NEXT: [3] - HWPort1 +# CHECK-NEXT: [4] - HWPort2 +# CHECK-NEXT: [5] - HWPort3 +# CHECK-NEXT: [6] - HWPort4 +# CHECK-NEXT: [7] - HWPort5 +# CHECK-NEXT: [8] - HWPort6 +# CHECK-NEXT: [9] - HWPort7 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# CHECK-NEXT: - - 4.00 6.00 - - - 14.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: 
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# CHECK-NEXT: - - - - - - - - - - subl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - subq %rax, %rax +# CHECK-NEXT: - - - - - - - - - - xorl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - xorq %rax, %rax +# CHECK-NEXT: - - - - - - - 1.00 - - pcmpgtb %mm2, %mm2 +# CHECK-NEXT: - - - 1.00 - - - - - - pcmpgtd %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - pcmpgtw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - 1.00 - - - - - - psubb %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - psubd %mm2, %mm2 +# CHECK-NEXT: - - - 1.00 - - - - - - psubq %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - psubw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - psubb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubd %xmm2, 
%xmm2 +# CHECK-NEXT: - - - - - - - - - - psubq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - 1.00 - - andnps %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - 1.00 - - andnpd %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - 1.00 - - - - - - - pandn %mm2, %mm2 +# CHECK-NEXT: - - - 1.00 - - - - - - pandn %xmm2, %xmm2 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm5 +# 
CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - xorps %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - - - - xorpd %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - 1.00 - - - - - - - pxor %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - pxor %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01234 + +# CHECK: [0,0] DR . . . . . subl %eax, %eax +# CHECK-NEXT: [0,1] DR . . . . . subq %rax, %rax +# CHECK-NEXT: [0,2] DR . . . . . xorl %eax, %eax +# CHECK-NEXT: [0,3] DR . . . . . xorq %rax, %rax +# CHECK-NEXT: [0,4] .DeER. . . . . pcmpgtb %mm2, %mm2 +# CHECK-NEXT: [0,5] .D=eER . . . . pcmpgtd %mm2, %mm2 +# CHECK-NEXT: [0,6] .D==eER . . . . pcmpgtw %mm2, %mm2 +# CHECK-NEXT: [0,7] .D----R . . . . pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: [0,8] . D---R . . . . pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: [0,9] . D---R . . . . pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: [0,10] . D---R . . . . pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: [0,11] . D---R . . . . vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,12] . D--R . . . . vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,13] . D--R . . . . vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,14] . D--R . 
. . . vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,15] . D--R . . . . vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,16] . D-R . . . . vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,17] . D-R . . . . vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,18] . D-R . . . . vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,19] . D-R . . . . vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,20] . DR . . . . vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,21] . DR . . . . vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,22] . DR . . . . vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,23] . DR . . . . vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,24] . .DR . . . . vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,25] . .DR . . . . vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,26] . .DR . . . . vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,27] . .DeER. . . . psubb %mm2, %mm2 +# CHECK-NEXT: [0,28] . . DeER . . . psubd %mm2, %mm2 +# CHECK-NEXT: [0,29] . . D=eER . . . psubq %mm2, %mm2 +# CHECK-NEXT: [0,30] . . D==eER . . . psubw %mm2, %mm2 +# CHECK-NEXT: [0,31] . . D----R . . . psubb %xmm2, %xmm2 +# CHECK-NEXT: [0,32] . . D---R . . . psubd %xmm2, %xmm2 +# CHECK-NEXT: [0,33] . . D---R . . . psubq %xmm2, %xmm2 +# CHECK-NEXT: [0,34] . . D---R . . . psubw %xmm2, %xmm2 +# CHECK-NEXT: [0,35] . . D---R . . . vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,36] . . D--R . . . vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,37] . . D--R . . . vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,38] . . D--R . . . vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,39] . . D--R . . . vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,40] . . D-R . . . vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,41] . . D-R . . . vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,42] . . D-R . . . vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,43] . . D-R . . . vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,44] . . .DR . . . vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,45] . . .DR . . . vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,46] . . .DR . . . 
vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,47] . . .DR . . . vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,48] . . . DR . . . vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,49] . . . DR . . . vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,50] . . . DR . . . vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,51] . . . DeER . . andnps %xmm0, %xmm0 +# CHECK-NEXT: [0,52] . . . DeER . . andnpd %xmm1, %xmm1 +# CHECK-NEXT: [0,53] . . . D=eER . . vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,54] . . . D==eER . . vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,55] . . . D===eER. . vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,56] . . . D===eER . vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,57] . . . DeE---R . pandn %mm2, %mm2 +# CHECK-NEXT: [0,58] . . . D===eER . pandn %xmm2, %xmm2 +# CHECK-NEXT: [0,59] . . . DeE---R . vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,60] . . . DeE--R . vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,61] . . . D===eER . vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: [0,62] . . . D====eER . vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: [0,63] . . . D=eE---R . vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,64] . . . .D====eER. vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: [0,65] . . . .D=====eER vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: [0,66] . . . .DeE-----R vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,67] . . . .D-------R xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,68] . . . . D=E----R xorpd %xmm1, %xmm1 +# CHECK-NEXT: [0,69] . . . . D=E----R vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,70] . . . . D=E----R vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,71] . . . . D=E----R vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,72] . . . . DE----R vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,73] . . . . DeE---R pxor %mm2, %mm2 +# CHECK-NEXT: [0,74] . . . . DE----R pxor %xmm2, %xmm2 +# CHECK-NEXT: [0,75] . . . . D-----R vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,76] . . . . D----R vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,77] . . . . 
D----R vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: [0,78] . . . . D----R vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: [0,79] . . . . D----R vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: [0,80] . . . . D---R vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: [0,81] . . . . D---R vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,82] . . . . D---R vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 subl %eax, %eax +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 subq %rax, %rax +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 xorl %eax, %eax +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 xorq %rax, %rax +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 pcmpgtb %mm2, %mm2 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 pcmpgtd %mm2, %mm2 +# CHECK-NEXT: 6. 1 3.0 0.0 0.0 pcmpgtw %mm2, %mm2 +# CHECK-NEXT: 7. 1 0.0 0.0 4.0 pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: 8. 1 0.0 0.0 3.0 pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: 9. 1 0.0 0.0 3.0 pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: 10. 1 0.0 0.0 3.0 pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: 11. 1 0.0 0.0 3.0 vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 12. 1 0.0 0.0 2.0 vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 13. 1 0.0 0.0 2.0 vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 14. 1 0.0 0.0 2.0 vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 15. 1 0.0 0.0 2.0 vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 16. 1 0.0 0.0 1.0 vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 17. 1 0.0 0.0 1.0 vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 18. 1 0.0 0.0 1.0 vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 19. 1 0.0 0.0 1.0 vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 20. 1 0.0 0.0 0.0 vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 21. 1 0.0 0.0 0.0 vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 22. 
1 0.0 0.0 0.0 vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 23. 1 0.0 0.0 0.0 vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 24. 1 0.0 0.0 0.0 vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 25. 1 0.0 0.0 0.0 vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 26. 1 0.0 0.0 0.0 vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 27. 1 1.0 1.0 0.0 psubb %mm2, %mm2 +# CHECK-NEXT: 28. 1 1.0 0.0 0.0 psubd %mm2, %mm2 +# CHECK-NEXT: 29. 1 2.0 0.0 0.0 psubq %mm2, %mm2 +# CHECK-NEXT: 30. 1 3.0 0.0 0.0 psubw %mm2, %mm2 +# CHECK-NEXT: 31. 1 0.0 0.0 4.0 psubb %xmm2, %xmm2 +# CHECK-NEXT: 32. 1 0.0 0.0 3.0 psubd %xmm2, %xmm2 +# CHECK-NEXT: 33. 1 0.0 0.0 3.0 psubq %xmm2, %xmm2 +# CHECK-NEXT: 34. 1 0.0 0.0 3.0 psubw %xmm2, %xmm2 +# CHECK-NEXT: 35. 1 0.0 0.0 3.0 vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 36. 1 0.0 0.0 2.0 vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 37. 1 0.0 0.0 2.0 vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 38. 1 0.0 0.0 2.0 vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 39. 1 0.0 0.0 2.0 vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 40. 1 0.0 0.0 1.0 vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 41. 1 0.0 0.0 1.0 vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 42. 1 0.0 0.0 1.0 vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 43. 1 0.0 0.0 1.0 vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 44. 1 0.0 0.0 0.0 vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 45. 1 0.0 0.0 0.0 vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 46. 1 0.0 0.0 0.0 vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 47. 1 0.0 0.0 0.0 vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 48. 1 0.0 0.0 0.0 vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 49. 1 0.0 0.0 0.0 vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 50. 1 0.0 0.0 0.0 vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 51. 1 1.0 1.0 0.0 andnps %xmm0, %xmm0 +# CHECK-NEXT: 52. 1 1.0 1.0 0.0 andnpd %xmm1, %xmm1 +# CHECK-NEXT: 53. 1 2.0 2.0 0.0 vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 54. 1 3.0 1.0 0.0 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 55. 1 4.0 1.0 0.0 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 56. 
1 4.0 1.0 0.0 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 57. 1 1.0 1.0 3.0 pandn %mm2, %mm2 +# CHECK-NEXT: 58. 1 4.0 0.0 0.0 pandn %xmm2, %xmm2 +# CHECK-NEXT: 59. 1 1.0 1.0 3.0 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 60. 1 1.0 0.0 2.0 vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 61. 1 4.0 0.0 0.0 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 62. 1 5.0 1.0 0.0 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 63. 1 2.0 0.0 3.0 vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 64. 1 5.0 2.0 0.0 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 65. 1 6.0 3.0 0.0 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 66. 1 1.0 0.0 5.0 vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 67. 1 0.0 0.0 7.0 xorps %xmm0, %xmm0 +# CHECK-NEXT: 68. 1 2.0 0.0 4.0 xorpd %xmm1, %xmm1 +# CHECK-NEXT: 69. 1 2.0 0.0 4.0 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 70. 1 2.0 0.0 4.0 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 71. 1 2.0 0.0 4.0 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 72. 1 1.0 0.0 4.0 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 73. 1 1.0 1.0 3.0 pxor %mm2, %mm2 +# CHECK-NEXT: 74. 1 1.0 0.0 4.0 pxor %xmm2, %xmm2 +# CHECK-NEXT: 75. 1 0.0 0.0 5.0 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 76. 1 0.0 0.0 4.0 vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 77. 1 0.0 0.0 4.0 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 78. 1 0.0 0.0 4.0 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 79. 1 0.0 0.0 4.0 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 80. 1 0.0 0.0 3.0 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 81. 1 0.0 0.0 3.0 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 82. 
1 0.0 0.0 3.0 vpxor %ymm3, %ymm3, %ymm5 Index: llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s =================================================================== --- llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s +++ llvm/test/tools/llvm-mca/X86/SandyBridge/zero-idioms.s @@ -53,6 +53,8 @@ andnpd %xmm1, %xmm1 vandnps %xmm2, %xmm2, %xmm2 vandnpd %xmm1, %xmm1, %xmm1 +vandnps %ymm2, %ymm2, %ymm2 +vandnpd %ymm1, %ymm1, %ymm1 pandn %mm2, %mm2 pandn %xmm2, %xmm2 vpandn %xmm3, %xmm3, %xmm3 @@ -60,28 +62,34 @@ vandnps %xmm2, %xmm2, %xmm5 vandnpd %xmm1, %xmm1, %xmm5 vpandn %xmm3, %xmm3, %xmm5 +vandnps %ymm2, %ymm2, %ymm5 +vandnpd %ymm1, %ymm1, %ymm5 xorps %xmm0, %xmm0 xorpd %xmm1, %xmm1 vxorps %xmm2, %xmm2, %xmm2 vxorpd %xmm1, %xmm1, %xmm1 +vxorps %ymm2, %ymm2, %ymm2 +vxorpd %ymm1, %ymm1, %ymm1 pxor %mm2, %mm2 pxor %xmm2, %xmm2 vpxor %xmm3, %xmm3, %xmm3 vxorps %xmm4, %xmm4, %xmm5 vxorpd %xmm1, %xmm1, %xmm3 +vxorps %ymm4, %ymm4, %ymm5 +vxorpd %ymm1, %ymm1, %ymm3 vpxor %xmm3, %xmm3, %xmm5 # CHECK: Iterations: 1 -# CHECK-NEXT: Instructions: 55 +# CHECK-NEXT: Instructions: 63 # CHECK-NEXT: Total Cycles: 27 -# CHECK-NEXT: Total uOps: 55 +# CHECK-NEXT: Total uOps: 63 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 2.04 -# CHECK-NEXT: IPC: 2.04 -# CHECK-NEXT: Block RThroughput: 13.8 +# CHECK-NEXT: uOps Per Cycle: 2.33 +# CHECK-NEXT: IPC: 2.33 +# CHECK-NEXT: Block RThroughput: 15.8 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -131,26 +139,34 @@ # CHECK-NEXT: 1 1 1.00 andnpd %xmm1, %xmm1 # CHECK-NEXT: 1 1 1.00 vandnps %xmm2, %xmm2, %xmm2 # CHECK-NEXT: 1 1 1.00 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 1 1.00 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 1 1.00 vandnpd %ymm1, %ymm1, %ymm1 # CHECK-NEXT: 1 1 0.33 pandn %mm2, %mm2 # CHECK-NEXT: 1 1 0.33 pandn %xmm2, %xmm2 # CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm3 # CHECK-NEXT: 1 1 1.00 vandnps %xmm2, %xmm2, %xmm5 # CHECK-NEXT: 1 1 1.00 vandnpd %xmm1, %xmm1, %xmm5 # CHECK-NEXT: 1 1 0.33 vpandn 
%xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 1 1.00 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 1 1 1.00 vandnpd %ymm1, %ymm1, %ymm5 # CHECK-NEXT: 1 0 0.25 xorps %xmm0, %xmm0 # CHECK-NEXT: 1 0 0.25 xorpd %xmm1, %xmm1 # CHECK-NEXT: 1 0 0.25 vxorps %xmm2, %xmm2, %xmm2 # CHECK-NEXT: 1 0 0.25 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.25 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 0 0.25 vxorpd %ymm1, %ymm1, %ymm1 # CHECK-NEXT: 1 1 0.33 pxor %mm2, %mm2 # CHECK-NEXT: 1 0 0.25 pxor %xmm2, %xmm2 # CHECK-NEXT: 1 0 0.25 vpxor %xmm3, %xmm3, %xmm3 # CHECK-NEXT: 1 0 0.25 vxorps %xmm4, %xmm4, %xmm5 # CHECK-NEXT: 1 0 0.25 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 1 0 0.25 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 1 0 0.25 vxorpd %ymm1, %ymm1, %ymm3 # CHECK-NEXT: 1 0 0.25 vpxor %xmm3, %xmm3, %xmm5 # CHECK: Register File statistics: -# CHECK-NEXT: Total number of mappings created: 59 -# CHECK-NEXT: Max number of mappings used: 42 +# CHECK-NEXT: Total number of mappings created: 67 +# CHECK-NEXT: Max number of mappings used: 43 # CHECK: Resources: # CHECK-NEXT: [0] - SBDivider @@ -164,7 +180,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 4.00 8.00 - 6.00 - - +# CHECK-NEXT: - - 4.00 8.00 - 10.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -207,21 +223,29 @@ # CHECK-NEXT: - - - - - 1.00 - - andnpd %xmm1, %xmm1 # CHECK-NEXT: - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm2 # CHECK-NEXT: - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - - - 1.00 - - vandnpd %ymm1, %ymm1, %ymm1 # CHECK-NEXT: - - 1.00 - - - - - pandn %mm2, %mm2 -# CHECK-NEXT: - - 1.00 - - - - - pandn %xmm2, %xmm2 +# CHECK-NEXT: - - - 1.00 - - - - pandn %xmm2, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - vpandn %xmm3, %xmm3, %xmm3 # CHECK-NEXT: - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm5 # CHECK-NEXT: - - - - - 1.00 
- - vandnpd %xmm1, %xmm1, %xmm5 -# CHECK-NEXT: - - - 1.00 - - - - vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - 1.00 - - - - - vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: - - - - - 1.00 - - vandnpd %ymm1, %ymm1, %ymm5 # CHECK-NEXT: - - - - - - - - xorps %xmm0, %xmm0 # CHECK-NEXT: - - - - - - - - xorpd %xmm1, %xmm1 # CHECK-NEXT: - - - - - - - - vxorps %xmm2, %xmm2, %xmm2 # CHECK-NEXT: - - - - - - - - vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - - - - - - vxorpd %ymm1, %ymm1, %ymm1 # CHECK-NEXT: - - 1.00 - - - - - pxor %mm2, %mm2 # CHECK-NEXT: - - - - - - - - pxor %xmm2, %xmm2 # CHECK-NEXT: - - - - - - - - vpxor %xmm3, %xmm3, %xmm3 # CHECK-NEXT: - - - - - - - - vxorps %xmm4, %xmm4, %xmm5 # CHECK-NEXT: - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: - - - - - - - - vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: - - - - - - - - vxorpd %ymm1, %ymm1, %ymm3 # CHECK-NEXT: - - - - - - - - vpxor %xmm3, %xmm3, %xmm5 # CHECK: Timeline view: @@ -266,23 +290,31 @@ # CHECK-NEXT: [0,35] . . DeE-------------R.. andnps %xmm0, %xmm0 # CHECK-NEXT: [0,36] . . DeE------------R.. andnpd %xmm1, %xmm1 # CHECK-NEXT: [0,37] . . D=eE-----------R.. vandnps %xmm2, %xmm2, %xmm2 -# CHECK-NEXT: [0,38] . . D==eE----------R.. vandnpd %xmm1, %xmm1, %xmm1 -# CHECK-NEXT: [0,39] . . D=============eER. pandn %mm2, %mm2 -# CHECK-NEXT: [0,40] . . D=eE-----------R. pandn %xmm2, %xmm2 -# CHECK-NEXT: [0,41] . . DeE------------R. vpandn %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,42] . . D==eE----------R. vandnps %xmm2, %xmm2, %xmm5 -# CHECK-NEXT: [0,43] . . D===eE---------R. vandnpd %xmm1, %xmm1, %xmm5 -# CHECK-NEXT: [0,44] . . .DeE-----------R. vpandn %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,45] . . .D-------------R. xorps %xmm0, %xmm0 -# CHECK-NEXT: [0,46] . . .D=E-----------R. xorpd %xmm1, %xmm1 -# CHECK-NEXT: [0,47] . . .D=E-----------R. vxorps %xmm2, %xmm2, %xmm2 -# CHECK-NEXT: [0,48] . . 
. DE-----------R. vxorpd %xmm1, %xmm1, %xmm1 -# CHECK-NEXT: [0,49] . . . D===========eER pxor %mm2, %mm2 -# CHECK-NEXT: [0,50] . . . DE------------R pxor %xmm2, %xmm2 -# CHECK-NEXT: [0,51] . . . D-------------R vpxor %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,52] . . . D------------R vxorps %xmm4, %xmm4, %xmm5 -# CHECK-NEXT: [0,53] . . . D------------R vxorpd %xmm1, %xmm1, %xmm3 -# CHECK-NEXT: [0,54] . . . D------------R vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,38] . . D===eE---------R.. vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,39] . . D==eE----------R.. vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,40] . . D===eE--------R.. vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,41] . . D============eER. pandn %mm2, %mm2 +# CHECK-NEXT: [0,42] . . D==eE----------R. pandn %xmm2, %xmm2 +# CHECK-NEXT: [0,43] . . DeE------------R. vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,44] . . .D===eE--------R. vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: [0,45] . . .D====eE-------R. vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: [0,46] . . .DeE-----------R. vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,47] . . .D=====eE------R. vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: [0,48] . . . D=====eE-----R. vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: [0,49] . . . D------------R. xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,50] . . . D==E---------R. xorpd %xmm1, %xmm1 +# CHECK-NEXT: [0,51] . . . D=E----------R. vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,52] . . . D=E---------R. vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,53] . . . DE----------R. vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,54] . . . D=E---------R. vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,55] . . . D==========eER pxor %mm2, %mm2 +# CHECK-NEXT: [0,56] . . . D-----------R pxor %xmm2, %xmm2 +# CHECK-NEXT: [0,57] . . . D-----------R vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,58] . . . D-----------R vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: [0,59] . . . DE----------R vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: [0,60] . . . 
D----------R vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: [0,61] . . . D----------R vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: [0,62] . . . D----------R vpxor %xmm3, %xmm3, %xmm5 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -329,20 +361,28 @@ # CHECK-NEXT: 35. 1 1.0 1.0 13.0 andnps %xmm0, %xmm0 # CHECK-NEXT: 36. 1 1.0 1.0 12.0 andnpd %xmm1, %xmm1 # CHECK-NEXT: 37. 1 2.0 2.0 11.0 vandnps %xmm2, %xmm2, %xmm2 -# CHECK-NEXT: 38. 1 3.0 1.0 10.0 vandnpd %xmm1, %xmm1, %xmm1 -# CHECK-NEXT: 39. 1 14.0 0.0 0.0 pandn %mm2, %mm2 -# CHECK-NEXT: 40. 1 2.0 0.0 11.0 pandn %xmm2, %xmm2 -# CHECK-NEXT: 41. 1 1.0 1.0 12.0 vpandn %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: 42. 1 3.0 0.0 10.0 vandnps %xmm2, %xmm2, %xmm5 -# CHECK-NEXT: 43. 1 4.0 1.0 9.0 vandnpd %xmm1, %xmm1, %xmm5 -# CHECK-NEXT: 44. 1 1.0 0.0 11.0 vpandn %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: 45. 1 0.0 0.0 13.0 xorps %xmm0, %xmm0 -# CHECK-NEXT: 46. 1 2.0 0.0 11.0 xorpd %xmm1, %xmm1 -# CHECK-NEXT: 47. 1 2.0 0.0 11.0 vxorps %xmm2, %xmm2, %xmm2 -# CHECK-NEXT: 48. 1 1.0 0.0 11.0 vxorpd %xmm1, %xmm1, %xmm1 -# CHECK-NEXT: 49. 1 12.0 0.0 0.0 pxor %mm2, %mm2 -# CHECK-NEXT: 50. 1 1.0 0.0 12.0 pxor %xmm2, %xmm2 -# CHECK-NEXT: 51. 1 0.0 0.0 13.0 vpxor %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: 52. 1 0.0 0.0 12.0 vxorps %xmm4, %xmm4, %xmm5 -# CHECK-NEXT: 53. 1 0.0 0.0 12.0 vxorpd %xmm1, %xmm1, %xmm3 -# CHECK-NEXT: 54. 1 0.0 0.0 12.0 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 38. 1 4.0 2.0 9.0 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 39. 1 3.0 0.0 10.0 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 40. 1 4.0 0.0 8.0 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 41. 1 13.0 0.0 0.0 pandn %mm2, %mm2 +# CHECK-NEXT: 42. 1 3.0 0.0 10.0 pandn %xmm2, %xmm2 +# CHECK-NEXT: 43. 1 1.0 1.0 12.0 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 44. 1 4.0 1.0 8.0 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 45. 1 5.0 1.0 7.0 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 46. 1 1.0 0.0 11.0 vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 47. 
1 6.0 3.0 6.0 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 48. 1 6.0 3.0 5.0 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 49. 1 0.0 0.0 12.0 xorps %xmm0, %xmm0 +# CHECK-NEXT: 50. 1 3.0 0.0 9.0 xorpd %xmm1, %xmm1 +# CHECK-NEXT: 51. 1 2.0 0.0 10.0 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 52. 1 2.0 0.0 9.0 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 53. 1 1.0 0.0 10.0 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 54. 1 2.0 0.0 9.0 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 55. 1 11.0 0.0 0.0 pxor %mm2, %mm2 +# CHECK-NEXT: 56. 1 0.0 0.0 11.0 pxor %xmm2, %xmm2 +# CHECK-NEXT: 57. 1 0.0 0.0 11.0 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 58. 1 0.0 0.0 11.0 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 59. 1 1.0 0.0 10.0 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 60. 1 0.0 0.0 10.0 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 61. 1 0.0 0.0 10.0 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 62. 1 0.0 0.0 10.0 vpxor %xmm3, %xmm3, %xmm5 Index: llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/X86/SkylakeClient/zero-idioms.s @@ -0,0 +1,492 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -timeline -register-file-stats -iterations=1 < %s | FileCheck %s + +# On SKL, renamer-based zeroing does not work for: +# - 16 and 8-bit GPRs +# - MMX +# - ANDN variants + +subl %eax, %eax +subq %rax, %rax +xorl %eax, %eax +xorq %rax, %rax + +pcmpgtb %mm2, %mm2 +pcmpgtd %mm2, %mm2 +# pcmpgtq %mm2, %mm2 # invalid operand for instruction +pcmpgtw %mm2, %mm2 + +pcmpgtb %xmm2, %xmm2 +pcmpgtd %xmm2, %xmm2 +pcmpgtq %xmm2, %xmm2 +pcmpgtw %xmm2, %xmm2 + +vpcmpgtb %xmm3, %xmm3, %xmm3 +vpcmpgtd %xmm3, %xmm3, %xmm3 +vpcmpgtq %xmm3, %xmm3, %xmm3 +vpcmpgtw %xmm3, %xmm3, %xmm3 + +vpcmpgtb %xmm3, %xmm3, %xmm5 +vpcmpgtd %xmm3, %xmm3, %xmm5 +vpcmpgtq %xmm3, %xmm3, %xmm5 +vpcmpgtw %xmm3, %xmm3, %xmm5 + +vpcmpgtb %ymm3, 
%ymm3, %ymm3 +vpcmpgtd %ymm3, %ymm3, %ymm3 +vpcmpgtq %ymm3, %ymm3, %ymm3 +vpcmpgtw %ymm3, %ymm3, %ymm3 + +vpcmpgtb %ymm3, %ymm3, %ymm5 +vpcmpgtd %ymm3, %ymm3, %ymm5 +vpcmpgtq %ymm3, %ymm3, %ymm5 +vpcmpgtw %ymm3, %ymm3, %ymm5 + +psubb %mm2, %mm2 +psubd %mm2, %mm2 +psubq %mm2, %mm2 +psubw %mm2, %mm2 +psubb %xmm2, %xmm2 +psubd %xmm2, %xmm2 +psubq %xmm2, %xmm2 +psubw %xmm2, %xmm2 +vpsubb %xmm3, %xmm3, %xmm3 +vpsubd %xmm3, %xmm3, %xmm3 +vpsubq %xmm3, %xmm3, %xmm3 +vpsubw %xmm3, %xmm3, %xmm3 +vpsubb %ymm3, %ymm3, %ymm3 +vpsubd %ymm3, %ymm3, %ymm3 +vpsubq %ymm3, %ymm3, %ymm3 +vpsubw %ymm3, %ymm3, %ymm3 + +vpsubb %xmm3, %xmm3, %xmm5 +vpsubd %xmm3, %xmm3, %xmm5 +vpsubq %xmm3, %xmm3, %xmm5 +vpsubw %xmm3, %xmm3, %xmm5 +vpsubb %ymm3, %ymm3, %ymm5 +vpsubd %ymm3, %ymm3, %ymm5 +vpsubq %ymm3, %ymm3, %ymm5 +vpsubw %ymm3, %ymm3, %ymm5 + +andnps %xmm0, %xmm0 +andnpd %xmm1, %xmm1 +vandnps %xmm2, %xmm2, %xmm2 +vandnpd %xmm1, %xmm1, %xmm1 +vandnps %ymm2, %ymm2, %ymm2 +vandnpd %ymm1, %ymm1, %ymm1 +pandn %mm2, %mm2 +pandn %xmm2, %xmm2 +vpandn %xmm3, %xmm3, %xmm3 +vpandn %ymm3, %ymm3, %ymm3 + +vandnps %xmm2, %xmm2, %xmm5 +vandnpd %xmm1, %xmm1, %xmm5 +vpandn %xmm3, %xmm3, %xmm5 +vandnps %ymm2, %ymm2, %ymm5 +vandnpd %ymm1, %ymm1, %ymm5 +vpandn %ymm3, %ymm3, %ymm5 + +xorps %xmm0, %xmm0 +xorpd %xmm1, %xmm1 +vxorps %xmm2, %xmm2, %xmm2 +vxorpd %xmm1, %xmm1, %xmm1 +vxorps %ymm2, %ymm2, %ymm2 +vxorpd %ymm1, %ymm1, %ymm1 +pxor %mm2, %mm2 +pxor %xmm2, %xmm2 +vpxor %xmm3, %xmm3, %xmm3 +vpxor %ymm3, %ymm3, %ymm3 + +vxorps %xmm4, %xmm4, %xmm5 +vxorpd %xmm1, %xmm1, %xmm3 +vxorps %ymm4, %ymm4, %ymm5 +vxorpd %ymm1, %ymm1, %ymm3 +vpxor %xmm3, %xmm3, %xmm5 +vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 83 +# CHECK-NEXT: Total Cycles: 17 +# CHECK-NEXT: Total uOps: 83 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 4.88 +# CHECK-NEXT: IPC: 4.88 +# CHECK-NEXT: Block RThroughput: 13.8 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: 
Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 0 0.17 subl %eax, %eax +# CHECK-NEXT: 1 0 0.17 subq %rax, %rax +# CHECK-NEXT: 1 0 0.17 xorl %eax, %eax +# CHECK-NEXT: 1 0 0.17 xorq %rax, %rax +# CHECK-NEXT: 1 1 1.00 pcmpgtb %mm2, %mm2 +# CHECK-NEXT: 1 1 1.00 pcmpgtd %mm2, %mm2 +# CHECK-NEXT: 1 1 1.00 pcmpgtw %mm2, %mm2 +# CHECK-NEXT: 1 0 0.17 pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 1 0.50 psubb %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubd %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubq %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubw %mm2, %mm2 +# CHECK-NEXT: 1 0 0.17 psubb %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 psubd %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 psubq %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 psubw %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpsubq %xmm3, %xmm3, %xmm3 +# 
CHECK-NEXT: 1 0 0.17 vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 1 0.33 andnps %xmm0, %xmm0 +# CHECK-NEXT: 1 1 0.33 andnpd %xmm1, %xmm1 +# CHECK-NEXT: 1 1 0.33 vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 1 1 0.33 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 1 0.33 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 1 0.33 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 1 1 0.50 pandn %mm2, %mm2 +# CHECK-NEXT: 1 1 0.33 pandn %xmm2, %xmm2 +# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 1 0.33 vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 1 0.33 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 1 1 0.33 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 1 0.33 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 1 1 0.33 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 1 1 0.33 vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 xorps %xmm0, %xmm0 +# CHECK-NEXT: 1 0 0.17 xorpd %xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.17 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.17 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 0 0.17 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 1 1 0.50 pxor %mm2, %mm2 +# CHECK-NEXT: 1 0 0.17 pxor %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 1 0 0.17 
vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 1 0 0.17 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 1 0 0.17 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Register File statistics: +# CHECK-NEXT: Total number of mappings created: 87 +# CHECK-NEXT: Max number of mappings used: 30 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SKLDivider +# CHECK-NEXT: [1] - SKLFPDivider +# CHECK-NEXT: [2] - SKLPort0 +# CHECK-NEXT: [3] - SKLPort1 +# CHECK-NEXT: [4] - SKLPort2 +# CHECK-NEXT: [5] - SKLPort3 +# CHECK-NEXT: [6] - SKLPort4 +# CHECK-NEXT: [7] - SKLPort5 +# CHECK-NEXT: [8] - SKLPort6 +# CHECK-NEXT: [9] - SKLPort7 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# CHECK-NEXT: - - 10.00 6.00 - - - 8.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# CHECK-NEXT: - - - - - - - - - - subl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - subq %rax, %rax +# CHECK-NEXT: - - - - - - - - - - xorl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - xorq %rax, %rax +# CHECK-NEXT: - - 1.00 - - - - - - - pcmpgtb %mm2, %mm2 +# CHECK-NEXT: - - 1.00 - - - - - - - pcmpgtd %mm2, %mm2 +# CHECK-NEXT: - - 1.00 - - - - - - - pcmpgtw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %xmm3, %xmm3, 
%xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - 1.00 - - psubb %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - psubd %mm2, %mm2 +# CHECK-NEXT: - - 1.00 - - - - - - - psubq %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - psubw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - psubb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubd %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm3, 
%ymm3, %ymm5 +# CHECK-NEXT: - - - 1.00 - - - - - - andnps %xmm0, %xmm0 +# CHECK-NEXT: - - 1.00 - - - - - - - andnpd %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: - - - 1.00 - - - - - - vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - 1.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - 1.00 - - - - - - vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - - - - - - 1.00 - - pandn %mm2, %mm2 +# CHECK-NEXT: - - 1.00 - - - - - - - pandn %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - 1.00 - - - - - - - vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - 1.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - xorps %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - - - - xorpd %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - 1.00 - - - - - - - pxor %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - pxor %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: 
Timeline view: +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DR . . .. subl %eax, %eax +# CHECK-NEXT: [0,1] DR . . .. subq %rax, %rax +# CHECK-NEXT: [0,2] DR . . .. xorl %eax, %eax +# CHECK-NEXT: [0,3] DR . . .. xorq %rax, %rax +# CHECK-NEXT: [0,4] DeER . . .. pcmpgtb %mm2, %mm2 +# CHECK-NEXT: [0,5] D=eER. . .. pcmpgtd %mm2, %mm2 +# CHECK-NEXT: [0,6] .D=eER . .. pcmpgtw %mm2, %mm2 +# CHECK-NEXT: [0,7] .D---R . .. pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: [0,8] .D---R . .. pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: [0,9] .D---R . .. pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: [0,10] .D---R . .. pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: [0,11] .D---R . .. vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,12] . D--R . .. vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,13] . D--R . .. vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,14] . D--R . .. vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,15] . D--R . .. vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,16] . D--R . .. vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,17] . D--R . .. vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,18] . D-R . .. vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,19] . D-R . .. vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,20] . D-R . .. vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,21] . D-R . .. vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,22] . D-R . .. vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,23] . D-R . .. vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,24] . DR . .. vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,25] . DR . .. vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,26] . DR . .. vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,27] . DeER . .. psubb %mm2, %mm2 +# CHECK-NEXT: [0,28] . D=eER . .. psubd %mm2, %mm2 +# CHECK-NEXT: [0,29] . D==eER. .. psubq %mm2, %mm2 +# CHECK-NEXT: [0,30] . D==eER .. psubw %mm2, %mm2 +# CHECK-NEXT: [0,31] . D----R .. psubb %xmm2, %xmm2 +# CHECK-NEXT: [0,32] . D----R .. psubd %xmm2, %xmm2 +# CHECK-NEXT: [0,33] . D----R .. 
psubq %xmm2, %xmm2 +# CHECK-NEXT: [0,34] . D----R .. psubw %xmm2, %xmm2 +# CHECK-NEXT: [0,35] . D----R .. vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,36] . .D---R .. vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,37] . .D---R .. vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,38] . .D---R .. vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,39] . .D---R .. vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,40] . .D---R .. vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,41] . .D---R .. vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,42] . . D--R .. vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,43] . . D--R .. vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,44] . . D--R .. vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,45] . . D--R .. vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,46] . . D--R .. vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,47] . . D--R .. vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,48] . . D-R .. vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,49] . . D-R .. vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,50] . . D-R .. vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,51] . . DeER .. andnps %xmm0, %xmm0 +# CHECK-NEXT: [0,52] . . DeER .. andnpd %xmm1, %xmm1 +# CHECK-NEXT: [0,53] . . DeER .. vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,54] . . DeER .. vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,55] . . DeER .. vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,56] . . D=eER .. vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,57] . . DeE-R .. pandn %mm2, %mm2 +# CHECK-NEXT: [0,58] . . D=eER .. pandn %xmm2, %xmm2 +# CHECK-NEXT: [0,59] . . D=eER .. vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,60] . . D=eER.. vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,61] . . D=eER.. vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: [0,62] . . D=eER.. vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: [0,63] . . D==eER. vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,64] . . D==eER. vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: [0,65] . . D==eER. vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: [0,66] . . 
.D==eER vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,67] . . .D----R xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,68] . . .DE---R xorpd %xmm1, %xmm1 +# CHECK-NEXT: [0,69] . . .DE---R vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,70] . . .DE---R vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,71] . . .DE---R vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,72] . . . D---R vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,73] . . . D=eER pxor %mm2, %mm2 +# CHECK-NEXT: [0,74] . . . D---R pxor %xmm2, %xmm2 +# CHECK-NEXT: [0,75] . . . DE--R vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,76] . . . DE--R vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,77] . . . D---R vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: [0,78] . . . D--R vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: [0,79] . . . D--R vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: [0,80] . . . D--R vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: [0,81] . . . D--R vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,82] . . . D--R vpxor %ymm3, %ymm3, %ymm5 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 subl %eax, %eax +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 subq %rax, %rax +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 xorl %eax, %eax +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 xorq %rax, %rax +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 pcmpgtb %mm2, %mm2 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 pcmpgtd %mm2, %mm2 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 pcmpgtw %mm2, %mm2 +# CHECK-NEXT: 7. 1 0.0 0.0 3.0 pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: 8. 1 0.0 0.0 3.0 pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: 9. 1 0.0 0.0 3.0 pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: 10. 1 0.0 0.0 3.0 pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: 11. 1 0.0 0.0 3.0 vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 12. 
1 0.0 0.0 2.0 vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 13. 1 0.0 0.0 2.0 vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 14. 1 0.0 0.0 2.0 vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 15. 1 0.0 0.0 2.0 vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 16. 1 0.0 0.0 2.0 vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 17. 1 0.0 0.0 2.0 vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 18. 1 0.0 0.0 1.0 vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 19. 1 0.0 0.0 1.0 vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 20. 1 0.0 0.0 1.0 vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 21. 1 0.0 0.0 1.0 vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 22. 1 0.0 0.0 1.0 vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 23. 1 0.0 0.0 1.0 vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 24. 1 0.0 0.0 0.0 vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 25. 1 0.0 0.0 0.0 vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 26. 1 0.0 0.0 0.0 vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 27. 1 1.0 1.0 0.0 psubb %mm2, %mm2 +# CHECK-NEXT: 28. 1 2.0 0.0 0.0 psubd %mm2, %mm2 +# CHECK-NEXT: 29. 1 3.0 0.0 0.0 psubq %mm2, %mm2 +# CHECK-NEXT: 30. 1 3.0 0.0 0.0 psubw %mm2, %mm2 +# CHECK-NEXT: 31. 1 0.0 0.0 4.0 psubb %xmm2, %xmm2 +# CHECK-NEXT: 32. 1 0.0 0.0 4.0 psubd %xmm2, %xmm2 +# CHECK-NEXT: 33. 1 0.0 0.0 4.0 psubq %xmm2, %xmm2 +# CHECK-NEXT: 34. 1 0.0 0.0 4.0 psubw %xmm2, %xmm2 +# CHECK-NEXT: 35. 1 0.0 0.0 4.0 vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 36. 1 0.0 0.0 3.0 vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 37. 1 0.0 0.0 3.0 vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 38. 1 0.0 0.0 3.0 vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 39. 1 0.0 0.0 3.0 vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 40. 1 0.0 0.0 3.0 vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 41. 1 0.0 0.0 3.0 vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 42. 1 0.0 0.0 2.0 vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 43. 1 0.0 0.0 2.0 vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 44. 1 0.0 0.0 2.0 vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 45. 1 0.0 0.0 2.0 vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 46. 
1 0.0 0.0 2.0 vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 47. 1 0.0 0.0 2.0 vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 48. 1 0.0 0.0 1.0 vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 49. 1 0.0 0.0 1.0 vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 50. 1 0.0 0.0 1.0 vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 51. 1 1.0 1.0 0.0 andnps %xmm0, %xmm0 +# CHECK-NEXT: 52. 1 1.0 1.0 0.0 andnpd %xmm1, %xmm1 +# CHECK-NEXT: 53. 1 1.0 1.0 0.0 vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 54. 1 1.0 0.0 0.0 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 55. 1 1.0 0.0 0.0 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 56. 1 2.0 0.0 0.0 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 57. 1 1.0 1.0 1.0 pandn %mm2, %mm2 +# CHECK-NEXT: 58. 1 2.0 0.0 0.0 pandn %xmm2, %xmm2 +# CHECK-NEXT: 59. 1 2.0 2.0 0.0 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 60. 1 2.0 0.0 0.0 vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 61. 1 2.0 0.0 0.0 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 62. 1 2.0 0.0 0.0 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 63. 1 3.0 0.0 0.0 vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 64. 1 3.0 1.0 0.0 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 65. 1 3.0 1.0 0.0 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 66. 1 3.0 1.0 0.0 vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 67. 1 0.0 0.0 4.0 xorps %xmm0, %xmm0 +# CHECK-NEXT: 68. 1 1.0 0.0 3.0 xorpd %xmm1, %xmm1 +# CHECK-NEXT: 69. 1 1.0 0.0 3.0 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 70. 1 1.0 0.0 3.0 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 71. 1 1.0 0.0 3.0 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 72. 1 0.0 0.0 3.0 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 73. 1 2.0 2.0 0.0 pxor %mm2, %mm2 +# CHECK-NEXT: 74. 1 0.0 0.0 3.0 pxor %xmm2, %xmm2 +# CHECK-NEXT: 75. 1 1.0 0.0 2.0 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 76. 1 1.0 0.0 2.0 vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 77. 1 0.0 0.0 3.0 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 78. 1 0.0 0.0 2.0 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 79. 1 0.0 0.0 2.0 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 80. 
1 0.0 0.0 2.0 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 81. 1 0.0 0.0 2.0 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 82. 1 0.0 0.0 2.0 vpxor %ymm3, %ymm3, %ymm5 Index: llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s =================================================================== --- /dev/null +++ llvm/test/tools/llvm-mca/X86/SkylakeServer/zero-idioms.s @@ -0,0 +1,778 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -timeline -register-file-stats -iterations=1 < %s | FileCheck %s + +# On SKX, renamer-based zeroing does not work for: +# - 16 and 8-bit GPRs +# - MMX +# - ANDN variants + +subl %eax, %eax +subq %rax, %rax +xorl %eax, %eax +xorq %rax, %rax + +pcmpgtb %mm2, %mm2 +pcmpgtd %mm2, %mm2 +# pcmpgtq %mm2, %mm2 # invalid operand for instruction +pcmpgtw %mm2, %mm2 + +pcmpgtb %xmm2, %xmm2 +pcmpgtd %xmm2, %xmm2 +pcmpgtq %xmm2, %xmm2 +pcmpgtw %xmm2, %xmm2 + +vpcmpgtb %xmm3, %xmm3, %xmm3 +vpcmpgtd %xmm3, %xmm3, %xmm3 +vpcmpgtq %xmm3, %xmm3, %xmm3 +vpcmpgtw %xmm3, %xmm3, %xmm3 + +vpcmpgtb %xmm3, %xmm3, %xmm5 +vpcmpgtd %xmm3, %xmm3, %xmm5 +vpcmpgtq %xmm3, %xmm3, %xmm5 +vpcmpgtw %xmm3, %xmm3, %xmm5 + +vpcmpgtb %ymm3, %ymm3, %ymm3 +vpcmpgtd %ymm3, %ymm3, %ymm3 +vpcmpgtq %ymm3, %ymm3, %ymm3 +vpcmpgtw %ymm3, %ymm3, %ymm3 + +vpcmpgtb %ymm3, %ymm3, %ymm5 +vpcmpgtd %ymm3, %ymm3, %ymm5 +vpcmpgtq %ymm3, %ymm3, %ymm5 +vpcmpgtw %ymm3, %ymm3, %ymm5 + +psubb %mm2, %mm2 +psubd %mm2, %mm2 +psubq %mm2, %mm2 +psubw %mm2, %mm2 +psubb %xmm2, %xmm2 +psubd %xmm2, %xmm2 +psubq %xmm2, %xmm2 +psubw %xmm2, %xmm2 +vpsubb %xmm3, %xmm3, %xmm3 +vpsubd %xmm3, %xmm3, %xmm3 +vpsubq %xmm3, %xmm3, %xmm3 +vpsubw %xmm3, %xmm3, %xmm3 +vpsubb %ymm3, %ymm3, %ymm3 +vpsubd %ymm3, %ymm3, %ymm3 +vpsubq %ymm3, %ymm3, %ymm3 +vpsubw %ymm3, %ymm3, %ymm3 + +vpsubb %xmm3, %xmm3, %xmm5 +vpsubd %xmm3, %xmm3, %xmm5 +vpsubq %xmm3, %xmm3, %xmm5 +vpsubw %xmm3, %xmm3, %xmm5 +vpsubb %ymm3, %ymm3, %ymm5 +vpsubd 
%ymm3, %ymm3, %ymm5 +vpsubq %ymm3, %ymm3, %ymm5 +vpsubw %ymm3, %ymm3, %ymm5 + +vpsubb %xmm19, %xmm19, %xmm19 +vpsubd %xmm19, %xmm19, %xmm19 +vpsubq %xmm19, %xmm19, %xmm19 +vpsubw %xmm19, %xmm19, %xmm19 +vpsubb %ymm19, %ymm19, %ymm19 +vpsubd %ymm19, %ymm19, %ymm19 +vpsubq %ymm19, %ymm19, %ymm19 +vpsubw %ymm19, %ymm19, %ymm19 +vpsubb %zmm19, %zmm19, %zmm19 +vpsubd %zmm19, %zmm19, %zmm19 +vpsubq %zmm19, %zmm19, %zmm19 +vpsubw %zmm19, %zmm19, %zmm19 + +vpsubb %xmm19, %xmm19, %xmm21 +vpsubd %xmm19, %xmm19, %xmm21 +vpsubq %xmm19, %xmm19, %xmm21 +vpsubw %xmm19, %xmm19, %xmm21 +vpsubb %ymm19, %ymm19, %ymm21 +vpsubd %ymm19, %ymm19, %ymm21 +vpsubq %ymm19, %ymm19, %ymm21 +vpsubw %ymm19, %ymm19, %ymm21 +vpsubb %zmm19, %zmm19, %zmm21 +vpsubd %zmm19, %zmm19, %zmm21 +vpsubq %zmm19, %zmm19, %zmm21 +vpsubw %zmm19, %zmm19, %zmm21 + +andnps %xmm0, %xmm0 +andnpd %xmm1, %xmm1 +vandnps %xmm2, %xmm2, %xmm2 +vandnpd %xmm1, %xmm1, %xmm1 +vandnps %ymm2, %ymm2, %ymm2 +vandnpd %ymm1, %ymm1, %ymm1 +vandnps %zmm2, %zmm2, %zmm2 +vandnpd %zmm1, %zmm1, %zmm1 +pandn %mm2, %mm2 +pandn %xmm2, %xmm2 +vpandn %xmm3, %xmm3, %xmm3 +vpandn %ymm3, %ymm3, %ymm3 + +vpandnd %xmm19, %xmm19, %xmm19 +vpandnq %xmm19, %xmm19, %xmm19 +vpandnd %ymm19, %ymm19, %ymm19 +vpandnq %ymm19, %ymm19, %ymm19 +vpandnd %zmm19, %zmm19, %zmm19 +vpandnq %zmm19, %zmm19, %zmm19 + +vandnps %xmm2, %xmm2, %xmm5 +vandnpd %xmm1, %xmm1, %xmm5 +vpandn %xmm3, %xmm3, %xmm5 +vandnps %ymm2, %ymm2, %ymm5 +vandnpd %ymm1, %ymm1, %ymm5 +vpandn %ymm3, %ymm3, %ymm5 +vandnps %zmm2, %zmm2, %zmm5 +vandnpd %zmm1, %zmm1, %zmm5 + +vpandnd %xmm19, %xmm19, %xmm21 +vpandnq %xmm19, %xmm19, %xmm21 +vpandnd %ymm19, %ymm19, %ymm21 +vpandnq %ymm19, %ymm19, %ymm21 +vpandnd %zmm19, %zmm19, %zmm21 +vpandnq %zmm19, %zmm19, %zmm21 + +xorps %xmm0, %xmm0 +xorpd %xmm1, %xmm1 +vxorps %xmm2, %xmm2, %xmm2 +vxorpd %xmm1, %xmm1, %xmm1 +vxorps %ymm2, %ymm2, %ymm2 +vxorpd %ymm1, %ymm1, %ymm1 +vxorps %zmm2, %zmm2, %zmm2 +vxorpd %zmm1, %zmm1, %zmm1 +pxor %mm2, %mm2 +pxor %xmm2, 
%xmm2 +vpxor %xmm3, %xmm3, %xmm3 +vpxor %ymm3, %ymm3, %ymm3 + +vpxord %xmm19, %xmm19, %xmm19 +vpxorq %xmm19, %xmm19, %xmm19 +vpxord %ymm19, %ymm19, %ymm19 +vpxorq %ymm19, %ymm19, %ymm19 +vpxord %zmm19, %zmm19, %zmm19 +vpxorq %zmm19, %zmm19, %zmm19 + +vxorps %xmm4, %xmm4, %xmm5 +vxorpd %xmm1, %xmm1, %xmm3 +vxorps %ymm4, %ymm4, %ymm5 +vxorpd %ymm1, %ymm1, %ymm3 +vxorps %zmm4, %zmm4, %zmm5 +vxorpd %zmm1, %zmm1, %zmm3 +vpxor %xmm3, %xmm3, %xmm5 +vpxor %ymm3, %ymm3, %ymm5 + +vpxord %xmm19, %xmm19, %xmm21 +vpxorq %xmm19, %xmm19, %xmm21 +vpxord %ymm19, %ymm19, %ymm21 +vpxorq %ymm19, %ymm19, %ymm21 +vpxord %zmm19, %zmm19, %zmm21 +vpxorq %zmm19, %zmm19, %zmm21 + +# CHECK: Iterations: 1 +# CHECK-NEXT: Instructions: 139 +# CHECK-NEXT: Total Cycles: 27 +# CHECK-NEXT: Total uOps: 139 + +# CHECK: Dispatch Width: 6 +# CHECK-NEXT: uOps Per Cycle: 5.15 +# CHECK-NEXT: IPC: 5.15 +# CHECK-NEXT: Block RThroughput: 23.2 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 0 0.17 subl %eax, %eax +# CHECK-NEXT: 1 0 0.17 subq %rax, %rax +# CHECK-NEXT: 1 0 0.17 xorl %eax, %eax +# CHECK-NEXT: 1 0 0.17 xorq %rax, %rax +# CHECK-NEXT: 1 1 1.00 pcmpgtb %mm2, %mm2 +# CHECK-NEXT: 1 1 1.00 pcmpgtd %mm2, %mm2 +# CHECK-NEXT: 1 1 1.00 pcmpgtw %mm2, %mm2 +# CHECK-NEXT: 1 0 0.17 pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtq 
%xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 1 0.50 psubb %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubd %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubq %mm2, %mm2 +# CHECK-NEXT: 1 1 0.50 psubw %mm2, %mm2 +# CHECK-NEXT: 1 0 0.17 psubb %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 psubd %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 psubq %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 psubw %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpsubb %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 1 0 0.17 vpsubd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 1 0 0.17 vpsubq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 1 0 0.17 vpsubw %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 1 0 0.17 vpsubb %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 1 0 0.17 vpsubd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 1 0 0.17 
vpsubq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 1 0 0.17 vpsubw %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 1 0 0.17 vpsubb %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 0 0.17 vpsubd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 0 0.17 vpsubq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 0 0.17 vpsubw %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 0 0.17 vpsubb %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 1 0 0.17 vpsubd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 1 0 0.17 vpsubq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 1 0 0.17 vpsubw %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 1 0 0.17 vpsubb %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 1 0 0.17 vpsubd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 1 0 0.17 vpsubq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 1 0 0.17 vpsubw %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 1 0 0.17 vpsubb %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 0 0.17 vpsubd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 0 0.17 vpsubq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 0 0.17 vpsubw %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 1 0.33 andnps %xmm0, %xmm0 +# CHECK-NEXT: 1 1 0.33 andnpd %xmm1, %xmm1 +# CHECK-NEXT: 1 1 0.33 vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 1 1 0.33 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 1 0.33 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 1 0.33 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 1 1 0.50 vandnps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: 1 1 0.50 vandnpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: 1 1 0.50 pandn %mm2, %mm2 +# CHECK-NEXT: 1 1 0.33 pandn %xmm2, %xmm2 +# CHECK-NEXT: 1 1 0.33 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 1 0.33 vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 1 0.33 vpandnd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 1 1 0.33 vpandnq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 1 1 0.33 vpandnd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 1 1 0.33 vpandnq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 1 1 0.50 vpandnd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 1 0.50 vpandnq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 1 0.33 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 1 1 0.33 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 1 1 0.33 vpandn 
%xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 1 0.33 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 1 1 0.33 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 1 1 0.33 vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 1 0.50 vandnps %zmm2, %zmm2, %zmm5 +# CHECK-NEXT: 1 1 0.50 vandnpd %zmm1, %zmm1, %zmm5 +# CHECK-NEXT: 1 1 0.33 vpandnd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 1 1 0.33 vpandnq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 1 1 0.33 vpandnd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 1 1 0.33 vpandnq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 1 1 0.50 vpandnd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 1 0.50 vpandnq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 0 0.17 xorps %xmm0, %xmm0 +# CHECK-NEXT: 1 0 0.17 xorpd %xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.17 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 1 0 0.17 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 1 0 0.17 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 1 0 0.17 vxorps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: 1 0 0.17 vxorpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: 1 1 0.50 pxor %mm2, %mm2 +# CHECK-NEXT: 1 0 0.17 pxor %xmm2, %xmm2 +# CHECK-NEXT: 1 0 0.17 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 1 0 0.17 vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 1 0 0.17 vpxord %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 1 0 0.17 vpxorq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 1 0 0.17 vpxord %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 1 0 0.17 vpxorq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 1 0 0.17 vpxord %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 0 0.17 vpxorq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 1 0 0.17 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 1 0 0.17 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 1 0 0.17 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 1 0 0.17 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 1 0 0.17 vxorps %zmm4, %zmm4, %zmm5 +# CHECK-NEXT: 1 0 0.17 vxorpd %zmm1, %zmm1, %zmm3 +# CHECK-NEXT: 1 0 0.17 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 1 0 0.17 vpxor %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 1 0 0.17 vpxord %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 1 0 0.17 vpxorq 
%xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 1 0 0.17 vpxord %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 1 0 0.17 vpxorq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 1 0 0.17 vpxord %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 1 0 0.17 vpxorq %zmm19, %zmm19, %zmm21 + +# CHECK: Register File statistics: +# CHECK-NEXT: Total number of mappings created: 143 +# CHECK-NEXT: Max number of mappings used: 47 + +# CHECK: Resources: +# CHECK-NEXT: [0] - SKXDivider +# CHECK-NEXT: [1] - SKXFPDivider +# CHECK-NEXT: [2] - SKXPort0 +# CHECK-NEXT: [3] - SKXPort1 +# CHECK-NEXT: [4] - SKXPort2 +# CHECK-NEXT: [5] - SKXPort3 +# CHECK-NEXT: [6] - SKXPort4 +# CHECK-NEXT: [7] - SKXPort5 +# CHECK-NEXT: [8] - SKXPort6 +# CHECK-NEXT: [9] - SKXPort7 + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] +# CHECK-NEXT: - - 16.00 10.00 - - - 14.00 - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# CHECK-NEXT: - - - - - - - - - - subl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - subq %rax, %rax +# CHECK-NEXT: - - - - - - - - - - xorl %eax, %eax +# CHECK-NEXT: - - - - - - - - - - xorq %rax, %rax +# CHECK-NEXT: - - 1.00 - - - - - - - pcmpgtb %mm2, %mm2 +# CHECK-NEXT: - - 1.00 - - - - - - - pcmpgtd %mm2, %mm2 +# CHECK-NEXT: - - 1.00 - - - - - - - pcmpgtw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq 
%xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - 1.00 - - psubb %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - psubd %mm2, %mm2 +# CHECK-NEXT: - - 1.00 - - - - - - - psubq %mm2, %mm2 +# CHECK-NEXT: - - - - - - - 1.00 - - psubw %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - psubb %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubd %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubq %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - psubw %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - 
vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - - - - - - - - - vpsubb %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - vpsubd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - vpsubq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - vpsubw %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - vpsubb %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - - - - - - - - - vpsubd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - - - - - - - - - vpsubq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - - - - - - - - - vpsubw %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - - - - - - - - - vpsubb %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - - - - vpsubd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - - - - vpsubq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - - - - vpsubw %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - - - - vpsubb %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - - - - - - - - - vpsubd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - - - - - - - - - vpsubq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - - - - - - - - - vpsubw %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - - 1.00 - - - - - - andnps %xmm0, %xmm0 +# CHECK-NEXT: - - 1.00 - - - - - - - andnpd %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: - - - 1.00 - - - - - - vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - 1.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: - - - 1.00 - - - - - - vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - 1.00 - - - - 
- - - vandnps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: - - 1.00 - - - - - - - vandnpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: - - - - - - - 1.00 - - pandn %mm2, %mm2 +# CHECK-NEXT: - - - 1.00 - - - - - - pandn %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - 1.00 - - vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandnd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandnq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandnd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandnq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandnd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandnq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - 1.00 - - - - - - - vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: - - 1.00 - - - - - - - vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - 1.00 - - - - - - - vandnps %zmm2, %zmm2, %zmm5 +# CHECK-NEXT: - - - - - - - 1.00 - - vandnpd %zmm1, %zmm1, %zmm5 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandnd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandnq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - - - - - - 1.00 - - vpandnd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - 1.00 - - - - - - vpandnq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - 1.00 - - vpandnd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - 1.00 - - - - - - - vpandnq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - - - - - - - - - xorps %xmm0, %xmm0 +# CHECK-NEXT: - - - - - - - - - - xorpd %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %ymm2, %ymm2, %ymm2 +# 
CHECK-NEXT: - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - - - - - - - - - vxorps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: - - - - - - - 1.00 - - pxor %mm2, %mm2 +# CHECK-NEXT: - - - - - - - - - - pxor %xmm2, %xmm2 +# CHECK-NEXT: - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vpxord %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: - - - - - - - - - - vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: - - - - - - - - - - vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: - - - - - - - - - - vxorps %zmm4, %zmm4, %zmm5 +# CHECK-NEXT: - - - - - - - - - - vxorpd %zmm1, %zmm1, %zmm3 +# CHECK-NEXT: - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: - - - - - - - - - - vpxor %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: - - - - - - - - - - vpxord %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - - - - - - - - - vpxorq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: - - - - - - - - - - vpxord %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - - - - vpxorq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: - - - - - - - - - - vpxord %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: - - - - - - - - - - vpxorq %zmm19, %zmm19, %zmm21 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123456 + +# CHECK: [0,0] DR . . . . .. subl %eax, %eax +# CHECK-NEXT: [0,1] DR . . . . .. subq %rax, %rax +# CHECK-NEXT: [0,2] DR . . . . .. xorl %eax, %eax +# CHECK-NEXT: [0,3] DR . . . . .. 
xorq %rax, %rax +# CHECK-NEXT: [0,4] DeER . . . . .. pcmpgtb %mm2, %mm2 +# CHECK-NEXT: [0,5] D=eER. . . . .. pcmpgtd %mm2, %mm2 +# CHECK-NEXT: [0,6] .D=eER . . . .. pcmpgtw %mm2, %mm2 +# CHECK-NEXT: [0,7] .D---R . . . .. pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: [0,8] .D---R . . . .. pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: [0,9] .D---R . . . .. pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: [0,10] .D---R . . . .. pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: [0,11] .D---R . . . .. vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,12] . D--R . . . .. vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,13] . D--R . . . .. vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,14] . D--R . . . .. vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,15] . D--R . . . .. vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,16] . D--R . . . .. vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,17] . D--R . . . .. vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,18] . D-R . . . .. vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,19] . D-R . . . .. vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,20] . D-R . . . .. vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,21] . D-R . . . .. vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,22] . D-R . . . .. vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,23] . D-R . . . .. vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,24] . DR . . . .. vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,25] . DR . . . .. vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,26] . DR . . . .. vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,27] . DeER . . . .. psubb %mm2, %mm2 +# CHECK-NEXT: [0,28] . D=eER . . . .. psubd %mm2, %mm2 +# CHECK-NEXT: [0,29] . D==eER. . . .. psubq %mm2, %mm2 +# CHECK-NEXT: [0,30] . D==eER . . .. psubw %mm2, %mm2 +# CHECK-NEXT: [0,31] . D----R . . .. psubb %xmm2, %xmm2 +# CHECK-NEXT: [0,32] . D----R . . .. psubd %xmm2, %xmm2 +# CHECK-NEXT: [0,33] . D----R . . .. psubq %xmm2, %xmm2 +# CHECK-NEXT: [0,34] . D----R . . .. psubw %xmm2, %xmm2 +# CHECK-NEXT: [0,35] . D----R . . .. 
vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,36] . .D---R . . .. vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,37] . .D---R . . .. vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,38] . .D---R . . .. vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,39] . .D---R . . .. vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,40] . .D---R . . .. vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,41] . .D---R . . .. vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,42] . . D--R . . .. vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,43] . . D--R . . .. vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,44] . . D--R . . .. vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,45] . . D--R . . .. vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,46] . . D--R . . .. vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,47] . . D--R . . .. vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,48] . . D-R . . .. vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,49] . . D-R . . .. vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,50] . . D-R . . .. vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,51] . . D-R . . .. vpsubb %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,52] . . D-R . . .. vpsubd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,53] . . D-R . . .. vpsubq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,54] . . DR . . .. vpsubw %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,55] . . DR . . .. vpsubb %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,56] . . DR . . .. vpsubd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,57] . . DR . . .. vpsubq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,58] . . DR . . .. vpsubw %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,59] . . DR . . .. vpsubb %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,60] . . DR . . .. vpsubd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,61] . . DR . . .. vpsubq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,62] . . DR . . .. vpsubw %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,63] . . DR . . .. vpsubb %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,64] . . DR . . .. vpsubd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,65] . . DR . . .. 
vpsubq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,66] . . .DR . . .. vpsubw %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,67] . . .DR . . .. vpsubb %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,68] . . .DR . . .. vpsubd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,69] . . .DR . . .. vpsubq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,70] . . .DR . . .. vpsubw %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,71] . . .DR . . .. vpsubb %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,72] . . . DR . . .. vpsubd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,73] . . . DR . . .. vpsubq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,74] . . . DR . . .. vpsubw %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,75] . . . DeER . .. andnps %xmm0, %xmm0 +# CHECK-NEXT: [0,76] . . . DeER . .. andnpd %xmm1, %xmm1 +# CHECK-NEXT: [0,77] . . . DeER . .. vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,78] . . . DeER . .. vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,79] . . . DeER . .. vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,80] . . . D=eER . .. vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,81] . . . D=eER . .. vandnps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: [0,82] . . . D==eER . .. vandnpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: [0,83] . . . DeE--R . .. pandn %mm2, %mm2 +# CHECK-NEXT: [0,84] . . . D=eER . .. pandn %xmm2, %xmm2 +# CHECK-NEXT: [0,85] . . . DeE-R . .. vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,86] . . . D=eER . .. vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,87] . . . D==eER. .. vpandnd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,88] . . . D===eER .. vpandnq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,89] . . . D====eER .. vpandnd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,90] . . . D====eER .. vpandnq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,91] . . . D=====eER .. vpandnd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,92] . . . D======eER.. vpandnq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,93] . . . D=eE-----R.. vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: [0,94] . . . D=eE-----R.. vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: [0,95] . . . 
D==eE----R.. vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,96] . . . .D=eE----R.. vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: [0,97] . . . .D==eE---R.. vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: [0,98] . . . .D==eE---R.. vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,99] . . . .D===eE--R.. vandnps %zmm2, %zmm2, %zmm5 +# CHECK-NEXT: [0,100] . . . .D===eE--R.. vandnpd %zmm1, %zmm1, %zmm5 +# CHECK-NEXT: [0,101] . . . .D======eER. vpandnd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,102] . . . . D=====eER. vpandnq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,103] . . . . D=====eER. vpandnd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,104] . . . . D======eER vpandnq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,105] . . . . D======eER vpandnd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,106] . . . . D======eER vpandnq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,107] . . . . D--------R xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,108] . . . . D-------R xorpd %xmm1, %xmm1 +# CHECK-NEXT: [0,109] . . . . D-------R vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,110] . . . . D-------R vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,111] . . . . D-------R vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: [0,112] . . . . D-------R vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,113] . . . . D-------R vxorps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: [0,114] . . . . D------R vxorpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: [0,115] . . . . D=eE---R pxor %mm2, %mm2 +# CHECK-NEXT: [0,116] . . . . D------R pxor %xmm2, %xmm2 +# CHECK-NEXT: [0,117] . . . . D------R vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,118] . . . . D------R vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: [0,119] . . . . D===E--R vpxord %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,120] . . . . D==E--R vpxorq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: [0,121] . . . . D==E--R vpxord %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,122] . . . . D==E--R vpxorq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: [0,123] . . . . D==E--R vpxord %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,124] . . . . 
D==E--R vpxorq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: [0,125] . . . . D-----R vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: [0,126] . . . . .D----R vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: [0,127] . . . . .D----R vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: [0,128] . . . . .D----R vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: [0,129] . . . . .D----R vxorps %zmm4, %zmm4, %zmm5 +# CHECK-NEXT: [0,130] . . . . .D----R vxorpd %zmm1, %zmm1, %zmm3 +# CHECK-NEXT: [0,131] . . . . .D----R vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,132] . . . . . D---R vpxor %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: [0,133] . . . . . DE--R vpxord %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,134] . . . . . DE--R vpxorq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: [0,135] . . . . . DE--R vpxord %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,136] . . . . . DE--R vpxorq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: [0,137] . . . . . DE--R vpxord %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: [0,138] . . . . . D--R vpxorq %zmm19, %zmm19, %zmm21 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 0.0 0.0 0.0 subl %eax, %eax +# CHECK-NEXT: 1. 1 0.0 0.0 0.0 subq %rax, %rax +# CHECK-NEXT: 2. 1 0.0 0.0 0.0 xorl %eax, %eax +# CHECK-NEXT: 3. 1 0.0 0.0 0.0 xorq %rax, %rax +# CHECK-NEXT: 4. 1 1.0 1.0 0.0 pcmpgtb %mm2, %mm2 +# CHECK-NEXT: 5. 1 2.0 0.0 0.0 pcmpgtd %mm2, %mm2 +# CHECK-NEXT: 6. 1 2.0 0.0 0.0 pcmpgtw %mm2, %mm2 +# CHECK-NEXT: 7. 1 0.0 0.0 3.0 pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: 8. 1 0.0 0.0 3.0 pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: 9. 1 0.0 0.0 3.0 pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: 10. 1 0.0 0.0 3.0 pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: 11. 1 0.0 0.0 3.0 vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 12. 
1 0.0 0.0 2.0 vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 13. 1 0.0 0.0 2.0 vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 14. 1 0.0 0.0 2.0 vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 15. 1 0.0 0.0 2.0 vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 16. 1 0.0 0.0 2.0 vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 17. 1 0.0 0.0 2.0 vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 18. 1 0.0 0.0 1.0 vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 19. 1 0.0 0.0 1.0 vpcmpgtb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 20. 1 0.0 0.0 1.0 vpcmpgtd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 21. 1 0.0 0.0 1.0 vpcmpgtq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 22. 1 0.0 0.0 1.0 vpcmpgtw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 23. 1 0.0 0.0 1.0 vpcmpgtb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 24. 1 0.0 0.0 0.0 vpcmpgtd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 25. 1 0.0 0.0 0.0 vpcmpgtq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 26. 1 0.0 0.0 0.0 vpcmpgtw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 27. 1 1.0 1.0 0.0 psubb %mm2, %mm2 +# CHECK-NEXT: 28. 1 2.0 0.0 0.0 psubd %mm2, %mm2 +# CHECK-NEXT: 29. 1 3.0 0.0 0.0 psubq %mm2, %mm2 +# CHECK-NEXT: 30. 1 3.0 0.0 0.0 psubw %mm2, %mm2 +# CHECK-NEXT: 31. 1 0.0 0.0 4.0 psubb %xmm2, %xmm2 +# CHECK-NEXT: 32. 1 0.0 0.0 4.0 psubd %xmm2, %xmm2 +# CHECK-NEXT: 33. 1 0.0 0.0 4.0 psubq %xmm2, %xmm2 +# CHECK-NEXT: 34. 1 0.0 0.0 4.0 psubw %xmm2, %xmm2 +# CHECK-NEXT: 35. 1 0.0 0.0 4.0 vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 36. 1 0.0 0.0 3.0 vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 37. 1 0.0 0.0 3.0 vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 38. 1 0.0 0.0 3.0 vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 39. 1 0.0 0.0 3.0 vpsubb %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 40. 1 0.0 0.0 3.0 vpsubd %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 41. 1 0.0 0.0 3.0 vpsubq %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 42. 1 0.0 0.0 2.0 vpsubw %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 43. 1 0.0 0.0 2.0 vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 44. 1 0.0 0.0 2.0 vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 45. 1 0.0 0.0 2.0 vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 46. 
1 0.0 0.0 2.0 vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 47. 1 0.0 0.0 2.0 vpsubb %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 48. 1 0.0 0.0 1.0 vpsubd %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 49. 1 0.0 0.0 1.0 vpsubq %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 50. 1 0.0 0.0 1.0 vpsubw %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 51. 1 0.0 0.0 1.0 vpsubb %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 52. 1 0.0 0.0 1.0 vpsubd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 53. 1 0.0 0.0 1.0 vpsubq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 54. 1 0.0 0.0 0.0 vpsubw %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 55. 1 0.0 0.0 0.0 vpsubb %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 56. 1 0.0 0.0 0.0 vpsubd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 57. 1 0.0 0.0 0.0 vpsubq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 58. 1 0.0 0.0 0.0 vpsubw %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 59. 1 0.0 0.0 0.0 vpsubb %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 60. 1 0.0 0.0 0.0 vpsubd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 61. 1 0.0 0.0 0.0 vpsubq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 62. 1 0.0 0.0 0.0 vpsubw %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 63. 1 0.0 0.0 0.0 vpsubb %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 64. 1 0.0 0.0 0.0 vpsubd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 65. 1 0.0 0.0 0.0 vpsubq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 66. 1 0.0 0.0 0.0 vpsubw %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 67. 1 0.0 0.0 0.0 vpsubb %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 68. 1 0.0 0.0 0.0 vpsubd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 69. 1 0.0 0.0 0.0 vpsubq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 70. 1 0.0 0.0 0.0 vpsubw %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 71. 1 0.0 0.0 0.0 vpsubb %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 72. 1 0.0 0.0 0.0 vpsubd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 73. 1 0.0 0.0 0.0 vpsubq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 74. 1 0.0 0.0 0.0 vpsubw %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 75. 1 1.0 1.0 0.0 andnps %xmm0, %xmm0 +# CHECK-NEXT: 76. 1 1.0 1.0 0.0 andnpd %xmm1, %xmm1 +# CHECK-NEXT: 77. 1 1.0 1.0 0.0 vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 78. 
1 1.0 0.0 0.0 vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 79. 1 1.0 0.0 0.0 vandnps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 80. 1 2.0 0.0 0.0 vandnpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 81. 1 2.0 0.0 0.0 vandnps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: 82. 1 3.0 0.0 0.0 vandnpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: 83. 1 1.0 1.0 2.0 pandn %mm2, %mm2 +# CHECK-NEXT: 84. 1 2.0 0.0 0.0 pandn %xmm2, %xmm2 +# CHECK-NEXT: 85. 1 1.0 1.0 1.0 vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 86. 1 2.0 0.0 0.0 vpandn %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 87. 1 3.0 3.0 0.0 vpandnd %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 88. 1 4.0 0.0 0.0 vpandnq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 89. 1 5.0 0.0 0.0 vpandnd %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 90. 1 5.0 0.0 0.0 vpandnq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 91. 1 6.0 0.0 0.0 vpandnd %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 92. 1 7.0 0.0 0.0 vpandnq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 93. 1 2.0 0.0 5.0 vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: 94. 1 2.0 0.0 5.0 vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: 95. 1 3.0 1.0 4.0 vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 96. 1 2.0 1.0 4.0 vandnps %ymm2, %ymm2, %ymm5 +# CHECK-NEXT: 97. 1 3.0 2.0 3.0 vandnpd %ymm1, %ymm1, %ymm5 +# CHECK-NEXT: 98. 1 3.0 2.0 3.0 vpandn %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 99. 1 4.0 3.0 2.0 vandnps %zmm2, %zmm2, %zmm5 +# CHECK-NEXT: 100. 1 4.0 3.0 2.0 vandnpd %zmm1, %zmm1, %zmm5 +# CHECK-NEXT: 101. 1 7.0 0.0 0.0 vpandnd %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 102. 1 6.0 0.0 0.0 vpandnq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 103. 1 6.0 0.0 0.0 vpandnd %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 104. 1 7.0 1.0 0.0 vpandnq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 105. 1 7.0 1.0 0.0 vpandnd %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 106. 1 7.0 1.0 0.0 vpandnq %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 107. 1 0.0 0.0 8.0 xorps %xmm0, %xmm0 +# CHECK-NEXT: 108. 1 0.0 0.0 7.0 xorpd %xmm1, %xmm1 +# CHECK-NEXT: 109. 1 0.0 0.0 7.0 vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: 110. 
1 0.0 0.0 7.0 vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: 111. 1 0.0 0.0 7.0 vxorps %ymm2, %ymm2, %ymm2 +# CHECK-NEXT: 112. 1 0.0 0.0 7.0 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 113. 1 0.0 0.0 7.0 vxorps %zmm2, %zmm2, %zmm2 +# CHECK-NEXT: 114. 1 0.0 0.0 6.0 vxorpd %zmm1, %zmm1, %zmm1 +# CHECK-NEXT: 115. 1 2.0 2.0 3.0 pxor %mm2, %mm2 +# CHECK-NEXT: 116. 1 0.0 0.0 6.0 pxor %xmm2, %xmm2 +# CHECK-NEXT: 117. 1 0.0 0.0 6.0 vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: 118. 1 0.0 0.0 6.0 vpxor %ymm3, %ymm3, %ymm3 +# CHECK-NEXT: 119. 1 4.0 0.0 2.0 vpxord %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 120. 1 3.0 0.0 2.0 vpxorq %xmm19, %xmm19, %xmm19 +# CHECK-NEXT: 121. 1 3.0 0.0 2.0 vpxord %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 122. 1 3.0 0.0 2.0 vpxorq %ymm19, %ymm19, %ymm19 +# CHECK-NEXT: 123. 1 3.0 0.0 2.0 vpxord %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 124. 1 3.0 0.0 2.0 vpxorq %zmm19, %zmm19, %zmm19 +# CHECK-NEXT: 125. 1 0.0 0.0 5.0 vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: 126. 1 0.0 0.0 4.0 vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: 127. 1 0.0 0.0 4.0 vxorps %ymm4, %ymm4, %ymm5 +# CHECK-NEXT: 128. 1 0.0 0.0 4.0 vxorpd %ymm1, %ymm1, %ymm3 +# CHECK-NEXT: 129. 1 0.0 0.0 4.0 vxorps %zmm4, %zmm4, %zmm5 +# CHECK-NEXT: 130. 1 0.0 0.0 4.0 vxorpd %zmm1, %zmm1, %zmm3 +# CHECK-NEXT: 131. 1 0.0 0.0 4.0 vpxor %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: 132. 1 0.0 0.0 3.0 vpxor %ymm3, %ymm3, %ymm5 +# CHECK-NEXT: 133. 1 1.0 0.0 2.0 vpxord %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 134. 1 1.0 0.0 2.0 vpxorq %xmm19, %xmm19, %xmm21 +# CHECK-NEXT: 135. 1 1.0 0.0 2.0 vpxord %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 136. 1 1.0 0.0 2.0 vpxorq %ymm19, %ymm19, %ymm21 +# CHECK-NEXT: 137. 1 1.0 0.0 2.0 vpxord %zmm19, %zmm19, %zmm21 +# CHECK-NEXT: 138. 1 0.0 0.0 2.0 vpxorq %zmm19, %zmm19, %zmm21