diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -187,7 +187,7 @@ defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -216,7 +216,7 @@ // Bit counts. defm : Zn2WriteResPair; -defm : Zn2WriteResPair; +defm : Zn2WriteResPair; defm : Zn2WriteResPair; defm : Zn2WriteResPair; defm : Zn2WriteResPair; @@ -272,13 +272,13 @@ defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -314,8 +314,8 @@ defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -326,16 +326,16 @@ defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -381,7 +381,7 @@ defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -403,7 +403,7 @@ defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; defm : Zn2WriteResFpuPair; defm : Zn2WriteResFpuPair; @@ -425,8 +425,8 @@ defm : Zn2WriteResFpuPair; // Vector Shift Operations -defm : Zn2WriteResFpuPair; -defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; +defm : Zn2WriteResFpuPair; defm : X86WriteResPairUnsupported; // Vector insert/extract operations. @@ -470,6 +470,12 @@ def Zn2WriteMicrocoded : SchedWriteRes<[]> { let Latency = 100; } +defm : Zn2WriteResPair; +defm : Zn2WriteResPair; +defm : Zn2WriteResPair; +defm : Zn2WriteResPair; +defm : Zn2WriteResPair; +defm : Zn2WriteResPair; def : SchedAlias; def : SchedAlias; @@ -518,14 +524,14 @@ let NumMicroOps = 2; } -def : InstRW<[Zn2WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>; +def : InstRW<[Zn2WriteXCHG], (instregex "^XCHG(8|16|32|64)rr", "^XCHG(16|32|64)ar")>; // r,m. def Zn2WriteXCHGrm : SchedWriteRes<[Zn2AGU, Zn2ALU]> { let Latency = 5; let NumMicroOps = 2; } -def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>; +def : InstRW<[Zn2WriteXCHGrm, ReadAfterLd], (instregex "^XCHG(8|16|32|64)rm")>; def : InstRW<[WriteMicrocoded], (instrs XLAT)>; @@ -595,8 +601,11 @@ def Zn2WriteMul16 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> { let Latency = 3; } +def Zn2WriteMul16Imm : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> { + let Latency = 4; +} def : SchedAlias; -def : SchedAlias; +def : SchedAlias; def : SchedAlias; // m16. @@ -1002,6 +1011,7 @@ // mm <- mm. def Zn2WriteFPU12 : SchedWriteRes<[Zn2FPU12]> ; def Zn2WriteFPU12Y : SchedWriteRes<[Zn2FPU12]> { + let Latency = 4; let NumMicroOps = 2; } def Zn2WriteFPU12m : SchedWriteRes<[Zn2AGU, Zn2FPU12]> ; @@ -1110,15 +1120,6 @@ //-- Arithmetic instructions --// -// HADD, HSUB PS/PD -// PHADD|PHSUB (S) W/D. -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - // PCMPGTQ. def Zn2WritePCMPGTQr : SchedWriteRes<[Zn2FPU03]>; def : InstRW<[Zn2WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>; @@ -1138,8 +1139,12 @@ // PSLL,PSRL,PSRA W/D/Q. // x,x / v,v,x. -def Zn2WritePShift : SchedWriteRes<[Zn2FPU2]> ; -def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> ; +def Zn2WritePShift : SchedWriteRes<[Zn2FPU2]> { + let Latency = 3; +} +def Zn2WritePShiftY : SchedWriteRes<[Zn2FPU2]> { + let Latency = 3; +} // PSLL,PSRL DQ. def : InstRW<[Zn2WritePShift], (instregex "(V?)PS(R|L)LDQri")>; @@ -1281,7 +1286,7 @@ } // CVTDQ2PD. // x,x. -def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>; +def : InstRW<[Zn2WriteCVTDQ2PDr], (instregex "(V)?CVTDQ2P(D|S)rr")>; // Same as xmm // y,x. @@ -1291,9 +1296,9 @@ def Zn2WriteCVTPD2DQr: SchedWriteRes<[Zn2FPU12, Zn2FPU3]> { let Latency = 3; } -// CVT(T)PD2DQ. +// CVT(T)P(D|S)2DQ. // x,x. -def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>; +def : InstRW<[Zn2WriteCVTPD2DQr], (instregex "(V?)CVT(T?)P(D|S)2DQrr")>; def Zn2WriteCVTPD2DQLd: SchedWriteRes<[Zn2AGU,Zn2FPU12,Zn2FPU3]> { let Latency = 10; @@ -1323,7 +1328,7 @@ def : InstRW<[Zn2WriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>; def Zn2WriteCVSTSI2SSr: SchedWriteRes<[Zn2FPU3]> { - let Latency = 4; + let Latency = 3; } // same as CVTPD2DQr @@ -1335,7 +1340,7 @@ def : InstRW<[Zn2WriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>; def Zn2WriteCVSTSI2SDr: SchedWriteRes<[Zn2FPU013, Zn2FPU3]> { - let Latency = 4; + let Latency = 3; } // CVTSI2SD. // x,r32/64. @@ -1377,7 +1382,7 @@ //-- SSE4A instructions --// // EXTRQ def Zn2WriteEXTRQ: SchedWriteRes<[Zn2FPU12, Zn2FPU2]> { - let Latency = 2; + let Latency = 3; } def : InstRW<[Zn2WriteEXTRQ], (instregex "EXTRQ")>; @@ -1449,12 +1454,6 @@ //-- Arithmetic instructions --// -// HADD, HSUB PS/PD -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; -def : SchedAlias; - // VDIVPS. // TODO - convert to Zn2WriteResFpuPair // y,y,y. @@ -1491,11 +1490,9 @@ // DPPS. // x,x,i / v,v,v,i. -def : SchedAlias; def : SchedAlias; // x,m,i / v,v,m,i. -def : SchedAlias; def : SchedAlias; // DPPD. diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx1.s @@ -1098,18 +1098,18 @@ # CHECK-NEXT: 1 8 0.50 * vbroadcastsd (%rax), %ymm2 # CHECK-NEXT: 1 8 0.50 * vbroadcastss (%rax), %xmm2 # CHECK-NEXT: 1 8 0.50 * vbroadcastss (%rax), %ymm2 -# CHECK-NEXT: 1 3 1.00 vcmpeqpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vcmpeqpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vcmpeqpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 1.00 * vcmpeqpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vcmpeqps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vcmpeqps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vcmpeqps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 1.00 * vcmpeqps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vcmpeqsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vcmpeqsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vcmpeqss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vcmpeqss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vcmpeqpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vcmpeqpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vcmpeqpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vcmpeqpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vcmpeqps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vcmpeqps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vcmpeqps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vcmpeqps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vcmpeqsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vcmpeqsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vcmpeqss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vcmpeqss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 1.00 vcomisd %xmm0, %xmm1 # CHECK-NEXT: 1 10 1.00 * vcomisd (%rax), %xmm1 # CHECK-NEXT: 1 3 1.00 vcomiss %xmm0, %xmm1 @@ -1118,7 +1118,7 @@ # CHECK-NEXT: 1 12 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2pd %xmm0, %ymm2 # CHECK-NEXT: 1 12 1.00 * vcvtdq2pd (%rax), %ymm2 -# CHECK-NEXT: 1 5 1.00 vcvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 1 12 1.00 * vcvtdq2ps (%rax), %ymm2 @@ -1130,7 +1130,7 @@ # CHECK-NEXT: 2 10 0.50 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: 1 10 1.00 * vcvtpd2psy (%rax), %xmm2 -# CHECK-NEXT: 1 5 1.00 vcvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 vcvtps2dq %ymm0, %ymm2 # CHECK-NEXT: 1 12 1.00 * vcvtps2dq (%rax), %ymm2 @@ -1144,8 +1144,8 @@ # CHECK-NEXT: 1 11 1.00 * vcvtsd2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 vcvtsd2ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtsd2ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 1.00 vcvtsi2sd %ecx, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 vcvtsi2sd %rcx, %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 vcvtsi2sd %ecx, %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 vcvtsi2sd %rcx, %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * vcvtsi2sdl (%rax), %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * vcvtsi2sdq (%rax), %xmm0, %xmm2 # CHECK-NEXT: 1 5 1.00 vcvtsi2ss %ecx, %xmm0, %xmm2 @@ -1162,7 +1162,7 @@ # CHECK-NEXT: 2 10 1.00 * vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * vcvttpd2dqy (%rax), %xmm2 -# CHECK-NEXT: 1 5 1.00 vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 vcvttps2dq %ymm0, %ymm2 # CHECK-NEXT: 1 12 1.00 * vcvttps2dq (%rax), %ymm2 @@ -1188,30 +1188,30 @@ # CHECK-NEXT: 1 22 1.00 * vdivss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 100 0.25 vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 100 0.25 * vdppd $22, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 15 0.25 vdpps $22, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 19 0.33 * vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 100 0.25 vdpps $22, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 100 0.25 * vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.33 vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 8 0.33 * vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 1 2 2.00 vextractps $1, %xmm0, %ecx # CHECK-NEXT: 2 5 2.00 * vextractps $1, %xmm0, (%rax) -# CHECK-NEXT: 1 100 0.25 vhaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vhaddpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vhaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vhaddpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 vhaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vhaddps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vhaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vhaddps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 vhsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vhsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vhsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vhsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 vhsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vhsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vhsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 7 0.25 vhaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 11 0.33 * vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 7 0.25 vhaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 11 0.33 * vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 7 0.25 vhaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 11 0.33 * vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 7 0.25 vhaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 11 0.33 * vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 7 0.25 vhsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 11 0.33 * vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 7 0.25 vhsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 11 0.33 * vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 7 0.25 vhsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 11 0.33 * vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 7 0.25 vhsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 11 0.33 * vhsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 0.33 vinsertf128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 2 9 0.33 * vinsertf128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2 @@ -1228,30 +1228,30 @@ # CHECK-NEXT: 2 8 0.50 * vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: 1 4 0.50 * * vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: 2 5 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 1 3 1.00 vmaxpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vmaxpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 1.00 * vmaxpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vmaxps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vmaxps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 1.00 * vmaxps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vmaxsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vmaxss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vmaxss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vminpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vminpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 1.00 * vminpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vminps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vminps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 10 1.00 * vminps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vminsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vminss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 10 1.00 * vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vmaxpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vmaxps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vmaxsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vmaxss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vminpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vminps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 1 1.00 vminss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 8 1.00 * vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vmovapd %xmm0, %xmm2 # CHECK-NEXT: 1 1 0.33 * vmovapd %xmm0, (%rax) # CHECK-NEXT: 1 8 0.33 * vmovapd (%rax), %xmm2 @@ -1341,12 +1341,12 @@ # CHECK-NEXT: 1 100 0.25 * vmpsadbw $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 0.50 vmulpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.50 vmulpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vmulpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 0.50 vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmulps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.50 vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 2 11 0.50 * vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 0.50 * vmulps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 0.50 vmulsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmulsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 3 0.50 vmulss %xmm0, %xmm1, %xmm2 @@ -1433,20 +1433,20 @@ # CHECK-NEXT: 1 100 0.25 * vperm2f128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 vpermilpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpermilpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpermilpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpermilpd $1, %ymm0, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.50 vpermilpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpermilpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpermilpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpermilpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.50 vpermilps $1, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.50 * vpermilps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 vpermilps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpermilps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpermilps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpermilps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpermilps $1, %ymm0, %ymm2 # CHECK-NEXT: 1 8 0.50 * vpermilps $1, (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.50 vpermilps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpermilps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpermilps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpermilps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 2.00 vpextrb $1, %xmm0, %ecx # CHECK-NEXT: 2 5 3.00 * vpextrb $1, %xmm0, (%rax) # CHECK-NEXT: 1 2 2.00 vpextrd $1, %xmm0, %ecx @@ -1455,20 +1455,20 @@ # CHECK-NEXT: 2 5 3.00 * vpextrq $1, %xmm0, (%rax) # CHECK-NEXT: 1 2 2.00 vpextrw $1, %xmm0, %ecx # CHECK-NEXT: 2 5 3.00 * vpextrw $1, %xmm0, (%rax) -# CHECK-NEXT: 1 100 0.25 vphaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vphaddd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vphaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vphaddsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vphaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.25 vphaddd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 7 0.33 * vphaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.25 vphaddsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 7 0.33 * vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.25 vphaddw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 7 0.33 * vphaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 4 1.00 vphminposuw %xmm0, %xmm2 # CHECK-NEXT: 1 11 1.00 * vphminposuw (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 vphsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vphsubd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vphsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vphsubsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 vphsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 100 0.25 * vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.25 vphsubd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 7 0.33 * vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.25 vphsubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 7 0.33 * vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.25 vphsubw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 7 0.33 * vphsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpinsrb $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: 1 8 0.33 * vpinsrb $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpinsrd $1, %eax, %xmm1, %xmm2 @@ -1565,7 +1565,7 @@ # CHECK-NEXT: 1 1 0.25 vpslld $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpslld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpslld (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vpslldq $1, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vpslldq $1, %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsllq $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsllq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsllq (%rax), %xmm1, %xmm2 @@ -1581,7 +1581,7 @@ # CHECK-NEXT: 1 1 0.25 vpsrld $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrld %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrld (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 1.00 vpsrldq $1, %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vpsrldq $1, %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.25 vpsrlq $1, %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 vpsrlq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 8 1.00 * vpsrlq (%rax), %xmm1, %xmm2 @@ -1632,18 +1632,18 @@ # CHECK-NEXT: 3 12 0.50 * vrcpps (%rax), %ymm2 # CHECK-NEXT: 1 5 0.50 vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 12 0.50 * vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 1.00 vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 11 1.00 * vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 4 1.00 vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 11 1.00 * vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: 1 4 1.00 vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 11 1.00 * vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 4 1.00 vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 11 1.00 * vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: 1 4 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 11 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 1.00 vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 11 1.00 * vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 10 1.00 * vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: 1 3 1.00 vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: 1 10 1.00 * vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: 1 3 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 1.00 vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 1.00 * vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 5 0.50 vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: 2 12 0.50 * vrsqrtps (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vrsqrtps %ymm0, %ymm2 @@ -1739,7 +1739,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 112.00 112.00 112.00 0.25 0.25 0.25 0.25 - 191.92 141.92 168.75 455.42 - +# CHECK-NEXT: 117.00 117.00 117.00 0.25 0.25 0.25 0.25 - 191.92 143.42 170.25 455.42 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -1831,7 +1831,7 @@ # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtdq2pd %xmm0, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtdq2pd (%rax), %ymm2 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtdq2ps (%rax), %ymm2 @@ -1843,7 +1843,7 @@ # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtpd2ps %ymm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtpd2psy (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvtps2dq %ymm0, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvtps2dq (%rax), %ymm2 @@ -1875,7 +1875,7 @@ # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvttpd2dq %ymm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - vcvttpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvttps2dq (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - 1.00 - vcvttps2dq %ymm0, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - vcvttps2dq (%rax), %ymm2 @@ -1902,7 +1902,7 @@ # CHECK-NEXT: - - - - - - - - - - - - - vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vdpps $22, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - 0.33 0.33 - 0.33 - vextractf128 $1, %ymm0, %xmm2 @@ -1910,21 +1910,21 @@ # CHECK-NEXT: - - - - - - - - - 0.50 2.50 - - vextractps $1, %xmm0, %ecx # CHECK-NEXT: 1.67 1.67 1.67 - - - - - - 0.50 2.50 - - vextractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - vhaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vhaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhaddpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vhaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vhaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhaddpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vhaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vhaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhaddps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vhaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vhaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhaddps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vhsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vhsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhsubpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vhsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vhsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhsubpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vhsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vhsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vhsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vhsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vhsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - 0.33 0.33 - 0.33 - vinsertf128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.33 0.33 - 0.33 - vinsertf128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - vinsertps $1, %xmm0, %xmm1, %xmm2 @@ -2169,19 +2169,19 @@ # CHECK-NEXT: - - - - - - - - - 0.50 2.50 - - vpextrw $1, %xmm0, %ecx # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 1.00 4.00 - - vpextrw $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - - - - - - - - - vphaddd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphaddd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vphaddsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphaddsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vphaddw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphaddw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 1.00 - - - - vphminposuw %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - vphminposuw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vphsubd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphsubd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vphsubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphsubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vphsubw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphsubw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpinsrb $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - vpinsrb $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - vpinsrd $1, %eax, %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-avx2.s @@ -576,18 +576,18 @@ # CHECK-NEXT: 1 100 0.25 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: 1 100 0.25 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: 1 100 0.25 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1 100 0.25 vphaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vphaddd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 vphaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vphaddsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 vphaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vphaddw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 vphsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vphsubd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 vphsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vphsubsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 vphsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 100 0.25 * vphsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.25 vphaddd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 0.33 * vphaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.25 vphaddsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 0.33 * vphaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.25 vphaddw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 0.33 * vphaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.25 vphsubd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 0.33 * vphsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.25 vphsubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 0.33 * vphsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.25 vphsubw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 0.33 * vphsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vpmaddubsw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 11 1.00 * vpmaddubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vpmaddwd %ymm0, %ymm1, %ymm2 @@ -625,29 +625,29 @@ # CHECK-NEXT: 1 1 0.25 vpminuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.33 * vpminuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 2 2.00 vpmovmskb %ymm0, %ecx -# CHECK-NEXT: 2 1 0.50 vpmovsxbd %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovsxbd %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovsxbd (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxbq %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovsxbq %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovsxbq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxbw %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovsxbw %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovsxbw (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxdq %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovsxdq %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovsxdq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxwd %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovsxwd %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovsxwd (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovsxwq %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovsxwq %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovsxwq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxbd %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovzxbd %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovzxbd (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxbq %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovzxbq %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovzxbq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxbw %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovzxbw %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovzxbw (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxdq %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovzxdq %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovzxdq (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxwd %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovzxwd %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovzxwd (%rax), %ymm2 -# CHECK-NEXT: 2 1 0.50 vpmovzxwq %xmm0, %ymm2 +# CHECK-NEXT: 2 4 0.50 vpmovzxwq %xmm0, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpmovzxwq (%rax), %ymm2 # CHECK-NEXT: 1 4 1.00 vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 11 1.00 * vpmuldq (%rax), %ymm1, %ymm2 @@ -657,8 +657,8 @@ # CHECK-NEXT: 1 11 1.00 * vpmulhuw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vpmulhw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 11 1.00 * vpmulhw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 2 10 1.00 * vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 1.00 vpmulld %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 11 1.00 * vpmulld (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vpmullw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 11 1.00 * vpmullw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vpmuludq %ymm0, %ymm1, %ymm2 @@ -682,51 +682,51 @@ # CHECK-NEXT: 1 1 0.25 vpsignw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.33 * vpsignw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpslld $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpslld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpslld (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 1.00 vpslldq $1, %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpslld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vpslld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 1.00 vpslldq $1, %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsllq $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsllq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsllq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsllvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsllvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsllvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsllvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsllvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsllvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsllvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsllvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsllq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vpsllq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpsllvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpsllvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpsllvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpsllvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpsllvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpsllvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpsllvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpsllvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsllw $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsllw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsllw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsllw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vpsllw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsrad $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsrad %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsrad (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsravd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsravd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsravd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsravd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsrad %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vpsrad (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpsravd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpsravd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpsravd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpsravd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsraw $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsraw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsraw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsraw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vpsraw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsrld $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsrld %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsrld (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 1.00 vpsrldq $1, %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsrld %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vpsrld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 1.00 vpsrldq $1, %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsrlq $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsrlq %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsrlq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsrlvd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsrlvd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrlvd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsrlvd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 1 0.50 vpsrlvq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 0.50 * vpsrlvq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 1 0.50 vpsrlvq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 8 0.50 * vpsrlvq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsrlq %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vpsrlq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpsrlvd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpsrlvd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpsrlvd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpsrlvd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 3 0.50 vpsrlvq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 10 0.50 * vpsrlvq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 3 0.50 vpsrlvq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 10 0.50 * vpsrlvq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsrlw $1, %ymm0, %ymm2 -# CHECK-NEXT: 1 2 1.00 vpsrlw %xmm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 9 1.00 * vpsrlw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 1 1.00 vpsrlw %xmm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 8 1.00 * vpsrlw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsubb %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 1 8 0.33 * vpsubb (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.25 vpsubd %ymm0, %ymm1, %ymm2 @@ -779,7 +779,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 42.67 42.67 42.67 - - - - - 70.17 75.17 85.00 42.67 - +# CHECK-NEXT: 44.67 44.67 44.67 - - - - - 70.17 75.17 85.00 42.67 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -900,17 +900,17 @@ # CHECK-NEXT: - - - - - - - - - - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vphaddd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphaddd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vphaddsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphaddsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vphaddw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphaddw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphaddw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vphsubd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphsubd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vphsubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphsubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - - - - - - vphsubw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - - - - - - - - - - - - vphsubw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - vphsubw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - 1.00 - - - - vpmaddubsw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - vpmaddubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - 1.00 - - - - vpmaddwd %ymm0, %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse1.s @@ -202,10 +202,10 @@ # CHECK-NEXT: 1 8 0.33 * andnps (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 andps %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.33 * andps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 cmpeqps %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * cmpeqps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 cmpeqss %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * cmpeqss (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 cmpeqps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cmpeqps (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 cmpeqss %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cmpeqss (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comiss %xmm0, %xmm1 # CHECK-NEXT: 1 10 1.00 * comiss (%rax), %xmm1 # CHECK-NEXT: 1 5 1.00 cvtpi2ps %mm0, %xmm2 @@ -232,14 +232,14 @@ # CHECK-NEXT: 1 22 1.00 * divss (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 * U ldmxcsr (%rax) # CHECK-NEXT: 1 100 0.25 * * U maskmovq %mm0, %mm1 -# CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * maxps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 maxss %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * maxss (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 minps %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * minps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 minss %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * minss (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 maxps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * maxps (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 maxss %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * maxss (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 minps %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * minps (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 minss %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * minss (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 movaps %xmm0, %xmm2 # CHECK-NEXT: 1 1 0.33 * movaps %xmm0, (%rax) # CHECK-NEXT: 1 8 0.33 * movaps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse2.s @@ -416,15 +416,15 @@ # CHECK-NEXT: 1 1 0.25 andpd %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.33 * andpd (%rax), %xmm2 # CHECK-NEXT: 1 8 0.33 * * U clflush (%rax) -# CHECK-NEXT: 1 3 1.00 cmpeqpd %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * cmpeqpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 cmpeqsd %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * cmpeqsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 cmpeqpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cmpeqpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 cmpeqsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * cmpeqsd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 1 10 1.00 * comisd (%rax), %xmm1 # CHECK-NEXT: 1 3 1.00 cvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * cvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: 1 5 1.00 cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * cvtpd2dq (%rax), %xmm2 @@ -434,7 +434,7 @@ # CHECK-NEXT: 2 10 0.50 * cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * cvtpi2pd (%rax), %xmm2 -# CHECK-NEXT: 1 5 1.00 cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * cvtps2pd (%rax), %xmm2 @@ -444,8 +444,8 @@ # CHECK-NEXT: 1 11 1.00 * cvtsd2si (%rax), %rcx # CHECK-NEXT: 1 3 1.00 cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cvtsd2ss (%rax), %xmm2 -# CHECK-NEXT: 1 4 1.00 cvtsi2sd %ecx, %xmm2 -# CHECK-NEXT: 1 4 1.00 cvtsi2sd %rcx, %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtsi2sd %ecx, %xmm2 +# CHECK-NEXT: 1 3 1.00 cvtsi2sd %rcx, %xmm2 # CHECK-NEXT: 1 12 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 1 12 1.00 * cvtsi2sdl (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 cvtss2sd %xmm0, %xmm2 @@ -454,7 +454,7 @@ # CHECK-NEXT: 2 10 1.00 * cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 cvttpd2pi %xmm0, %mm2 # CHECK-NEXT: 1 12 1.00 * cvttpd2pi (%rax), %mm2 -# CHECK-NEXT: 1 5 1.00 cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 cvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 1 12 1.00 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 1 4 1.00 cvttsd2si %xmm0, %rcx @@ -466,15 +466,15 @@ # CHECK-NEXT: 1 22 1.00 * divsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 * * U lfence # CHECK-NEXT: 1 100 0.25 * * U maskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * maxpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * maxsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 maxpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * maxpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 maxsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * maxsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 * * U mfence -# CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * minpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2 -# CHECK-NEXT: 1 10 1.00 * minsd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 minpd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * minpd (%rax), %xmm2 +# CHECK-NEXT: 1 1 1.00 minsd %xmm0, %xmm2 +# CHECK-NEXT: 1 8 1.00 * minsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.25 movapd %xmm0, %xmm2 # CHECK-NEXT: 1 1 0.33 * movapd %xmm0, (%rax) # CHECK-NEXT: 1 8 0.33 * movapd (%rax), %xmm2 @@ -597,7 +597,7 @@ # CHECK-NEXT: 1 1 0.25 pslld $1, %xmm2 # CHECK-NEXT: 1 1 1.00 pslld %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * pslld (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 pslldq $1, %xmm2 +# CHECK-NEXT: 1 3 1.00 pslldq $1, %xmm2 # CHECK-NEXT: 1 1 0.25 psllq $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psllq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psllq (%rax), %xmm2 @@ -613,7 +613,7 @@ # CHECK-NEXT: 1 1 0.25 psrld $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrld %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrld (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 psrldq $1, %xmm2 +# CHECK-NEXT: 1 3 1.00 psrldq $1, %xmm2 # CHECK-NEXT: 1 1 0.25 psrlq $1, %xmm2 # CHECK-NEXT: 1 1 1.00 psrlq %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * psrlq (%rax), %xmm2 @@ -692,7 +692,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 71.92 40.42 71.75 152.92 - +# CHECK-NEXT: 44.33 44.33 44.33 - - - - - 71.92 41.92 73.25 152.92 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -713,7 +713,7 @@ # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - comisd (%rax), %xmm1 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - cvtpd2dq (%rax), %xmm2 @@ -723,7 +723,7 @@ # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.50 - - 0.50 - cvtpd2ps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtpi2pd %mm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtpi2pd (%rax), %xmm2 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - 1.00 - cvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvtps2pd (%rax), %xmm2 @@ -743,7 +743,7 @@ # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - 0.50 0.50 1.00 - cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - 1.00 - cvttpd2pi %xmm0, %mm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvttpd2pi (%rax), %mm2 -# CHECK-NEXT: - - - - - - - - - - - 1.00 - cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - - 0.50 0.50 1.00 - cvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - 1.00 - cvttps2dq (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - cvttsd2si %xmm0, %ecx # CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - cvttsd2si %xmm0, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse3.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse3.s @@ -47,14 +47,14 @@ # CHECK-NEXT: 1 10 1.00 * addsubpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2 # CHECK-NEXT: 1 10 1.00 * addsubps (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 haddpd %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * haddpd (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 haddps %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * haddps (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 hsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * hsubpd (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 hsubps %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * hsubps (%rax), %xmm2 +# CHECK-NEXT: 1 7 0.25 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 2 11 0.33 * haddpd (%rax), %xmm2 +# CHECK-NEXT: 1 7 0.25 haddps %xmm0, %xmm2 +# CHECK-NEXT: 2 11 0.33 * haddps (%rax), %xmm2 +# CHECK-NEXT: 1 7 0.25 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 2 11 0.33 * hsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 7 0.25 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 2 11 0.33 * hsubps (%rax), %xmm2 # CHECK-NEXT: 1 8 0.33 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 U monitor # CHECK-NEXT: 1 1 0.50 movddup %xmm0, %xmm2 @@ -82,7 +82,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 1.67 1.67 1.67 - - - - - 4.00 2.00 2.00 - - +# CHECK-NEXT: 3.00 3.00 3.00 - - - - - 4.00 2.00 2.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -91,13 +91,13 @@ # CHECK-NEXT: - - - - - - - - 1.00 - - - - addsubps %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - addsubps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - haddpd %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - haddpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - haddpd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - haddps %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - haddps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - haddps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - hsubpd %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - hsubpd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - hsubpd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - hsubps %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - hsubps (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - hsubps (%rax), %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - lddqu (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - monitor # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - movddup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse41.s @@ -165,8 +165,8 @@ # CHECK-NEXT: 1 8 0.50 * blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1 100 0.25 dppd $22, %xmm0, %xmm2 # CHECK-NEXT: 1 100 0.25 * dppd $22, (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 1 15 0.25 dpps $22, %xmm0, %xmm2 +# CHECK-NEXT: 2 19 0.33 * dpps $22, (%rax), %xmm2 # CHECK-NEXT: 1 2 2.00 extractps $1, %xmm0, %ecx # CHECK-NEXT: 2 5 2.00 * extractps $1, %xmm0, (%rax) # CHECK-NEXT: 1 1 0.50 insertps $1, %xmm0, %xmm2 @@ -243,14 +243,14 @@ # CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 ptest %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * ptest (%rax), %xmm1 -# CHECK-NEXT: 1 4 1.00 roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 11 1.00 * roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 4 1.00 roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 11 1.00 * roundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 4 1.00 roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 11 1.00 * roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 4 1.00 roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: 1 11 1.00 * roundss $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 1 3 1.00 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 1 10 1.00 * roundss $1, (%rax), %xmm2 # CHECK: Resources: # CHECK-NEXT: [0] - Zn2AGU0 @@ -269,7 +269,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 16.67 16.67 16.67 - - - - - 25.17 26.67 44.00 21.17 - +# CHECK-NEXT: 17.00 17.00 17.00 - - - - - 25.17 26.67 44.00 21.17 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -284,7 +284,7 @@ # CHECK-NEXT: - - - - - - - - - - - - - dppd $22, %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - dppd $22, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - dpps $22, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 0.50 2.50 - - extractps $1, %xmm0, %ecx # CHECK-NEXT: 1.67 1.67 1.67 - - - - - - 0.50 2.50 - - extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - insertps $1, %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse4a.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse4a.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse4a.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-sse4a.s @@ -19,8 +19,8 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 2 1.00 extrq %xmm0, %xmm2 -# CHECK-NEXT: 1 2 1.00 extrq $22, $2, %xmm2 +# CHECK-NEXT: 1 3 1.00 extrq %xmm0, %xmm2 +# CHECK-NEXT: 1 3 1.00 extrq $22, $2, %xmm2 # CHECK-NEXT: 1 4 1.00 insertq %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 insertq $22, $22, %xmm0, %xmm2 # CHECK-NEXT: 1 8 1.00 * movntsd %xmm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-ssse3.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-ssse3.s @@ -122,30 +122,30 @@ # CHECK-NEXT: 1 8 0.33 * palignr $1, (%rax), %mm2 # CHECK-NEXT: 1 1 0.25 palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 1 8 0.33 * palignr $1, (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 phaddd %mm0, %mm2 -# CHECK-NEXT: 1 100 0.25 * phaddd (%rax), %mm2 -# CHECK-NEXT: 1 100 0.25 phaddd %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * phaddd (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 phaddsw %mm0, %mm2 -# CHECK-NEXT: 1 100 0.25 * phaddsw (%rax), %mm2 -# CHECK-NEXT: 1 100 0.25 phaddsw %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * phaddsw (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 phaddw %mm0, %mm2 -# CHECK-NEXT: 1 100 0.25 * phaddw (%rax), %mm2 -# CHECK-NEXT: 1 100 0.25 phaddw %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * phaddw (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 phsubd %mm0, %mm2 -# CHECK-NEXT: 1 100 0.25 * phsubd (%rax), %mm2 -# CHECK-NEXT: 1 100 0.25 phsubd %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * phsubd (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 phsubsw %mm0, %mm2 -# CHECK-NEXT: 1 100 0.25 * phsubsw (%rax), %mm2 -# CHECK-NEXT: 1 100 0.25 phsubsw %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * phsubsw (%rax), %xmm2 -# CHECK-NEXT: 1 100 0.25 phsubw %mm0, %mm2 -# CHECK-NEXT: 1 100 0.25 * phsubw (%rax), %mm2 -# CHECK-NEXT: 1 100 0.25 phsubw %xmm0, %xmm2 -# CHECK-NEXT: 1 100 0.25 * phsubw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.25 phaddd %mm0, %mm2 +# CHECK-NEXT: 2 7 0.33 * phaddd (%rax), %mm2 +# CHECK-NEXT: 1 3 0.25 phaddd %xmm0, %xmm2 +# CHECK-NEXT: 2 7 0.33 * phaddd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.25 phaddsw %mm0, %mm2 +# CHECK-NEXT: 2 7 0.33 * phaddsw (%rax), %mm2 +# CHECK-NEXT: 1 3 0.25 phaddsw %xmm0, %xmm2 +# CHECK-NEXT: 2 7 0.33 * phaddsw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.25 phaddw %mm0, %mm2 +# CHECK-NEXT: 2 7 0.33 * phaddw (%rax), %mm2 +# CHECK-NEXT: 1 3 0.25 phaddw %xmm0, %xmm2 +# CHECK-NEXT: 2 7 0.33 * phaddw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.25 phsubd %mm0, %mm2 +# CHECK-NEXT: 2 7 0.33 * phsubd (%rax), %mm2 +# CHECK-NEXT: 1 3 0.25 phsubd %xmm0, %xmm2 +# CHECK-NEXT: 2 7 0.33 * phsubd (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.25 phsubsw %mm0, %mm2 +# CHECK-NEXT: 2 7 0.33 * phsubsw (%rax), %mm2 +# CHECK-NEXT: 1 3 0.25 phsubsw %xmm0, %xmm2 +# CHECK-NEXT: 2 7 0.33 * phsubsw (%rax), %xmm2 +# CHECK-NEXT: 1 3 0.25 phsubw %mm0, %mm2 +# CHECK-NEXT: 2 7 0.33 * phsubw (%rax), %mm2 +# CHECK-NEXT: 1 3 0.25 phsubw %xmm0, %xmm2 +# CHECK-NEXT: 2 7 0.33 * phsubw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2 # CHECK-NEXT: 1 11 1.00 * pmaddubsw (%rax), %mm2 # CHECK-NEXT: 1 4 1.00 pmaddubsw %xmm0, %xmm2 @@ -188,7 +188,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] -# CHECK-NEXT: 6.67 6.67 6.67 - - - - - 16.00 8.00 8.00 8.00 - +# CHECK-NEXT: 10.67 10.67 10.67 - - - - - 16.00 8.00 8.00 8.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] Instructions: @@ -209,29 +209,29 @@ # CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - palignr $1, %xmm0, %xmm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 0.25 0.25 0.25 0.25 - palignr $1, (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - phaddd %mm0, %mm2 -# CHECK-NEXT: - - - - - - - - - - - - - phaddd (%rax), %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddd (%rax), %mm2 # CHECK-NEXT: - - - - - - - - - - - - - phaddd %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - phaddd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - phaddsw %mm0, %mm2 -# CHECK-NEXT: - - - - - - - - - - - - - phaddsw (%rax), %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddsw (%rax), %mm2 # CHECK-NEXT: - - - - - - - - - - - - - phaddsw %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - phaddsw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddsw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - phaddw %mm0, %mm2 -# CHECK-NEXT: - - - - - - - - - - - - - phaddw (%rax), %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddw (%rax), %mm2 # CHECK-NEXT: - - - - - - - - - - - - - phaddw %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - phaddw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phaddw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - phsubd %mm0, %mm2 -# CHECK-NEXT: - - - - - - - - - - - - - phsubd (%rax), %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubd (%rax), %mm2 # CHECK-NEXT: - - - - - - - - - - - - - phsubd %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - phsubd (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - phsubsw %mm0, %mm2 -# CHECK-NEXT: - - - - - - - - - - - - - phsubsw (%rax), %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubsw (%rax), %mm2 # CHECK-NEXT: - - - - - - - - - - - - - phsubsw %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - phsubsw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubsw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - - - - - phsubw %mm0, %mm2 -# CHECK-NEXT: - - - - - - - - - - - - - phsubw (%rax), %mm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubw (%rax), %mm2 # CHECK-NEXT: - - - - - - - - - - - - - phsubw %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - - - - - - phsubw (%rax), %xmm2 +# CHECK-NEXT: 0.33 0.33 0.33 - - - - - - - - - - phsubw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - 1.00 - - - - pmaddubsw %mm0, %mm2 # CHECK-NEXT: 0.33 0.33 0.33 - - - - - 1.00 - - - - pmaddubsw (%rax), %mm2 # CHECK-NEXT: - - - - - - - - 1.00 - - - - pmaddubsw %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s --- a/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver2/resources-x86_64.s @@ -1002,17 +1002,17 @@ # CHECK-NEXT: 2 5 0.33 * * andq %rsi, (%rax) # CHECK-NEXT: 2 5 0.33 * andq (%rax), %rdi # CHECK-NEXT: 1 3 0.25 bsfw %si, %di -# CHECK-NEXT: 1 3 0.25 bsrw %si, %di +# CHECK-NEXT: 1 4 0.25 bsrw %si, %di # CHECK-NEXT: 2 7 0.33 * bsfw (%rax), %di -# CHECK-NEXT: 2 7 0.33 * bsrw (%rax), %di +# CHECK-NEXT: 2 8 0.33 * bsrw (%rax), %di # CHECK-NEXT: 1 3 0.25 bsfl %esi, %edi -# CHECK-NEXT: 1 3 0.25 bsrl %esi, %edi +# CHECK-NEXT: 1 4 0.25 bsrl %esi, %edi # CHECK-NEXT: 2 7 0.33 * bsfl (%rax), %edi -# CHECK-NEXT: 2 7 0.33 * bsrl (%rax), %edi +# CHECK-NEXT: 2 8 0.33 * bsrl (%rax), %edi # CHECK-NEXT: 1 3 0.25 bsfq %rsi, %rdi -# CHECK-NEXT: 1 3 0.25 bsrq %rsi, %rdi +# CHECK-NEXT: 1 4 0.25 bsrq %rsi, %rdi # CHECK-NEXT: 2 7 0.33 * bsfq (%rax), %rdi -# CHECK-NEXT: 2 7 0.33 * bsrq (%rax), %rdi +# CHECK-NEXT: 2 8 0.33 * bsrq (%rax), %rdi # CHECK-NEXT: 1 1 1.00 bswapl %eax # CHECK-NEXT: 1 1 1.00 bswapq %rax # CHECK-NEXT: 1 1 0.25 btw %si, %di @@ -1106,13 +1106,13 @@ # CHECK-NEXT: 1 100 0.25 U cmpsw %es:(%rdi), (%rsi) # CHECK-NEXT: 1 100 0.25 U cmpsl %es:(%rdi), (%rsi) # CHECK-NEXT: 1 100 0.25 U cmpsq %es:(%rdi), (%rsi) -# CHECK-NEXT: 1 1 0.25 cmpxchgb %cl, %bl +# CHECK-NEXT: 1 3 0.25 cmpxchgb %cl, %bl # CHECK-NEXT: 5 8 0.33 * * cmpxchgb %cl, (%rbx) -# CHECK-NEXT: 1 1 0.25 cmpxchgw %cx, %bx +# CHECK-NEXT: 1 3 0.25 cmpxchgw %cx, %bx # CHECK-NEXT: 5 8 0.33 * * cmpxchgw %cx, (%rbx) -# CHECK-NEXT: 1 1 0.25 cmpxchgl %ecx, %ebx +# CHECK-NEXT: 1 3 0.25 cmpxchgl %ecx, %ebx # CHECK-NEXT: 5 8 0.33 * * cmpxchgl %ecx, (%rbx) -# CHECK-NEXT: 1 1 0.25 cmpxchgq %rcx, %rbx +# CHECK-NEXT: 1 3 0.25 cmpxchgq %rcx, %rbx # CHECK-NEXT: 5 8 0.33 * * cmpxchgq %rcx, (%rbx) # CHECK-NEXT: 1 100 0.25 U cpuid # CHECK-NEXT: 1 1 0.25 decb %dil @@ -1146,9 +1146,9 @@ # CHECK-NEXT: 1 7 1.00 * imulw (%rax) # CHECK-NEXT: 1 3 1.00 imulw %si, %di # CHECK-NEXT: 1 7 1.00 * imulw (%rax), %di -# CHECK-NEXT: 1 3 1.00 imulw $511, %si, %di +# CHECK-NEXT: 1 4 1.00 imulw $511, %si, %di # CHECK-NEXT: 1 7 1.00 * imulw $511, (%rax), %di -# CHECK-NEXT: 1 3 1.00 imulw $7, %si, %di +# CHECK-NEXT: 1 4 1.00 imulw $7, %si, %di # CHECK-NEXT: 1 7 1.00 * imulw $7, (%rax), %di # CHECK-NEXT: 1 3 1.00 imull %edi # CHECK-NEXT: 1 7 1.00 * imull (%rax)