diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -1480,54 +1480,42 @@ def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI(16|32)m")>; def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { - let Latency = 22; + let Latency = 17; let NumMicroOps = 7; let ResourceCycles = [1,3,2,1]; } -def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERQPDrm)>; +def: InstRW<[BWWriteResGroup183_1], (instrs VGATHERDPDrm, VPGATHERDQrm, + VGATHERQPDrm, VPGATHERQQrm)>; def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { - let Latency = 23; + let Latency = 18; let NumMicroOps = 9; let ResourceCycles = [1,3,4,1]; } -def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERQPDYrm)>; +def: InstRW<[BWWriteResGroup183_2], (instrs VGATHERDPDYrm, VPGATHERDQYrm, + VGATHERQPDYrm, VPGATHERQQYrm)>; def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { - let Latency = 24; + let Latency = 19; let NumMicroOps = 9; let ResourceCycles = [1,5,2,1]; } -def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSYrm)>; +def: InstRW<[BWWriteResGroup183_3], (instrs VGATHERQPSrm, VPGATHERQDrm)>; def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { - let Latency = 25; - let NumMicroOps = 7; - let ResourceCycles = [1,3,2,1]; + let Latency = 19; + let NumMicroOps = 10; + let ResourceCycles = [1,4,4,1]; } -def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPDrm, - VGATHERDPSrm)>; +def: InstRW<[BWWriteResGroup183_4], (instrs VGATHERDPSrm, VPGATHERDDrm, + VGATHERQPSYrm, VPGATHERQDYrm)>; def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { - let Latency = 26; - let NumMicroOps = 9; - let ResourceCycles = [1,5,2,1]; -} -def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPDYrm)>; - -def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { - let Latency = 26; + let Latency = 21; let NumMicroOps = 14; let ResourceCycles = [1,4,8,1]; } -def: InstRW<[BWWriteResGroup183_6], (instrs VGATHERDPSYrm)>; - -def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> { - let Latency = 27; - let NumMicroOps = 9; - let ResourceCycles = [1,5,2,1]; -} -def: InstRW<[BWWriteResGroup183_7], (instrs VGATHERQPSrm)>; +def: InstRW<[BWWriteResGroup183_5], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> { let Latency = 29; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -1785,75 +1785,55 @@ } def: InstRW<[HWWriteResGroup183], (instrs FSTENVm)>; -def HWWriteResGroup184 : SchedWriteRes<[HWPort0, HWPort5, HWPort15, HWPort015, HWPort06, HWPort23]> { - let Latency = 26; +def HWWriteResGroup184 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> { + let Latency = 14; let NumMicroOps = 12; - let ResourceCycles = [2,2,1,3,2,2]; -} -def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, - VPGATHERDQrm, - VPGATHERDDrm)>; - -def HWWriteResGroup185 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { - let Latency = 24; - let NumMicroOps = 22; - let ResourceCycles = [5,3,4,1,5,4]; + let ResourceCycles = [2,2,2,1,3,2]; } -def: InstRW<[HWWriteResGroup185], (instrs VGATHERQPDYrm, - VPGATHERQQYrm)>; +def: InstRW<[HWWriteResGroup184], (instrs VGATHERDPDrm, VPGATHERDQrm)>; -def HWWriteResGroup186 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { - let Latency = 28; - let NumMicroOps = 22; - let ResourceCycles = [5,3,4,1,5,4]; -} -def: InstRW<[HWWriteResGroup186], (instrs VPGATHERQDYrm)>; - -def HWWriteResGroup187 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { - let Latency = 25; - let NumMicroOps = 22; - let ResourceCycles = [5,3,4,1,5,4]; +def HWWriteResGroup185 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> { + let Latency = 17; + let NumMicroOps = 20; + let ResourceCycles = [3,3,4,1,5,4]; } -def: InstRW<[HWWriteResGroup187], (instrs VPGATHERQDrm)>; +def: InstRW<[HWWriteResGroup185], (instrs VGATHERDPDYrm, VPGATHERDQYrm)>; -def HWWriteResGroup188 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { - let Latency = 27; +def HWWriteResGroup186 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> { + let Latency = 16; let NumMicroOps = 20; let ResourceCycles = [3,3,4,1,5,4]; } -def: InstRW<[HWWriteResGroup188], (instrs VGATHERDPDYrm, - VPGATHERDQYrm)>; +def: InstRW<[HWWriteResGroup186], (instrs VGATHERDPSrm, VPGATHERDDrm)>; -def HWWriteResGroup189 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { - let Latency = 27; +def HWWriteResGroup187 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> { + let Latency = 22; let NumMicroOps = 34; let ResourceCycles = [5,3,8,1,9,8]; } -def: InstRW<[HWWriteResGroup189], (instrs VGATHERDPSYrm, - VPGATHERDDYrm)>; +def: InstRW<[HWWriteResGroup187], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; -def HWWriteResGroup190 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { - let Latency = 23; +def HWWriteResGroup188 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> { + let Latency = 15; let NumMicroOps = 14; let ResourceCycles = [3,3,2,1,3,2]; } -def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPDrm, - VPGATHERQQrm)>; +def: InstRW<[HWWriteResGroup188], (instrs VGATHERQPDrm, VPGATHERQQrm)>; -def HWWriteResGroup191 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { - let Latency = 28; - let NumMicroOps = 15; - let ResourceCycles = [3,3,2,1,4,2]; +def HWWriteResGroup189 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> { + let Latency = 17; + let NumMicroOps = 22; + let ResourceCycles = [5,3,4,1,5,4]; } -def: InstRW<[HWWriteResGroup191], (instrs VGATHERQPSYrm)>; +def: InstRW<[HWWriteResGroup189], (instrs VGATHERQPDYrm, VPGATHERQQYrm, + VGATHERQPSYrm, VPGATHERQDYrm)>; -def HWWriteResGroup192 : SchedWriteRes<[HWPort0, HWPort5, HWPort06, HWPort15, HWPort015, HWPort23]> { - let Latency = 25; +def HWWriteResGroup190 : SchedWriteRes<[HWPort0,HWPort5,HWPort06,HWPort15,HWPort015,HWPort23]> { + let Latency = 16; let NumMicroOps = 15; let ResourceCycles = [3,3,2,1,4,2]; } -def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm, - VGATHERDPSrm)>; +def: InstRW<[HWWriteResGroup190], (instrs VGATHERQPSrm, VPGATHERQDrm)>; def: InstRW<[WriteZero], (instrs CLC)>; diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx2.s @@ -465,14 +465,14 @@ # CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0 # CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax) -# CHECK-NEXT: 7 25 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 9 26 5.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 7 25 3.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 14 26 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 7 22 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 9 23 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 9 27 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 9 24 5.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 7 17 3.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 9 18 3.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 10 19 4.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 14 21 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 7 17 3.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 9 18 3.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 9 19 5.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 10 19 4.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 2 6 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 6 0.50 * vmovntdqa (%rax), %ymm0 @@ -568,14 +568,14 @@ # CHECK-NEXT: 2 9 1.00 * vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2 # CHECK-NEXT: 2 9 1.00 * vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: 1 5 0.50 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1 5 0.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 1 5 0.50 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1 5 0.50 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1 5 0.50 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: 1 5 0.50 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 1 5 0.50 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 10 19 4.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 14 21 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 7 17 3.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 9 18 3.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 9 19 5.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 10 19 4.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 7 17 3.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 9 18 3.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 4 9 2.00 * vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2 @@ -776,7 +776,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 94.67 58.67 85.67 85.67 13.00 237.67 2.00 1.67 +# CHECK-NEXT: - - 96.67 60.67 99.67 99.67 21.00 266.67 4.00 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -786,13 +786,13 @@ # CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax) # CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0 @@ -888,14 +888,14 @@ # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 0.25 0.25 4.00 4.00 1.00 4.25 0.25 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 5.25 0.25 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 4.25 0.25 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: - - 0.25 0.25 1.00 1.00 1.00 3.25 0.25 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 0.25 0.25 2.00 2.00 1.00 3.25 0.25 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 - 2.50 - - vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - 0.50 - - - 2.50 - - vphaddsw %ymm0, %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx2.s @@ -465,14 +465,14 @@ # CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0 # CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax) -# CHECK-NEXT: 12 26 2.67 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 20 27 4.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 15 25 3.67 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 34 27 6.50 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 14 23 3.33 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 22 24 5.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 15 25 3.67 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 15 28 3.67 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 12 14 2.67 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 20 17 4.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 20 16 4.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 34 22 6.50 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 14 15 3.33 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 22 17 5.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 15 16 3.67 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 22 17 5.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 2 7 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovntdqa (%rax), %ymm0 @@ -568,14 +568,14 @@ # CHECK-NEXT: 2 10 1.00 * vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2 # CHECK-NEXT: 2 10 1.00 * vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: 12 26 2.67 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 34 27 6.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 12 26 2.67 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 20 27 4.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 22 25 5.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 22 28 5.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: 14 23 3.33 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 22 24 5.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 20 16 4.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 34 22 6.50 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 12 14 2.67 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 20 17 4.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 15 16 3.67 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 22 17 5.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 14 15 3.33 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 22 17 5.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2 @@ -776,7 +776,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 202.67 89.67 97.67 97.67 5.00 282.67 28.00 1.67 +# CHECK-NEXT: - - 206.67 90.67 99.67 99.67 5.00 284.67 30.00 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -787,12 +787,12 @@ # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax) # CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 12.00 3.50 4.00 4.00 - 6.50 4.00 - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 5.00 1.50 1.00 1.00 - 4.50 1.00 - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0 @@ -888,11 +888,11 @@ # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 12.00 3.50 4.00 4.00 - 6.50 4.00 - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 4.00 1.50 1.00 1.00 - 3.50 1.00 - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 6.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 5.33 1.83 1.00 1.00 - 4.83 1.00 - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - 5.00 1.50 1.00 1.00 - 4.50 1.00 - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 # CHECK-NEXT: - - 8.67 2.17 2.00 2.00 - 5.17 2.00 - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2