Index: llvm/lib/Target/X86/X86SchedSkylakeClient.td =================================================================== --- llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -1593,33 +1593,31 @@ } def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F(32|64)m")>; -def SKLWriteResGroup196_1 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { - let Latency = 22; - let NumMicroOps = 5; +def SKLWriteResGroupVEX2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { + let Latency = 18; + let NumMicroOps = 5; // 2 uops perform multiple loads let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKLWriteResGroup196_1], (instrs VGATHERDPSrm, - VGATHERDPDrm, - VGATHERQPDrm, - VGATHERQPSrm, - VPGATHERDDrm, - VPGATHERDQrm, - VPGATHERQDrm, - VPGATHERQQrm)>; +def: InstRW<[SKLWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm, + VGATHERQPDrm, VPGATHERQQrm, + VGATHERQPSrm, VPGATHERQDrm)>; -def SKLWriteResGroup196_2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { - let Latency = 25; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; +def SKLWriteResGroupVEX4 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { + let Latency = 20; + let NumMicroOps = 5; // 2 uops perform multiple loads + let ResourceCycles = [1,4,1,1]; +} +def: InstRW<[SKLWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm, + VGATHERDPSrm, VPGATHERDDrm, + VGATHERQPDYrm, VPGATHERQQYrm, + VGATHERQPSYrm, VPGATHERQDYrm)>; + +def SKLWriteResGroupVEX8 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { + let Latency = 22; + let NumMicroOps = 5; // 2 uops perform multiple loads + let ResourceCycles = [1,8,1,1]; } -def: InstRW<[SKLWriteResGroup196_2], (instrs VGATHERDPSYrm, - VGATHERQPDYrm, - VGATHERQPSYrm, - VPGATHERDDYrm, - VPGATHERDQYrm, - VPGATHERQDYrm, - VPGATHERQQYrm, - VGATHERDPDYrm)>; +def: InstRW<[SKLWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; def SKLWriteResGroup198 : 
SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 23; Index: llvm/lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -2145,14 +2145,6 @@ def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)", "VPMULLQZrm(b?)")>; -def SKXWriteResGroup214 : SchedWriteRes<[]> { - let Latency = 20; - let NumMicroOps = 0; -} -def: InstRW<[SKXWriteResGroup214], (instrs VGATHERDPSZ128rm, - VGATHERQPSZrm, - VPGATHERDDZ128rm)>; - def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> { let Latency = 20; let NumMicroOps = 1; @@ -2167,15 +2159,41 @@ } def : SchedAlias; // TODO - convert to ZnWriteResFpuPair -def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { - let Latency = 20; - let NumMicroOps = 5; +def SKXWriteGatherEVEX2 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 17; + let NumMicroOps = 5; // 2 uops perform multiple loads let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup218], (instrs VGATHERQPSZ128rm, - VGATHERQPSZ256rm, - VPGATHERQDZ128rm, - VPGATHERQDZ256rm)>; +def: InstRW<[SKXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm, + VGATHERDPDZ128rm, VPGATHERDQZ128rm, + VGATHERQPDZ128rm, VPGATHERQQZ128rm)>; + +def SKXWriteGatherEVEX4 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 19; + let NumMicroOps = 5; // 2 uops perform multiple loads + let ResourceCycles = [1,4,1,1]; +} +def: InstRW<[SKXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm, + VGATHERQPDZ256rm, VPGATHERQQZ256rm, + VGATHERDPSZ128rm, VPGATHERDDZ128rm, + VGATHERDPDZ256rm, VPGATHERDQZ256rm)>; + +def SKXWriteGatherEVEX8 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 21; + let NumMicroOps = 5; // 2 uops perform multiple loads + let 
ResourceCycles = [1,8,1,1]; +} +def: InstRW<[SKXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm, + VGATHERDPDZrm, VPGATHERDQZrm, + VGATHERQPDZrm, VPGATHERQQZrm, + VGATHERQPSZrm, VPGATHERQDZrm)>; + +def SKXWriteGatherEVEX16 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { + let Latency = 25; + let NumMicroOps = 5; // 2 uops perform multiple loads + let ResourceCycles = [1,16,1,1]; +} +def: InstRW<[SKXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>; def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { let Latency = 20; @@ -2205,57 +2223,31 @@ } def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F(32|64)m")>; -def SKXWriteResGroup224 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { - let Latency = 22; - let NumMicroOps = 5; +def SKXWriteResGroupVEX2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { + let Latency = 18; + let NumMicroOps = 5; // 2 uops perform multiple loads let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKXWriteResGroup224], (instrs VGATHERDPDZ128rm, - VGATHERQPDZ128rm, - VPGATHERDQZ128rm, - VPGATHERQQZ128rm)>; +def: InstRW<[SKXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm, + VGATHERQPDrm, VPGATHERQQrm, + VGATHERQPSrm, VPGATHERQDrm)>; -def SKXWriteResGroup224_2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { - let Latency = 22; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; +def SKXWriteResGroupVEX4 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { + let Latency = 20; + let NumMicroOps = 5; // 2 uops perform multiple loads + let ResourceCycles = [1,4,1,1]; } -def: InstRW<[SKXWriteResGroup224_2], (instrs VGATHERDPSrm, - VGATHERDPDrm, - VGATHERQPDrm, - VGATHERQPSrm, - VPGATHERDDrm, - VPGATHERDQrm, - VPGATHERQDrm, - VPGATHERQQrm, - VPGATHERDDrm, - VPGATHERQDrm, - VPGATHERDQrm, - VPGATHERQQrm, - VGATHERDPSrm, - VGATHERQPSrm, - VGATHERDPDrm, - VGATHERQPDrm)>; - -def 
SKXWriteResGroup224_3 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { - let Latency = 25; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; +def: InstRW<[SKXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm, + VGATHERDPSrm, VPGATHERDDrm, + VGATHERQPDYrm, VPGATHERQQYrm, + VGATHERQPSYrm, VPGATHERQDYrm)>; + +def SKXWriteResGroupVEX8 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> { + let Latency = 22; + let NumMicroOps = 5; // 2 uops perform multiple loads + let ResourceCycles = [1,8,1,1]; } -def: InstRW<[SKXWriteResGroup224_3], (instrs VGATHERDPSYrm, - VGATHERQPDYrm, - VGATHERQPSYrm, - VPGATHERDDYrm, - VPGATHERDQYrm, - VPGATHERQDYrm, - VPGATHERQQYrm, - VPGATHERDDYrm, - VPGATHERQDYrm, - VPGATHERDQYrm, - VPGATHERQQYrm, - VGATHERDPSYrm, - VGATHERQPSYrm, - VGATHERDPDYrm)>; +def: InstRW<[SKXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>; def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { let Latency = 22; @@ -2279,27 +2271,6 @@ } def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI(16|32)m")>; -def SKXWriteResGroup234 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { - let Latency = 25; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SKXWriteResGroup234], (instrs VGATHERDPDZ256rm, - VGATHERQPDZ256rm, - VPGATHERDQZ256rm, - VPGATHERQDZrm, - VPGATHERQQZ256rm)>; - -def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { - let Latency = 26; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SKXWriteResGroup238], (instrs VGATHERDPDZrm, - VGATHERQPDZrm, - VPGATHERDQZrm, - VPGATHERQQZrm)>; - def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> { let Latency = 27; let NumMicroOps = 2; @@ -2307,14 +2278,6 @@ } def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F(32|64)m")>; -def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { - let Latency = 27; - let NumMicroOps = 5; 
- let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SKXWriteResGroup240], (instrs VGATHERDPSZ256rm, - VPGATHERDDZ256rm)>; - def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { let Latency = 29; let NumMicroOps = 15; @@ -2329,14 +2292,6 @@ } def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>; -def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> { - let Latency = 30; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SKXWriteResGroup245], (instrs VGATHERDPSZrm, - VPGATHERDDZrm)>; - def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> { let Latency = 35; let NumMicroOps = 23; Index: llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s =================================================================== --- llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s +++ llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx2.s @@ -465,14 +465,14 @@ # CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0 # CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax) -# CHECK-NEXT: 5 22 1.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 5 18 1.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 5 20 2.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 22 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 5 18 1.00 * 
vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 5 18 1.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 2 7 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovntdqa (%rax), %ymm0 @@ -568,14 +568,14 @@ # CHECK-NEXT: 2 10 1.00 * vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2 # CHECK-NEXT: 2 10 1.00 * vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: 5 22 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 5 20 2.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 22 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 5 18 1.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 5 18 1.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 5 18 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2 @@ -776,7 +776,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 110.33 89.33 85.67 85.67 1.00 164.33 - 1.67 +# 
CHECK-NEXT: - - 110.33 89.33 99.67 99.67 1.00 164.33 - 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -786,13 +786,13 @@ # CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax) # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 1.33 0.33 4.00 4.00 - 1.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0 @@ -888,14 +888,14 @@ # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - 
vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 1.33 0.33 4.00 4.00 - 1.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphaddd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vphaddsw %ymm0, %ymm1, %ymm2 Index: llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s =================================================================== --- llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s +++ llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s @@ -465,14 +465,14 @@ # CHECK-NEXT: 1 3 1.00 vbroadcastss %xmm0, %ymm0 # CHECK-NEXT: 1 3 1.00 vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vextracti128 $1, %ymm0, (%rax) -# CHECK-NEXT: 5 22 1.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 
25 1.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 5 18 1.00 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: 5 20 2.00 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 22 4.00 * vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 5 18 1.00 * vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 5 18 1.00 * vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: 1 3 1.00 vinserti128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: 2 7 0.50 * vinserti128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovntdqa (%rax), %ymm0 @@ -568,14 +568,14 @@ # CHECK-NEXT: 2 10 1.00 * vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vpermq $1, %ymm0, %ymm2 # CHECK-NEXT: 2 10 1.00 * vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: 5 22 1.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 -# CHECK-NEXT: 5 22 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: 5 25 1.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 5 20 2.00 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 22 4.00 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: 5 18 1.00 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vpgatherdq %ymm0, (%rax,%xmm1,2), 
%ymm2 +# CHECK-NEXT: 5 18 1.00 * vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: 5 18 1.00 * vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: 5 20 2.00 * vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: 3 3 2.00 vphaddd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 4 10 2.00 * vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 3 3 2.00 vphaddsw %ymm0, %ymm1, %ymm2 @@ -776,7 +776,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 110.33 89.33 85.67 85.67 1.00 164.33 - 1.67 +# CHECK-NEXT: - - 110.33 89.33 99.67 99.67 1.00 164.33 - 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -786,13 +786,13 @@ # CHECK-NEXT: - - - - - - - 1.00 - - vextracti128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextracti128 $1, %ymm0, (%rax) # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 1.33 0.33 4.00 4.00 - 1.33 - - vgatherdps %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqpd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherqpd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqps %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vgatherqps 
%xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vgatherqps %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vinserti128 $1, %xmm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vinserti128 $1, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovntdqa (%rax), %ymm0 @@ -888,14 +888,14 @@ # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpermq $1, %ymm0, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq $1, (%rax), %ymm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2 +# CHECK-NEXT: - - 1.33 0.33 4.00 4.00 - 1.33 - - vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherdq %ymm0, (%rax,%xmm1,2), %ymm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherqd %xmm0, (%rax,%ymm1,2), %xmm2 # CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqq %xmm0, (%rax,%xmm1,2), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 1.00 1.00 - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 +# CHECK-NEXT: - - 1.33 0.33 2.00 2.00 - 1.33 - - vpgatherqq %ymm0, (%rax,%ymm1,2), %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphaddd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphaddd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vphaddsw %ymm0, %ymm1, %ymm2 Index: 
llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s =================================================================== --- llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -81,6 +81,11 @@ vdivps (%rax), %zmm17, %zmm19 {z}{k1} vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vgatherdpd (%rax,%ymm1,2), %zmm2 {k1} +vgatherdps (%rax,%zmm1,2), %zmm2 {k1} +vgatherqpd (%rax,%zmm1,2), %zmm2 {k1} +vgatherqps (%rax,%zmm1,2), %ymm2 {k1} + vmaxpd %zmm16, %zmm17, %zmm19 vmaxpd (%rax), %zmm17, %zmm19 vmaxpd (%rax){1to8}, %zmm17, %zmm19 @@ -181,6 +186,11 @@ vpaddq (%rax), %zmm17, %zmm19 {z}{k1} vpaddq (%rax){1to8}, %zmm17, %zmm19 {z}{k1} +vpgatherdq (%rax,%ymm1,2), %zmm2 {k1} +vpgatherdd (%rax,%zmm1,2), %zmm2 {k1} +vpgatherqq (%rax,%zmm1,2), %zmm2 {k1} +vpgatherqd (%rax,%zmm1,2), %ymm2 {k1} + vpmulld %zmm16, %zmm17, %zmm19 vpmulld (%rax), %zmm17, %zmm19 vpmulld (%rax){1to16}, %zmm17, %zmm19 @@ -596,6 +606,10 @@ # CHECK-NEXT: 3 18 10.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 10.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 25 10.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 5 21 4.00 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: 5 25 8.00 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 5 21 4.00 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 5 21 4.00 * vgatherqps (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: 1 4 0.50 vmaxpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vmaxpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vmaxpd (%rax){1to8}, %zmm17, %zmm19 @@ -686,6 +700,10 @@ # CHECK-NEXT: 1 1 0.33 vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 5 21 4.00 * vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: 5 25 8.00 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} +# 
CHECK-NEXT: 5 21 4.00 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 5 21 4.00 * vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: 2 10 1.00 vpmulld %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %zmm17, %zmm19 # CHECK-NEXT: 3 17 1.00 * vpmulld (%rax){1to16}, %zmm17, %zmm19 @@ -1001,7 +1019,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 188.00 26.00 153.00 153.00 - 330.00 - - +# CHECK-NEXT: - 612.00 200.67 30.67 193.00 193.00 - 334.67 2.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1077,6 +1095,10 @@ # CHECK-NEXT: - 10.00 2.00 - - - - 1.00 - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 10.00 2.00 - 0.50 0.50 - 1.00 - - vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherqps (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vmaxpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vmaxpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vmaxpd (%rax){1to8}, %zmm17, %zmm19 @@ -1167,6 +1189,10 @@ # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - 
vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherqd (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vpmulld %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpmulld (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vpmulld (%rax){1to16}, %zmm17, %zmm19 Index: llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s =================================================================== --- llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s +++ llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s @@ -121,6 +121,16 @@ vdivps (%rax), %ymm17, %ymm19 {z}{k1} vdivps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vgatherdpd (%rax,%xmm1,2), %ymm2 {k1} +vgatherdps (%rax,%ymm1,2), %ymm2 {k1} +vgatherqpd (%rax,%ymm1,2), %ymm2 {k1} +vgatherqps (%rax,%ymm1,2), %xmm2 {k1} + +vgatherdpd (%rax,%xmm1,2), %xmm2 {k1} +vgatherdps (%rax,%xmm1,2), %xmm2 {k1} +vgatherqpd (%rax,%xmm1,2), %xmm2 {k1} +vgatherqps (%rax,%xmm1,2), %xmm2 {k1} + vmaxpd %xmm16, %xmm17, %xmm19 vmaxpd (%rax), %xmm17, %xmm19 vmaxpd (%rax){1to2}, %xmm17, %xmm19 @@ -421,6 +431,16 @@ vpermq (%rax), %ymm17, %ymm19 {z}{k1} vpermq (%rax){1to4}, %ymm17, %ymm19 {z}{k1} +vpgatherdq (%rax,%xmm1,2), %ymm2 {k1} +vpgatherdd (%rax,%ymm1,2), %ymm2 {k1} +vpgatherqq (%rax,%ymm1,2), %ymm2 {k1} +vpgatherqd (%rax,%ymm1,2), %xmm2 {k1} + +vpgatherdq (%rax,%xmm1,2), %xmm2 {k1} +vpgatherdd (%rax,%xmm1,2), %xmm2 {k1} +vpgatherqq (%rax,%xmm1,2), %xmm2 {k1} +vpgatherqd (%rax,%xmm1,2), %xmm2 {k1} + vpmulld %xmm16, %xmm17, %xmm19 vpmulld (%rax), %xmm17, %xmm19 vpmulld (%rax){1to4}, %xmm17, %xmm19 @@ -858,6 +878,14 @@ # CHECK-NEXT: 1 11 5.00 vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 18 5.00 * vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 18 5.00 * vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# 
CHECK-NEXT: 5 19 2.00 * vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 5 21 4.00 * vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 5 19 2.00 * vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 5 19 2.00 * vgatherqps (%rax,%ymm1,2), %xmm2 {%k1} +# CHECK-NEXT: 5 17 1.00 * vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 5 19 2.00 * vgatherdps (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 5 17 1.00 * vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 5 17 1.00 * vgatherqps (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 1 4 0.50 vmaxpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 10 0.50 * vmaxpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 10 0.50 * vmaxpd (%rax){1to2}, %xmm17, %xmm19 @@ -1128,6 +1156,14 @@ # CHECK-NEXT: 1 3 1.00 vpermq %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vpermq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 5 19 2.00 * vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1} +# CHECK-NEXT: 5 21 4.00 * vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 5 19 2.00 * vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: 5 19 2.00 * vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1} +# CHECK-NEXT: 5 17 1.00 * vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 5 19 2.00 * vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 5 17 1.00 * vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: 5 17 1.00 * vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: 2 10 1.00 vpmulld %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 3 16 1.00 * vpmulld (%rax), %xmm17, %xmm19 # CHECK-NEXT: 3 16 1.00 * vpmulld (%rax){1to4}, %xmm17, %xmm19 @@ -1431,7 +1467,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 423.00 217.00 145.00 222.00 222.00 - 328.00 - - +# CHECK-NEXT: - 423.00 242.33 154.33 252.00 252.00 - 337.33 4.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1543,6 
+1579,14 @@ # CHECK-NEXT: - 5.00 1.00 - - - - - - - vdivps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - vdivps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - vdivps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherdpd (%rax,%xmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vgatherdps (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherqpd (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherqps (%rax,%ymm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vgatherdpd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vgatherdps (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vgatherqpd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vgatherqps (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax){1to2}, %xmm17, %xmm19 @@ -1813,6 +1857,14 @@ # CHECK-NEXT: - - - - - - - 1.00 - - vpermq %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vpgatherdq (%rax,%xmm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherdd (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vpgatherqq (%rax,%ymm1,2), %ymm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 2.00 2.00 - 0.58 0.25 - vpgatherqd (%rax,%ymm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vpgatherdq (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 2.00 
2.00 - 0.58 0.25 - vpgatherdd (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vpgatherqq (%rax,%xmm1,2), %xmm2 {%k1} +# CHECK-NEXT: - - 1.58 0.58 1.00 1.00 - 0.58 0.25 - vpgatherqd (%rax,%xmm1,2), %xmm2 {%k1} # CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax){1to4}, %xmm17, %xmm19