Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -512,8 +512,6 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; @@ -819,6 +817,55 @@ def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>; /////////////////////////////////////////////////////////////////////////////// +// AVX Conditional SIMD Packed Stores - VMASKMOVP +// SSE2/AVX Sttore Selected Bytes of Double Quadword - (V)MASKMOVDQ +/////////////////////////////////////////////////////////////////////////////// + +def JWriteMASKMOVPSmr: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> { + let Latency = 16; + let ResourceCycles = [1, 1, 5, 5, 4, 4, 4]; + let NumMicroOps = 19; +} + +def JWriteMASKMOVPDmr: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> { + let Latency = 13; + let ResourceCycles = [1, 1, 2, 2, 2, 2, 2]; + let NumMicroOps = 10; +} + +def JWriteMASKMOVPSYmr: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> { + let Latency = 22; + let ResourceCycles = [1, 1, 10, 10, 8, 8, 8]; + let NumMicroOps = 36; +} + +def JWriteMASKMOVPDYmr: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> { + let Latency = 16; + let ResourceCycles = [1, 1, 4, 4, 4, 4, 4]; + let NumMicroOps = 18; +} + +def JWriteFMaskedStore : SchedWriteVariant<[ + SchedVar>, [JWriteMASKMOVPSmr]>, + SchedVar +]>; +def : SchedAlias; + +def JWriteFMaskedStoreY : SchedWriteVariant<[ + SchedVar>, [JWriteMASKMOVPSYmr]>, + SchedVar +]>; +def : SchedAlias; + +def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> { + let Latency = 34; + let ResourceCycles = [1, 1, 2, 2, 2, 16, 42]; + let NumMicroOps = 63; +} +def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, + VMASKMOVDQU, VMASKMOVDQU64)>; + +/////////////////////////////////////////////////////////////////////////////// // SchedWriteVariant definitions. /////////////////////////////////////////////////////////////////////////////// Index: test/tools/llvm-mca/X86/BtVer2/resources-avx1.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1219,15 +1219,15 @@ # CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 * U vldmxcsr (%rax) -# CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 63 34 21.00 * * U vmaskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 1 6 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2 6 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2 -# CHECK-NEXT: 1 6 2.00 * * vmaskmovpd %xmm0, %xmm1, (%rax) -# CHECK-NEXT: 2 6 2.00 * * vmaskmovpd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 10 13 2.00 * * vmaskmovpd %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 18 16 4.00 * * vmaskmovpd %ymm0, %ymm1, (%rax) # CHECK-NEXT: 1 6 1.00 * vmaskmovps (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2 6 2.00 * vmaskmovps (%rax), %ymm0, %ymm2 -# CHECK-NEXT: 1 6 2.00 * * vmaskmovps %xmm0, %xmm1, (%rax) -# CHECK-NEXT: 2 6 2.00 * * vmaskmovps %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 19 16 5.00 * * vmaskmovps %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 36 22 10.00 * * vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: 1 2 1.00 vmaxpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vmaxpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2 @@ -1740,7 +1740,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 56.00 - - 365.00 915.00 447.50 461.50 394.00 - 51.00 132.00 135.50 159.50 38.00 +# CHECK-NEXT: 86.00 30.00 - 362.00 907.00 449.50 480.50 414.00 - 78.00 154.00 135.50 159.50 38.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -1933,15 +1933,15 @@ # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vlddqu (%rax), %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vlddqu (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vldmxcsr (%rax) -# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmaskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 21.00 21.00 - 1.00 - 1.00 2.00 2.00 - 16.00 2.00 - - - vmaskmovdqu %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vmaskmovpd (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 2.00 - - - - - - vmaskmovpd (%rax), %ymm0, %ymm2 -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovpd %xmm0, %xmm1, (%rax) -# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 - - 2.00 - - - - vmaskmovpd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 1.00 1.00 - 1.00 - 1.00 2.00 2.00 - 2.00 2.00 - - - vmaskmovpd %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 2.00 2.00 - 1.00 - 1.00 4.00 4.00 - 4.00 4.00 - - - vmaskmovpd %ymm0, %ymm1, (%rax) # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vmaskmovps (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 2.00 - - - - - - vmaskmovps (%rax), %ymm0, %ymm2 -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovps %xmm0, %xmm1, (%rax) -# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 - - 2.00 - - - - vmaskmovps %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 2.00 2.00 - 1.00 - 1.00 5.00 4.00 - 4.00 5.00 - - - vmaskmovps %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 4.00 4.00 - 1.00 - 1.00 10.00 8.00 - 8.00 10.00 - - - vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmaxpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 Index: test/tools/llvm-mca/X86/BtVer2/resources-sse2.s =================================================================== --- test/tools/llvm-mca/X86/BtVer2/resources-sse2.s +++ test/tools/llvm-mca/X86/BtVer2/resources-sse2.s @@ -465,7 +465,7 @@ # CHECK-NEXT: 1 19 19.00 divsd %xmm0, %xmm2 # CHECK-NEXT: 1 24 19.00 * divsd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * * U lfence -# CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 63 34 21.00 * * U maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 1 2 1.00 maxpd %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 maxsd %xmm0, %xmm2 @@ -693,7 +693,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 17.00 - - 49.00 204.00 128.50 141.50 118.00 - 16.00 54.00 67.50 67.50 12.00 +# CHECK-NEXT: 38.00 21.00 - 50.00 204.00 129.50 142.50 120.00 - 31.00 55.00 67.50 67.50 12.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -755,7 +755,7 @@ # CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - divsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - divsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - lfence -# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - maskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 21.00 21.00 - 1.00 - 1.00 2.00 2.00 - 16.00 2.00 - - - maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - maxpd %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - maxpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - maxsd %xmm0, %xmm2