Index: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-1.s
@@ -23,6 +23,22 @@
 # CHECK-NEXT:   - Register Dependencies [ 94.04% ]
 # CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
 
+# CHECK:      Critical sequence based on the simulation:
+
+# CHECK:                    Instruction                                 Dependency Information
+# CHECK-NEXT:  +----< 3.    addl	%edx, %eax
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    addl	%eax, %ebx                        ## REGISTER dependency:  %eax
+# CHECK-NEXT:  +----> 1.    addl	%ebx, %ecx                        ## REGISTER dependency:  %ebx
+# CHECK-NEXT:  +----> 2.    addl	%ecx, %edx                        ## REGISTER dependency:  %ecx
+# CHECK-NEXT:  +----> 3.    addl	%edx, %eax                        ## REGISTER dependency:  %edx
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    addl	%eax, %ebx                        ## REGISTER dependency:  %eax
+
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
 # CHECK-NEXT: [2]: Latency
Index: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-2.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-2.s
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-2.s
@@ -22,6 +22,19 @@
 # CHECK-NEXT:   - Register Dependencies [ 0.00% ]
 # CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
 
+# CHECK:      Critical sequence based on the simulation:
+
+# CHECK:                    Instruction                                 Dependency Information
+# CHECK-NEXT:  +----< 0.    vhaddps	%xmm0, %xmm0, %xmm1
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    vhaddps	%xmm0, %xmm0, %xmm1               ## RESOURCE interference:  JFPA [ probability: 99% ]
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    vhaddps	%xmm0, %xmm0, %xmm1               ## RESOURCE interference:  JFPA [ probability: 99% ]
+
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
 # CHECK-NEXT: [2]: Latency
Index: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-3.s
@@ -27,6 +27,26 @@
 # CHECK-NEXT:   - Register Dependencies [ 83.24% ]
 # CHECK-NEXT:   - Memory Dependencies   [ 99.89% ]
 
+# CHECK:      Critical sequence based on the simulation:
+
+# CHECK:                    Instruction                                 Dependency Information
+# CHECK-NEXT:  +----< 7.    vmovaps	%xmm0, 48(%rdi)
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    vmovaps	(%rsi), %xmm0                     ## MEMORY dependency.
+# CHECK-NEXT:  +----> 1.    vmovaps	%xmm0, (%rdi)                     ## REGISTER dependency:  %xmm0
+# CHECK-NEXT:  +----> 2.    vmovaps	16(%rsi), %xmm0                   ## MEMORY dependency.
+# CHECK-NEXT:  +----> 3.    vmovaps	%xmm0, 16(%rdi)                   ## REGISTER dependency:  %xmm0
+# CHECK-NEXT:  +----> 4.    vmovaps	32(%rsi), %xmm0                   ## MEMORY dependency.
+# CHECK-NEXT:  +----> 5.    vmovaps	%xmm0, 32(%rdi)                   ## REGISTER dependency:  %xmm0
+# CHECK-NEXT:  +----> 6.    vmovaps	48(%rsi), %xmm0                   ## MEMORY dependency.
+# CHECK-NEXT:  +----> 7.    vmovaps	%xmm0, 48(%rdi)                   ## REGISTER dependency:  %xmm0
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    vmovaps	(%rsi), %xmm0                     ## MEMORY dependency.
+
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
 # CHECK-NEXT: [2]: Latency
Index: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-4.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-4.s
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/bottleneck-hints-4.s
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -bottleneck-analysis < %s | FileCheck %s
+
+vmulps   %xmm0, %xmm1, %xmm2
+vhaddps  %xmm2, %xmm2, %xmm3
+vhaddps  %xmm3, %xmm3, %xmm4
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      300
+# CHECK-NEXT: Total Cycles:      211
+# CHECK-NEXT: Total uOps:        300
+
+# CHECK:      Dispatch Width:    2
+# CHECK-NEXT: uOps Per Cycle:    1.42
+# CHECK-NEXT: IPC:               1.42
+# CHECK-NEXT: Block RThroughput: 2.0
+
+# CHECK:      Cycles with backend pressure increase [ 40.76% ]
+# CHECK-NEXT: Throughput Bottlenecks:
+# CHECK-NEXT:   Resource Pressure       [ 39.34% ]
+# CHECK-NEXT:   - JFPA  [ 39.34% ]
+# CHECK-NEXT:   - JFPU0  [ 39.34% ]
+# CHECK-NEXT:   Data Dependencies:      [ 1.42% ]
+# CHECK-NEXT:   - Register Dependencies [ 1.42% ]
+# CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
+
+# CHECK:      Critical sequence based on the simulation:
+
+# CHECK:                    Instruction                                 Dependency Information
+# CHECK-NEXT:  +----< 2.    vhaddps	%xmm3, %xmm3, %xmm4
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |      0.    vmulps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  +----> 1.    vhaddps	%xmm2, %xmm2, %xmm3               ## RESOURCE interference:  JFPA [ probability: 73% ]
+# CHECK-NEXT:  +----> 2.    vhaddps	%xmm3, %xmm3, %xmm4               ## REGISTER dependency:  %xmm3
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 1.    vhaddps	%xmm2, %xmm2, %xmm3               ## RESOURCE interference:  JFPA [ probability: 73% ]
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      2     1.00                        vmulps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  1      4     1.00                        vhaddps	%xmm2, %xmm2, %xmm3
+# CHECK-NEXT:  1      4     1.00                        vhaddps	%xmm3, %xmm3, %xmm4
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - JALU0
+# CHECK-NEXT: [1]   - JALU1
+# CHECK-NEXT: [2]   - JDiv
+# CHECK-NEXT: [3]   - JFPA
+# CHECK-NEXT: [4]   - JFPM
+# CHECK-NEXT: [5]   - JFPU0
+# CHECK-NEXT: [6]   - JFPU1
+# CHECK-NEXT: [7]   - JLAGU
+# CHECK-NEXT: [8]   - JMul
+# CHECK-NEXT: [9]   - JSAGU
+# CHECK-NEXT: [10]  - JSTC
+# CHECK-NEXT: [11]  - JVALU0
+# CHECK-NEXT: [12]  - JVALU1
+# CHECK-NEXT: [13]  - JVIMUL
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
+# CHECK-NEXT:  -      -      -     2.00   1.00   2.00   1.00    -      -      -      -      -      -      -
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
+# CHECK-NEXT:  -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     vmulps	%xmm0, %xmm1, %xmm2
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     vhaddps	%xmm2, %xmm2, %xmm3
+# CHECK-NEXT:  -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     vhaddps	%xmm3, %xmm3, %xmm4
Index: llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s
+++ llvm/trunk/test/tools/llvm-mca/X86/SkylakeClient/bottleneck-analysis.s
@@ -0,0 +1,154 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mcpu=skylake -bottleneck-analysis < %s | FileCheck %s
+
+.LBB0_4:
+  vmovups	(%rsi,%rax,2), %xmm0
+  vpermilps	$255, %xmm0, %xmm7
+  vmulps	-24(%rsp), %xmm7, %xmm8
+  vpermilps	$170, %xmm0, %xmm6
+  vpermilps	$85, %xmm0, %xmm5
+  vbroadcastss	%xmm0, %xmm0
+  vfmadd231ps	%xmm9, %xmm6, %xmm8
+  vfmadd213ps	%xmm8, %xmm10, %xmm5
+  vfmadd213ps	%xmm5, %xmm11, %xmm0
+  vfmadd213ps	%xmm0, %xmm12, %xmm4
+  vfmadd213ps	%xmm4, %xmm13, %xmm1
+  vmovaps	%xmm7, %xmm4
+  vfmadd213ps	%xmm1, %xmm14, %xmm2
+  vmovaps	%xmm6, %xmm1
+  vfmadd213ps	%xmm2, %xmm15, %xmm3
+  vpermilps	$170, %xmm3, %xmm0
+  vmovups	%xmm3, (%rdx,%rax)
+  vpermilps	$255, %xmm3, %xmm2
+  addq	$16, %rax
+  decl	%ecx
+  vmovaps	%xmm0, %xmm3
+  jne	.LBB0_4
+
+# CHECK:      Iterations:        100
+# CHECK-NEXT: Instructions:      2200
+# CHECK-NEXT: Total Cycles:      1039
+# CHECK-NEXT: Total uOps:        2400
+
+# CHECK:      Dispatch Width:    6
+# CHECK-NEXT: uOps Per Cycle:    2.31
+# CHECK-NEXT: IPC:               2.12
+# CHECK-NEXT: Block RThroughput: 6.0
+
+# CHECK:      Cycles with backend pressure increase [ 92.69% ]
+# CHECK-NEXT: Throughput Bottlenecks:
+# CHECK-NEXT:   Resource Pressure       [ 46.78% ]
+# CHECK-NEXT:   - SKLPort0  [ 14.24% ]
+# CHECK-NEXT:   - SKLPort1  [ 14.24% ]
+# CHECK-NEXT:   - SKLPort5  [ 46.49% ]
+# CHECK-NEXT:   - SKLPort6  [ 8.66% ]
+# CHECK-NEXT:   Data Dependencies:      [ 64.97% ]
+# CHECK-NEXT:   - Register Dependencies [ 64.97% ]
+# CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
+
+# CHECK:      Critical sequence based on the simulation:
+
+# CHECK:                    Instruction                                 Dependency Information
+# CHECK-NEXT:  +----< 18.   addq	$16, %rax
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    vmovups	(%rsi,%rax,2), %xmm0              ## REGISTER dependency:  %rax
+# CHECK-NEXT:  |      1.    vpermilps	$255, %xmm0, %xmm7
+# CHECK-NEXT:  |      2.    vmulps	-24(%rsp), %xmm7, %xmm8
+# CHECK-NEXT:  +----> 3.    vpermilps	$170, %xmm0, %xmm6                ## REGISTER dependency:  %xmm0
+# CHECK-NEXT:  |      4.    vpermilps	$85, %xmm0, %xmm5
+# CHECK-NEXT:  |      5.    vbroadcastss	%xmm0, %xmm0
+# CHECK-NEXT:  +----> 6.    vfmadd231ps	%xmm9, %xmm6, %xmm8       ## REGISTER dependency:  %xmm6
+# CHECK-NEXT:  +----> 7.    vfmadd213ps	%xmm8, %xmm10, %xmm5      ## REGISTER dependency:  %xmm8
+# CHECK-NEXT:  +----> 8.    vfmadd213ps	%xmm5, %xmm11, %xmm0      ## REGISTER dependency:  %xmm5
+# CHECK-NEXT:  +----> 9.    vfmadd213ps	%xmm0, %xmm12, %xmm4      ## REGISTER dependency:  %xmm0
+# CHECK-NEXT:  +----> 10.   vfmadd213ps	%xmm4, %xmm13, %xmm1      ## REGISTER dependency:  %xmm4
+# CHECK-NEXT:  |      11.   vmovaps	%xmm7, %xmm4
+# CHECK-NEXT:  +----> 12.   vfmadd213ps	%xmm1, %xmm14, %xmm2      ## REGISTER dependency:  %xmm1
+# CHECK-NEXT:  |      13.   vmovaps	%xmm6, %xmm1
+# CHECK-NEXT:  |      14.   vfmadd213ps	%xmm2, %xmm15, %xmm3
+# CHECK-NEXT:  |      15.   vpermilps	$170, %xmm3, %xmm0
+# CHECK-NEXT:  |      16.   vmovups	%xmm3, (%rdx,%rax)
+# CHECK-NEXT:  |      17.   vpermilps	$255, %xmm3, %xmm2
+# CHECK-NEXT:  |      18.   addq	$16, %rax
+# CHECK-NEXT:  |      19.   decl	%ecx
+# CHECK-NEXT:  |      20.   vmovaps	%xmm0, %xmm3
+# CHECK-NEXT:  |      21.   jne	.LBB0_4
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 2.    vmulps	-24(%rsp), %xmm7, %xmm8           ## RESOURCE interference:  SKLPort1 [ probability: 45% ]
+
+# CHECK:      Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+# CHECK-NEXT:  1      6     0.50    *                   vmovups	(%rsi,%rax,2), %xmm0
+# CHECK-NEXT:  1      1     1.00                        vpermilps	$255, %xmm0, %xmm7
+# CHECK-NEXT:  2      10    0.50    *                   vmulps	-24(%rsp), %xmm7, %xmm8
+# CHECK-NEXT:  1      1     1.00                        vpermilps	$170, %xmm0, %xmm6
+# CHECK-NEXT:  1      1     1.00                        vpermilps	$85, %xmm0, %xmm5
+# CHECK-NEXT:  1      1     1.00                        vbroadcastss	%xmm0, %xmm0
+# CHECK-NEXT:  1      4     0.50                        vfmadd231ps	%xmm9, %xmm6, %xmm8
+# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm8, %xmm10, %xmm5
+# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm5, %xmm11, %xmm0
+# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm0, %xmm12, %xmm4
+# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm4, %xmm13, %xmm1
+# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm7, %xmm4
+# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm1, %xmm14, %xmm2
+# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm6, %xmm1
+# CHECK-NEXT:  1      4     0.50                        vfmadd213ps	%xmm2, %xmm15, %xmm3
+# CHECK-NEXT:  1      1     1.00                        vpermilps	$170, %xmm3, %xmm0
+# CHECK-NEXT:  2      1     1.00           *            vmovups	%xmm3, (%rdx,%rax)
+# CHECK-NEXT:  1      1     1.00                        vpermilps	$255, %xmm3, %xmm2
+# CHECK-NEXT:  1      1     0.25                        addq	$16, %rax
+# CHECK-NEXT:  1      1     0.25                        decl	%ecx
+# CHECK-NEXT:  1      1     0.33                        vmovaps	%xmm0, %xmm3
+# CHECK-NEXT:  1      1     0.50                        jne	.LBB0_4
+
+# CHECK:      Resources:
+# CHECK-NEXT: [0]   - SKLDivider
+# CHECK-NEXT: [1]   - SKLFPDivider
+# CHECK-NEXT: [2]   - SKLPort0
+# CHECK-NEXT: [3]   - SKLPort1
+# CHECK-NEXT: [4]   - SKLPort2
+# CHECK-NEXT: [5]   - SKLPort3
+# CHECK-NEXT: [6]   - SKLPort4
+# CHECK-NEXT: [7]   - SKLPort5
+# CHECK-NEXT: [8]   - SKLPort6
+# CHECK-NEXT: [9]   - SKLPort7
+
+# CHECK:      Resource pressure per iteration:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]
+# CHECK-NEXT:  -      -     5.52   5.53   1.01   1.03   1.00   6.02   2.93   0.96
+
+# CHECK:      Resource pressure by instruction:
+# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    Instructions:
+# CHECK-NEXT:  -      -      -      -     0.04   0.96    -      -      -      -     vmovups	(%rsi,%rax,2), %xmm0
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$255, %xmm0, %xmm7
+# CHECK-NEXT:  -      -     0.03   0.97   0.96   0.04    -      -      -      -     vmulps	-24(%rsp), %xmm7, %xmm8
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$170, %xmm0, %xmm6
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$85, %xmm0, %xmm5
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vbroadcastss	%xmm0, %xmm0
+# CHECK-NEXT:  -      -     0.95   0.05    -      -      -      -      -      -     vfmadd231ps	%xmm9, %xmm6, %xmm8
+# CHECK-NEXT:  -      -     0.50   0.50    -      -      -      -      -      -     vfmadd213ps	%xmm8, %xmm10, %xmm5
+# CHECK-NEXT:  -      -     0.92   0.08    -      -      -      -      -      -     vfmadd213ps	%xmm5, %xmm11, %xmm0
+# CHECK-NEXT:  -      -     0.95   0.05    -      -      -      -      -      -     vfmadd213ps	%xmm0, %xmm12, %xmm4
+# CHECK-NEXT:  -      -     0.51   0.49    -      -      -      -      -      -     vfmadd213ps	%xmm4, %xmm13, %xmm1
+# CHECK-NEXT:  -      -     0.52   0.48    -      -      -      -      -      -     vmovaps	%xmm7, %xmm4
+# CHECK-NEXT:  -      -     0.49   0.51    -      -      -      -      -      -     vfmadd213ps	%xmm1, %xmm14, %xmm2
+# CHECK-NEXT:  -      -     0.04   0.95    -      -      -     0.01    -      -     vmovaps	%xmm6, %xmm1
+# CHECK-NEXT:  -      -     0.51   0.49    -      -      -      -      -      -     vfmadd213ps	%xmm2, %xmm15, %xmm3
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$170, %xmm3, %xmm0
+# CHECK-NEXT:  -      -      -      -     0.01   0.03   1.00    -      -     0.96   vmovups	%xmm3, (%rdx,%rax)
+# CHECK-NEXT:  -      -      -      -      -      -      -     1.00    -      -     vpermilps	$255, %xmm3, %xmm2
+# CHECK-NEXT:  -      -      -      -      -      -      -      -     1.00    -     addq	$16, %rax
+# CHECK-NEXT:  -      -     0.04   0.01    -      -      -     0.01   0.94    -     decl	%ecx
+# CHECK-NEXT:  -      -     0.05   0.95    -      -      -      -      -      -     vmovaps	%xmm0, %xmm3
+# CHECK-NEXT:  -      -     0.01    -      -      -      -      -     0.99    -     jne	.LBB0_4
Index: llvm/trunk/test/tools/llvm-mca/X86/option-all-views-1.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/option-all-views-1.s
+++ llvm/trunk/test/tools/llvm-mca/X86/option-all-views-1.s
@@ -25,6 +25,19 @@
 # FULLREPORT-NEXT:      - Register Dependencies [ 76.70% ]
 # FULLREPORT-NEXT:      - Memory Dependencies   [ 0.00% ]
 
+# FULLREPORT:         Critical sequence based on the simulation:
+
+# FULLREPORT:                       Instruction                                 Dependency Information
+# FULLREPORT-NEXT:     +----< 0.    addl	%eax, %eax
+# FULLREPORT-NEXT:     |
+# FULLREPORT-NEXT:     |    < loop carried >
+# FULLREPORT-NEXT:     |
+# FULLREPORT-NEXT:     +----> 0.    addl	%eax, %eax                        ## REGISTER dependency:  %eax
+# FULLREPORT-NEXT:     |
+# FULLREPORT-NEXT:     |    < loop carried >
+# FULLREPORT-NEXT:     |
+# FULLREPORT-NEXT:     +----> 0.    addl	%eax, %eax                        ## REGISTER dependency:  %eax
+
 # DEFAULTREPORT:      Instruction Info:
 # DEFAULTREPORT-NEXT: [1]: #uOps
 # DEFAULTREPORT-NEXT: [2]: Latency
Index: llvm/trunk/test/tools/llvm-mca/X86/option-all-views-2.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/option-all-views-2.s
+++ llvm/trunk/test/tools/llvm-mca/X86/option-all-views-2.s
@@ -24,6 +24,19 @@
 # ALL-NEXT:          - Register Dependencies [ 76.70% ]
 # ALL-NEXT:          - Memory Dependencies   [ 0.00% ]
 
+# ALL:             Critical sequence based on the simulation:
+
+# ALL:                           Instruction                                 Dependency Information
+# ALL-NEXT:         +----< 0.    addl	%eax, %eax
+# ALL-NEXT:         |
+# ALL-NEXT:         |    < loop carried >
+# ALL-NEXT:         |
+# ALL-NEXT:         +----> 0.    addl	%eax, %eax                        ## REGISTER dependency:  %eax
+# ALL-NEXT:         |
+# ALL-NEXT:         |    < loop carried >
+# ALL-NEXT:         |
+# ALL-NEXT:         +----> 0.    addl	%eax, %eax                        ## REGISTER dependency:  %eax
+
 # ALL:             Instruction Info:
 # ALL-NEXT:        [1]: #uOps
 # ALL-NEXT:        [2]: Latency
Index: llvm/trunk/test/tools/llvm-mca/X86/option-no-stats-1.s
===================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/option-no-stats-1.s
+++ llvm/trunk/test/tools/llvm-mca/X86/option-no-stats-1.s
@@ -22,6 +22,19 @@
 # CHECK-NEXT:   - Register Dependencies [ 76.70% ]
 # CHECK-NEXT:   - Memory Dependencies   [ 0.00% ]
 
+# CHECK:      Critical sequence based on the simulation:
+
+# CHECK:                    Instruction                                 Dependency Information
+# CHECK-NEXT:  +----< 0.    addl	%edi, %eax
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    addl	%edi, %eax                        ## REGISTER dependency:  %eax
+# CHECK-NEXT:  |
+# CHECK-NEXT:  |    < loop carried >
+# CHECK-NEXT:  |
+# CHECK-NEXT:  +----> 0.    addl	%edi, %eax                        ## REGISTER dependency:  %eax
+
 # CHECK:      Instruction Info:
 # CHECK-NEXT: [1]: #uOps
 # CHECK-NEXT: [2]: Latency
Index: llvm/trunk/tools/llvm-mca/Views/BottleneckAnalysis.h
===================================================================
--- llvm/trunk/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ llvm/trunk/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -10,18 +10,71 @@
 /// This file implements the bottleneck analysis view.
 ///
 /// This view internally observes backend pressure increase events in order to
-/// identify potential sources of bottlenecks.
+/// identify problematic data dependencies and processor resource interferences.
 ///
-/// Example of bottleneck analysis report:
+/// Example of bottleneck analysis report for a dot-product on X86 btver2:
 ///
-/// Cycles with backend pressure increase [ 33.40% ]
-///  Throughput Bottlenecks:
-///  Resource Pressure       [ 0.52% ]
-///  - JLAGU  [ 0.52% ]
-///  Data Dependencies:      [ 32.88% ]
-///  - Register Dependencies [ 32.88% ]
-///  - Memory Dependencies   [ 0.00% ]
+/// Cycles with backend pressure increase [ 40.76% ]
+/// Throughput Bottlenecks: 
+///   Resource Pressure       [ 39.34% ]
+///   - JFPA  [ 39.34% ]
+///   - JFPU0  [ 39.34% ]
+///   Data Dependencies:      [ 1.42% ]
+///   - Register Dependencies [ 1.42% ]
+///   - Memory Dependencies   [ 0.00% ]
 ///
+/// According to the example, backend pressure increased during the 40.76% of
+/// the simulated cycles.  In particular, the major cause of backend pressure
+/// increases was the contention on floating point adder JFPA accessible from
+/// pipeline resource JFPU0.
+///
+/// At the end of each cycle, if pressure on the simulated out-of-order buffers
+/// has increased, a backend pressure event is reported.
+/// In particular, this occurs when there is a delta between the number of uOps
+/// dispatched and the number of uOps issued to the underlying pipelines.
+///
+/// The bottleneck analysis view is also responsible for identifying and printing
+/// the most "critical" sequence of dependent instructions according to the
+/// simulated run.
+///
+/// Below is the critical sequence computed for the dot-product example on
+/// btver2:
+///
+///              Instruction                     Dependency Information
+/// +----< 2.    vhaddps %xmm3, %xmm3, %xmm4
+/// |
+/// |    < loop carried > 
+/// |
+/// |      0.    vmulps	 %xmm0, %xmm0, %xmm2
+/// +----> 1.    vhaddps %xmm2, %xmm2, %xmm3     ## RESOURCE interference:  JFPA [ probability: 73% ]
+/// +----> 2.    vhaddps %xmm3, %xmm3, %xmm4     ## REGISTER dependency:  %xmm3
+/// |
+/// |    < loop carried > 
+/// |
+/// +----> 1.    vhaddps %xmm2, %xmm2, %xmm3     ## RESOURCE interference:  JFPA [ probability: 73% ]
+///
+///
+/// The algorithm that computes the critical sequence is very similar to a
+/// critical path analysis.
+/// 
+/// A dependency graph is used internally to track dependencies between nodes.
+/// Nodes of the graph represent instructions from the input assembly sequence,
+/// and edges of the graph represent data dependencies or processor resource
+/// interferences.
+///
+/// Edges are dynamically 'discovered' by observing instruction state transitions
+/// and backend pressure increase events. Edges are internally ranked based on
+/// their "criticality". A dependency is considered to be critical if it takes a
+/// long time to execute, and if it contributes to backend pressure increases.
+/// Criticality is internally measured in terms of cycles; it is computed for
+/// every edge in the graph as a function of the edge latency and the number of
+/// backend pressure increase cycles contributed by that edge.
+///
+/// At the end of simulation, costs are propagated to nodes through the edges of
+/// the graph, and the most expensive path connecting the root-set (a
+/// set of nodes with no predecessors) to a leaf node is reported as critical
+/// sequence.
+//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_TOOLS_LLVM_MCA_BOTTLENECK_ANALYSIS_H
@@ -108,6 +161,13 @@
     return Info.ResourcePressureCycles;
   }
 
+  const char *resolveResourceName(uint64_t ResourceMask) const {
+    unsigned Index = getResourceStateIndex(ResourceMask);
+    unsigned ProcResID = ResIdx2ProcResID[Index];
+    const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
+    return PRDesc.Name;
+  }
+
   void onInstructionDispatched(unsigned IID);
   void onInstructionExecuted(unsigned IID);
 
@@ -115,14 +175,13 @@
   void handleInstructionIssuedEvent(const HWInstructionIssuedEvent &Event);
 };
 
-// An edge of a dependency graph.
-// Vertices of the graph are instructions identified by their ID.
+// A dependency edge.
 struct DependencyEdge {
   enum DependencyType { DT_INVALID, DT_REGISTER, DT_MEMORY, DT_RESOURCE };
 
   // Dependency edge descriptor.
   //
-  // It describe the dependency reason, as well as the edge cost in cycles.
+  // It specifies the dependency type, as well as the edge cost in cycles.
   struct Dependency {
     DependencyType Type;
     uint64_t ResourceOrRegID;
@@ -130,14 +189,43 @@
   };
   Dependency Dep;
 
-  // Pair of vertices connected by this edge.
   unsigned FromIID;
   unsigned ToIID;
+
+  // Used by the bottleneck analysis to compute the interference
+  // probability for processor resources.
+  unsigned Frequency;
 };
 
+// A dependency graph used by the bottleneck analysis to describe data
+// dependencies and processor resource interferences between instructions.
+//
+// There is a node (an instance of struct DGNode) for every instruction in the
+// input assembly sequence. Edges of the graph represent dependencies between
+// instructions.
+//
+// Each edge of the graph is associated with a cost value which is used
+// internally to rank dependency based on their impact on the runtime
+// performance (see field DependencyEdge::Dependency::Cost). In general, the
+// higher the cost of an edge, the higher the impact on performance.
+//
+// The cost of a dependency is a function of both the latency and the number of
+// cycles where the dependency has been seen as critical (i.e. contributing to
+// back-pressure increases).
+//
+// Loop carried dependencies are carefully expanded by the bottleneck analysis
+// to guarantee that the graph stays acyclic. To this end, extra nodes are
+// pre-allocated at construction time to describe instructions from "past and
+// future" iterations. The graph is kept acyclic mainly because it simplifies the
+// complexity of the algorithm that computes the critical sequence.
 class DependencyGraph {
   struct DGNode {
     unsigned NumPredecessors;
+    unsigned NumVisitedPredecessors;
+    uint64_t Cost;
+    unsigned Depth;
+
+    DependencyEdge CriticalPredecessor;
     SmallVector<DependencyEdge, 8> OutgoingEdges;
   };
   SmallVector<DGNode, 16> Nodes;
@@ -148,6 +236,9 @@
   void addDependency(unsigned From, unsigned To,
                      DependencyEdge::Dependency &&DE);
 
+  void initializeRootSet(SmallVectorImpl<unsigned> &RootSet) const;
+  void propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet);
+
 #ifndef NDEBUG
   void dumpDependencyEdge(raw_ostream &OS, const DependencyEdge &DE,
                           MCInstPrinter &MCIP) const;
@@ -170,6 +261,19 @@
     addDependency(From, To, {DependencyEdge::DT_RESOURCE, Mask, Cost});
   }
 
+  // Called by the bottleneck analysis at the end of simulation to propagate
+  // costs through the edges of the graph, and compute a critical path.
+  void finalizeGraph() {
+    SmallVector<unsigned, 16> RootSet;
+    initializeRootSet(RootSet);
+    propagateThroughEdges(RootSet);
+  }
+
+  // Returns a sequence of edges representing the critical sequence based on the
+  // simulated run. It assumes that the graph has already been finalized (i.e.
+  // method `finalizeGraph()` has already been called on this graph).
+  void getCriticalSequence(SmallVectorImpl<const DependencyEdge *> &Seq) const;
+
 #ifndef NDEBUG
   void dump(raw_ostream &OS, MCInstPrinter &MCIP) const;
 #endif
@@ -178,6 +282,7 @@
 /// A view that collects and prints a few performance numbers.
 class BottleneckAnalysis : public View {
   const MCSubtargetInfo &STI;
+  MCInstPrinter &MCIP;
   PressureTracker Tracker;
   DependencyGraph DG;
 
@@ -212,6 +317,7 @@
 
   // Prints a bottleneck message to OS.
   void printBottleneckHints(raw_ostream &OS) const;
+  void printCriticalSequence(raw_ostream &OS) const;
 
 public:
   BottleneckAnalysis(const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
Index: llvm/trunk/tools/llvm-mca/Views/BottleneckAnalysis.cpp
===================================================================
--- llvm/trunk/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ llvm/trunk/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -16,6 +16,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MCA/Support.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
 
 namespace llvm {
 namespace mca {
@@ -161,12 +162,219 @@
     OS << " - MEMORY";
   } else {
     assert(DE.Type == DependencyEdge::DT_RESOURCE &&
-           "Unexpected unsupported dependency type!");
+           "Unsupported dependency type!");
     OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
   }
   OS << " - CYCLES: " << DE.Cost << '\n';
 }
+#endif // NDEBUG
+
+void DependencyGraph::initializeRootSet(
+    SmallVectorImpl<unsigned> &RootSet) const {
+  for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
+    const DGNode &N = Nodes[I];
+    if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
+      RootSet.emplace_back(I);
+  }
+}
+
+void DependencyGraph::propagateThroughEdges(
+    SmallVectorImpl<unsigned> &RootSet) {
+  SmallVector<unsigned, 8> ToVisit;
+
+  // A critical sequence is computed as the longest path from a node of the
+  // RootSet to a leaf node (i.e. a node with no successors).  The RootSet is
+  // composed of nodes with at least one successor, and no predecessors.
+  //
+  // Each node of the graph starts with an initial default cost of zero.  The
+  // cost of a node is a measure of criticality: the higher the cost, the bigger
+  // is the performance impact.
+  //
+  // This algorithm is very similar to a (reverse) Dijkstra.  Every iteration of
+  // the inner loop selects (i.e. visits) a node N from a set of `unvisited
+  // nodes`, and then propagates the cost of N to all its neighbors.
+  //
+  // The `unvisited nodes` set initially contains all the nodes from the
+  // RootSet.  A node N is added to the `unvisited nodes` if all its
+  // predecessors have been visited already.
+  // 
+  // For simplicity, every node tracks the number of unvisited incoming edges in
+  // field `NumVisitedPredecessors`.  When the value of that field drops to
+  // zero, then the corresponding node is added to a `ToVisit` set.
+  //
+  // At the end of every iteration of the outer loop, set `ToVisit` becomes our
+  // new `unvisited nodes` set.
+  // 
+  // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
+  // is empty. This algorithm works under the assumption that the graph is
+  // acyclic.
+  do {
+    for (unsigned IID : RootSet) {
+      const DGNode &N = Nodes[IID];
+      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
+        unsigned ToIID = DepEdge.ToIID;
+        DGNode &To = Nodes[ToIID];
+        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
+        // Check if this is the most expensive incoming edge seen so far.  In
+        // case, update the total cost of the destination node (ToIID), as well
+        // its field `CriticalPredecessor`.
+        if (Cost > To.Cost) {
+          To.CriticalPredecessor = DepEdge;
+          To.Cost = Cost;
+          To.Depth = N.Depth + 1;
+        }
+        To.NumVisitedPredecessors++;
+        if (To.NumVisitedPredecessors == To.NumPredecessors)
+          ToVisit.emplace_back(ToIID);
+      }
+    }
+
+    std::swap(RootSet, ToVisit);
+    ToVisit.clear();
+  } while (!RootSet.empty());
+}
+
+void DependencyGraph::getCriticalSequence(
+    SmallVectorImpl<const DependencyEdge *> &Seq) const {
+  // At this stage, nodes of the graph have been already visited, and costs have
+  // been propagated through the edges (see method `propagateThroughEdges()`).
+
+  // Identify the node N with the highest cost in the graph. By construction,
+  // that node is the last instruction of our critical sequence.
+  // Field N.Depth would tell us the total length of the sequence.
+  //
+  // To obtain the sequence of critical edges, we simply follow the chain of critical
+  // predecessors starting from node N (field DGNode::CriticalPredecessor).
+  const auto It = std::max_element(
+      Nodes.begin(), Nodes.end(),
+      [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
+  unsigned IID = std::distance(Nodes.begin(), It);
+  Seq.resize(Nodes[IID].Depth);
+  for (unsigned I = Seq.size(), E = 0; I > E; --I) {
+    const DGNode &N = Nodes[IID];
+    Seq[I - 1] = &N.CriticalPredecessor;
+    IID = N.CriticalPredecessor.FromIID;
+  }
+}
+
+static void printInstruction(formatted_raw_ostream &FOS,
+                             const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
+                             const MCInst &MCI,
+                             bool UseDifferentColor = false) {
+  std::string Instruction;
+  raw_string_ostream InstrStream(Instruction);
+
+  FOS.PadToColumn(14);
+
+  MCIP.printInst(&MCI, InstrStream, "", STI);
+  InstrStream.flush();
+
+  if (UseDifferentColor)
+    FOS.changeColor(raw_ostream::CYAN, true, false);
+  FOS << StringRef(Instruction).ltrim();
+  if (UseDifferentColor)
+    FOS.resetColor();
+}
+
+void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
+  SmallVector<const DependencyEdge *, 16> Seq;
+  DG.getCriticalSequence(Seq);
+  if (Seq.empty())
+    return;
+
+  OS << "\nCritical sequence based on the simulation:\n\n";
+
+  const DependencyEdge &FirstEdge = *Seq[0];
+  unsigned FromIID = FirstEdge.FromIID % Source.size();
+  unsigned ToIID = FirstEdge.ToIID % Source.size();
+  bool IsLoopCarried = FromIID >= ToIID;
+
+  formatted_raw_ostream FOS(OS);
+  FOS.PadToColumn(14);
+  FOS << "Instruction";
+  FOS.PadToColumn(58);
+  FOS << "Dependency Information";
+
+  bool HasColors = FOS.has_colors();
+
+  unsigned CurrentIID = 0;
+  if (IsLoopCarried) {
+    FOS << "\n +----< " << FromIID << ".";
+    printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
+    FOS << "\n |\n |    < loop carried > \n |";
+  } else {
+    while (CurrentIID < FromIID) {
+      FOS << "\n        " << CurrentIID << ".";
+      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+      CurrentIID++;
+    }
 
+    FOS << "\n +----< " << CurrentIID << ".";
+    printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+    CurrentIID++;
+  }
+
+  for (const DependencyEdge *&DE : Seq) {
+    ToIID = DE->ToIID % Source.size();
+    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
+
+    while (CurrentIID < LastIID) {
+      FOS << "\n |      " << CurrentIID << ".";
+      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+      CurrentIID++;
+    }
+
+    if (CurrentIID == ToIID) {
+      FOS << "\n +----> " << ToIID << ".";
+      printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
+    } else {
+      FOS << "\n |\n |    < loop carried > \n |"
+          << "\n +----> " << ToIID << ".";
+      printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
+    }
+    FOS.PadToColumn(58);
+
+    const DependencyEdge::Dependency &Dep = DE->Dep;
+    if (HasColors)
+      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+
+    if (Dep.Type == DependencyEdge::DT_REGISTER) {
+      FOS << "## REGISTER dependency:  ";
+      if (HasColors)
+        FOS.changeColor(raw_ostream::MAGENTA, true, false);
+      MCIP.printRegName(FOS, Dep.ResourceOrRegID);
+    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
+      FOS << "## MEMORY dependency.";
+    } else {
+      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
+             "Unsupported dependency type!");
+      FOS << "## RESOURCE interference:  ";
+      if (HasColors)
+        FOS.changeColor(raw_ostream::MAGENTA, true, false);
+      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
+      if (HasColors) {
+        FOS.resetColor();
+        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
+      }
+      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
+          << "% ]";
+    }
+    if (HasColors)
+      FOS.resetColor();
+    ++CurrentIID;
+  }
+
+  while (CurrentIID < Source.size()) {
+    FOS << "\n        " << CurrentIID << ".";
+    printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
+    CurrentIID++;
+  }
+
+  FOS << '\n';
+  FOS.flush();
+}
+
+#ifndef NDEBUG
 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
   OS << "\nREG DEPS\n";
   for (const DGNode &Node : Nodes)
@@ -200,10 +408,11 @@
 
   if (It != Vec.end()) {
     It->Dep.Cost += Dep.Cost;
+    It->Frequency++;
     return;
   }
 
-  DependencyEdge DE = {Dep, From, To};
+  DependencyEdge DE = {Dep, From, To, 1};
   Vec.emplace_back(DE);
   NodeTo.NumPredecessors++;
 }
@@ -211,7 +420,7 @@
 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                        MCInstPrinter &Printer,
                                        ArrayRef<MCInst> S, unsigned NumIter)
-    : STI(sti), Tracker(STI.getSchedModel()), DG(S.size() * 3),
+    : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
       Source(S), Iterations(NumIter), TotalCycles(0),
       PressureIncreasedBecauseOfResources(false),
       PressureIncreasedBecauseOfRegisterDependencies(false),
@@ -274,38 +483,38 @@
   const Instruction &IS = *Event.IR.getInstruction();
   unsigned To = IID % Source.size();
 
-  unsigned Cycles = Tracker.getResourcePressureCycles(IID);
-  if (Cycles) {
-    uint64_t ResourceMask = IS.getCriticalResourceMask();
-    SmallVector<std::pair<unsigned, unsigned>, 4> Users;
-    while (ResourceMask) {
-      uint64_t Current = ResourceMask & (-ResourceMask);
-      Tracker.getResourceUsers(Current, Users);
-      for (const std::pair<unsigned, unsigned> &U : Users) {
-        unsigned Cost = std::min(U.second, Cycles);
-        addResourceDep(U.first % Source.size(), To, Current, Cost);
-      }
-      Users.clear();
-      ResourceMask ^= Current;
-    }
+  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
+  uint64_t ResourceMask = IS.getCriticalResourceMask();
+  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
+  while (ResourceMask) {
+    uint64_t Current = ResourceMask & (-ResourceMask);
+    Tracker.getResourceUsers(Current, Users);
+    for (const std::pair<unsigned, unsigned> &U : Users)
+      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
+    Users.clear();
+    ResourceMask ^= Current;
   }
 
-  Cycles = Tracker.getRegisterPressureCycles(IID);
-  if (Cycles) {
-    const CriticalDependency &RegDep = IS.getCriticalRegDep();
+  const CriticalDependency &RegDep = IS.getCriticalRegDep();
+  if (RegDep.Cycles) {
+    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
     unsigned From = RegDep.IID % Source.size();
     addRegisterDep(From, To, RegDep.RegID, Cycles);
   }
 
-  Cycles = Tracker.getMemoryPressureCycles(IID);
-  if (Cycles) {
-    const CriticalDependency &MemDep = IS.getCriticalMemDep();
+  const CriticalDependency &MemDep = IS.getCriticalMemDep();
+  if (MemDep.Cycles) {
+    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
     unsigned From = MemDep.IID % Source.size();
     addMemoryDep(From, To, Cycles);
   }
 
   Tracker.handleInstructionIssuedEvent(
       static_cast<const HWInstructionIssuedEvent &>(Event));
+
+  // Check if this is the last simulated instruction.
+  if (IID == ((Iterations * Source.size()) - 1))
+    DG.finalizeGraph();
 }
 
 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
@@ -356,7 +565,7 @@
 
 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
   if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
-    OS << "\nNo resource or data dependency bottlenecks discovered.\n";
+    OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
     return;
   }
 
@@ -370,7 +579,7 @@
   double MemDepPressurePerCycle =
       (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
 
-  OS << "\nCycles with backend pressure increase [ "
+  OS << "\n\nCycles with backend pressure increase [ "
      << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
 
   OS << "\nThroughput Bottlenecks: "
@@ -399,7 +608,7 @@
      << "% ]";
   OS << "\n  - Memory Dependencies   [ "
      << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
-     << "% ]\n\n";
+     << "% ]\n";
 }
 
 void BottleneckAnalysis::printView(raw_ostream &OS) const {
@@ -408,6 +617,7 @@
   printBottleneckHints(TempStream);
   TempStream.flush();
   OS << Buffer;
+  printCriticalSequence(OS);
 }
 
 } // namespace mca.