diff --git a/llvm/test/CodeGen/AMDGPU/fexp.ll b/llvm/test/CodeGen/AMDGPU/fexp.ll
--- a/llvm/test/CodeGen/AMDGPU/fexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fexp.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ;RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
 ;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
 ;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
diff --git a/llvm/test/CodeGen/AMDGPU/load-lo16.ll b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
--- a/llvm/test/CodeGen/AMDGPU/load-lo16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-lo16.ll
@@ -994,13 +994,14 @@
 ; GFX803:       ; %bb.0: ; %entry
 ; GFX803-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX803-NEXT:    flat_load_ushort v0, v[0:1]
-; FIXME: and should be removable
 ; GFX803-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
 ; GFX803-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX803-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX803-NEXT:    flat_store_dword v[0:1], v0
 ; GFX803-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX803-NEXT:    s_setpc_b64 s[30:31]
+
+; FIXME: The v_and_b32 above should be removable.
 entry:
   %reg.bc = bitcast i32 %reg to <2 x half>
   %load = load half, half* %in
diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
--- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
@@ -5,7 +5,7 @@
 define hidden <4 x float> @split_v4f32_arg(<4 x float> returned %arg) local_unnamed_addr #0 !dbg !7 {
 ; GCN-LABEL: split_v4f32_arg:
 ; GCN:       .Lfunc_begin0:
-; GCN-NEXT:    .file 0
+; GCN-NEXT:    .file 0 "/tmp/dbg.cl" md5 0x0f834f91e91489a5ff6308040ddbd175
 ; GCN-NEXT:    .loc 0 3 0 ; /tmp/dbg.cl:3:0
 ; GCN-NEXT:  ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900  < %s | FileCheck -check-prefix=GFX9 %s
 
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -13,18 +13,10 @@
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; CHECK-NEXT:    s_mov_b32 s2, -1
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-
-; FIXME: The change related to the fact that
-; DetectDeadLanes pass hit "Copy across incompatible class" SGPR -> VGPR in analysis
-; and hence it cannot derive the fact that the vector element is unused.
-; Such a copies appear because the float4 vectors and their elements in the test are uniform
-; but the PHI node in "ife" block is divergent because of the CF dependency (divergent branch in bb0)
-
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s4
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s5
 ; CHECK-NEXT:    v_mov_b32_e32 v2, s6
 ; CHECK-NEXT:    v_mov_b32_e32 v3, s7
-
 ; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
 ; CHECK-NEXT:    ; mask branch BB0_2
 ; CHECK-NEXT:  BB0_1: ; %ift
@@ -38,6 +30,12 @@
 ; CHECK-NEXT:    s_mov_b32 s3, 0xf000
 ; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], 0
 ; CHECK-NEXT:    s_endpgm
+
+; FIXME: The change is related to the fact that the
+; DetectDeadLanes pass hits "Copy across incompatible class" (SGPR -> VGPR) in its analysis
+; and hence cannot derive the fact that the vector element in the "ift" block is unused.
+; Such copies appear because the float4 vectors and their elements in the test are uniform,
+; but the PHI node in the "ife" block is divergent because of the CF dependency (divergent branch in bb0).
 entry:
   %v0 = insertelement <4 x float> undef, float %a0, i32 0
   %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0