diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected
--- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected
+++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected
@@ -4,14 +4,6 @@
 void __test_offloading_42_abcdef_bar_l123();
 void use(int);
 
-void foo(int a)
-{
-    #pragma omp target
-        use(a);
-
-    __test_offloading_42_abcdef_bar_l123();
-    int somevar_abc123_;
-}
 // CHECK-LABEL: @foo(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
@@ -26,7 +18,14 @@
 // CHECK-NEXT:    call void (...) @{{__test_offloading_[a-z0-9]+_[a-z0-9]+_bar_l[0-9]+}}()
 // CHECK-NEXT:    ret void
 //
-//
+void foo(int a)
+{
+    #pragma omp target
+        use(a);
+
+    __test_offloading_42_abcdef_bar_l123();
+    int somevar_abc123_;
+}
 // CHECK-LABEL: @{{__omp_offloading_[a-z0-9]+_[a-z0-9]+_foo_l[0-9]+}}(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected
--- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected
+++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected
@@ -9,6 +9,43 @@
 
 void foo(void);
 
+// OMP-LABEL: @main(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// OMP-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// OMP-NEXT:    store i32 0, i32* [[I]], align 4
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void @foo()
+// OMP-NEXT:    ret i32 0
+//
+// NOOMP-LABEL: @main(
+// NOOMP-NEXT:  entry:
+// NOOMP-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
+// NOOMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NOOMP-NEXT:    store i32 0, i32* [[RETVAL]], align 4
+// NOOMP-NEXT:    store i32 0, i32* [[I]], align 4
+// NOOMP-NEXT:    store i32 0, i32* [[I]], align 4
+// NOOMP-NEXT:    br label [[FOR_COND:%.*]]
+// NOOMP:       for.cond:
+// NOOMP-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I]], align 4
+// NOOMP-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 33554432
+// NOOMP-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// NOOMP:       for.body:
+// NOOMP-NEXT:    [[TMP1:%.*]] = load i32, i32* [[I]], align 4
+// NOOMP-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// NOOMP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], [33554432 x double]* @A, i64 0, i64 [[IDXPROM]]
+// NOOMP-NEXT:    store double 0.000000e+00, double* [[ARRAYIDX]], align 8
+// NOOMP-NEXT:    br label [[FOR_INC:%.*]]
+// NOOMP:       for.inc:
+// NOOMP-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I]], align 4
+// NOOMP-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// NOOMP-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// NOOMP-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
+// NOOMP:       for.end:
+// NOOMP-NEXT:    call void @foo()
+// NOOMP-NEXT:    ret i32 0
+//
 int main() {
   int i = 0;
 
@@ -22,25 +59,6 @@
   return 0;
 }
 
-void foo(void) {
-  int i = 0;
-
-#pragma omp parallel for
-  for (i = 0; i < size; ++i) {
-    A[i] = 1.0;
-  }
-}
-// OMP-LABEL: @main(
-// OMP-NEXT:  entry:
-// OMP-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
-// OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
-// OMP-NEXT:    store i32 0, i32* [[RETVAL]], align 4
-// OMP-NEXT:    store i32 0, i32* [[I]], align 4
-// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
-// OMP-NEXT:    call void @foo()
-// OMP-NEXT:    ret i32 0
-//
-//
 // OMP-LABEL: @.omp_outlined.(
 // OMP-NEXT:  entry:
 // OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -103,7 +121,6 @@
 // OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
 // OMP-NEXT:    ret void
 //
-//
 // OMP-LABEL: @foo(
 // OMP-NEXT:  entry:
 // OMP-NEXT:    [[I:%.*]] = alloca i32, align 4
@@ -111,7 +128,38 @@
 // OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
 // OMP-NEXT:    ret void
 //
+// NOOMP-LABEL: @foo(
+// NOOMP-NEXT:  entry:
+// NOOMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NOOMP-NEXT:    store i32 0, i32* [[I]], align 4
+// NOOMP-NEXT:    store i32 0, i32* [[I]], align 4
+// NOOMP-NEXT:    br label [[FOR_COND:%.*]]
+// NOOMP:       for.cond:
+// NOOMP-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I]], align 4
+// NOOMP-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 33554432
+// NOOMP-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// NOOMP:       for.body:
+// NOOMP-NEXT:    [[TMP1:%.*]] = load i32, i32* [[I]], align 4
+// NOOMP-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// NOOMP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], [33554432 x double]* @A, i64 0, i64 [[IDXPROM]]
+// NOOMP-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX]], align 8
+// NOOMP-NEXT:    br label [[FOR_INC:%.*]]
+// NOOMP:       for.inc:
+// NOOMP-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I]], align 4
+// NOOMP-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// NOOMP-NEXT:    store i32 [[INC]], i32* [[I]], align 4
+// NOOMP-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
+// NOOMP:       for.end:
+// NOOMP-NEXT:    ret void
 //
+void foo(void) {
+  int i = 0;
+
+#pragma omp parallel for
+  for (i = 0; i < size; ++i) {
+    A[i] = 1.0;
+  }
+}
 // OMP-LABEL: @.omp_outlined..1(
 // OMP-NEXT:  entry:
 // OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -174,56 +222,3 @@
 // OMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
 // OMP-NEXT:    ret void
 //
-//
-// NOOMP-LABEL: @main(
-// NOOMP-NEXT:  entry:
-// NOOMP-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
-// NOOMP-NEXT:    [[I:%.*]] = alloca i32, align 4
-// NOOMP-NEXT:    store i32 0, i32* [[RETVAL]], align 4
-// NOOMP-NEXT:    store i32 0, i32* [[I]], align 4
-// NOOMP-NEXT:    store i32 0, i32* [[I]], align 4
-// NOOMP-NEXT:    br label [[FOR_COND:%.*]]
-// NOOMP:       for.cond:
-// NOOMP-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I]], align 4
-// NOOMP-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 33554432
-// NOOMP-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// NOOMP:       for.body:
-// NOOMP-NEXT:    [[TMP1:%.*]] = load i32, i32* [[I]], align 4
-// NOOMP-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// NOOMP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], [33554432 x double]* @A, i64 0, i64 [[IDXPROM]]
-// NOOMP-NEXT:    store double 0.000000e+00, double* [[ARRAYIDX]], align 8
-// NOOMP-NEXT:    br label [[FOR_INC:%.*]]
-// NOOMP:       for.inc:
-// NOOMP-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I]], align 4
-// NOOMP-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// NOOMP-NEXT:    store i32 [[INC]], i32* [[I]], align 4
-// NOOMP-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// NOOMP:       for.end:
-// NOOMP-NEXT:    call void @foo()
-// NOOMP-NEXT:    ret i32 0
-//
-//
-// NOOMP-LABEL: @foo(
-// NOOMP-NEXT:  entry:
-// NOOMP-NEXT:    [[I:%.*]] = alloca i32, align 4
-// NOOMP-NEXT:    store i32 0, i32* [[I]], align 4
-// NOOMP-NEXT:    store i32 0, i32* [[I]], align 4
-// NOOMP-NEXT:    br label [[FOR_COND:%.*]]
-// NOOMP:       for.cond:
-// NOOMP-NEXT:    [[TMP0:%.*]] = load i32, i32* [[I]], align 4
-// NOOMP-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 33554432
-// NOOMP-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-// NOOMP:       for.body:
-// NOOMP-NEXT:    [[TMP1:%.*]] = load i32, i32* [[I]], align 4
-// NOOMP-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// NOOMP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], [33554432 x double]* @A, i64 0, i64 [[IDXPROM]]
-// NOOMP-NEXT:    store double 1.000000e+00, double* [[ARRAYIDX]], align 8
-// NOOMP-NEXT:    br label [[FOR_INC:%.*]]
-// NOOMP:       for.inc:
-// NOOMP-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I]], align 4
-// NOOMP-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP2]], 1
-// NOOMP-NEXT:    store i32 [[INC]], i32* [[I]], align 4
-// NOOMP-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
-// NOOMP:       for.end:
-// NOOMP-NEXT:    ret void
-//
diff --git a/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c b/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c
new file mode 100644
--- /dev/null
+++ b/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c
@@ -0,0 +1,39 @@
+// Check that the CHECK lines are generated for clang-generated functions
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp %s -emit-llvm -o - | FileCheck --check-prefix=OMP %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -o - | FileCheck --check-prefix=NOOMP %s
+
+void t0() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
+
+void t1() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
+
+void t2() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
diff --git a/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c.lots-of-generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c.lots-of-generated.expected
new file mode 100644
--- /dev/null
+++ b/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c.lots-of-generated.expected
@@ -0,0 +1,145 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs
+// Check that the CHECK lines are generated for clang-generated functions
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp %s -emit-llvm -o - | FileCheck --check-prefix=OMP %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -o - | FileCheck --check-prefix=NOOMP %s
+
+// OMP-LABEL: @t0(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    ret void
+//
+// NOOMP-LABEL: @t0(
+// NOOMP-NEXT:  entry:
+// NOOMP-NEXT:    ret void
+//
+void t0() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
+
+// OMP-LABEL: @.omp_outlined.(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
+// OMP-LABEL: @.omp_outlined..1(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
+// OMP-LABEL: @.omp_outlined..2(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
+// OMP-LABEL: @t1(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    ret void
+//
+// NOOMP-LABEL: @t1(
+// NOOMP-NEXT:  entry:
+// NOOMP-NEXT:    ret void
+//
+void t1() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
+
+// OMP-LABEL: @.omp_outlined..3(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
+// OMP-LABEL: @.omp_outlined..4(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
+// OMP-LABEL: @.omp_outlined..5(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
+// OMP-LABEL: @t2(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    ret void
+//
+// NOOMP-LABEL: @t2(
+// NOOMP-NEXT:  entry:
+// NOOMP-NEXT:    ret void
+//
+void t2() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
+// OMP-LABEL: @.omp_outlined..6(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
+// OMP-LABEL: @.omp_outlined..7(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
+// OMP-LABEL: @.omp_outlined..8(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// OMP-NEXT:    store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP-NEXT:    store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// OMP-NEXT:    ret void
+//
diff --git a/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c.no-lots-of-generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c.no-lots-of-generated.expected
new file mode 100644
--- /dev/null
+++ b/clang/test/utils/update_cc_test_checks/Inputs/lots-of-generated-funcs.c.no-lots-of-generated.expected
@@ -0,0 +1,73 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// Check that the CHECK lines are generated for clang-generated functions
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp %s -emit-llvm -o - | FileCheck --check-prefix=OMP %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -o - | FileCheck --check-prefix=NOOMP %s
+
+// OMP-LABEL: @t0(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    ret void
+//
+// NOOMP-LABEL: @t0(
+// NOOMP-NEXT:  entry:
+// NOOMP-NEXT:    ret void
+//
+void t0() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
+
+// OMP-LABEL: @t1(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    ret void
+//
+// NOOMP-LABEL: @t1(
+// NOOMP-NEXT:  entry:
+// NOOMP-NEXT:    ret void
+//
+void t1() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
+
+// OMP-LABEL: @t2(
+// OMP-NEXT:  entry:
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*))
+// OMP-NEXT:    ret void
+//
+// NOOMP-LABEL: @t2(
+// NOOMP-NEXT:  entry:
+// NOOMP-NEXT:    ret void
+//
+void t2() {
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+  #pragma omp parallel
+  {
+  }
+}
diff --git a/clang/test/utils/update_cc_test_checks/lots-of-generated-funcs.test b/clang/test/utils/update_cc_test_checks/lots-of-generated-funcs.test
new file mode 100644
--- /dev/null
+++ b/clang/test/utils/update_cc_test_checks/lots-of-generated-funcs.test
@@ -0,0 +1,12 @@
+## Test that CHECK lines are generated for clang-generated functions
+
+# RUN: cp %S/Inputs/lots-of-generated-funcs.c %t-lots-of-generated.c && %update_cc_test_checks --include-generated-funcs %t-lots-of-generated.c
+# RUN: diff -u %S/Inputs/lots-of-generated-funcs.c.lots-of-generated.expected %t-lots-of-generated.c
+# RUN: cp %S/Inputs/lots-of-generated-funcs.c %t-no-lots-of-generated.c && %update_cc_test_checks %t-no-lots-of-generated.c
+# RUN: diff -u %S/Inputs/lots-of-generated-funcs.c.no-lots-of-generated.expected %t-no-lots-of-generated.c
+
+## Check that re-running update_cc_test_checks doesn't change the output
+# RUN: %update_cc_test_checks --include-generated-funcs %t-lots-of-generated.c
+# RUN: diff -u %S/Inputs/lots-of-generated-funcs.c.lots-of-generated.expected %t-lots-of-generated.c
+# RUN: %update_cc_test_checks %t-no-lots-of-generated.c
+# RUN: diff -u %S/Inputs/lots-of-generated-funcs.c.no-lots-of-generated.expected %t-no-lots-of-generated.c
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -290,6 +290,24 @@
   def __str__(self):
     return self.scrub
 
+# Merge the per-prefix function orders into a single total order. Returns None
+# if two prefixes list the same functions in conflicting orders.
+def get_total_function_order(prefix_list, func_order):
+  total_order = []
+  for prefix_item in prefix_list:
+    for prefix in prefix_item[0]:
+      total_idx = 0  # lowest total_order slot the next function may occupy
+      order = func_order[prefix]
+      # The builder may append a function to a prefix more than once; dedupe.
+      for func in sorted(set(order), key=order.index):
+        if func not in total_order:
+          total_order.append(func)
+        idx = total_order.index(func)
+        if idx < total_idx:
+          return None
+        total_idx = idx + 1
+  return total_order
+
 class FunctionTestBuilder:
   def __init__(self, run_list, flags, scrubber_args):
     self._verbose = flags.verbose
@@ -349,28 +367,8 @@
         print('Processing function: ' + func, file=sys.stderr)
         for l in scrubbed_body.splitlines():
           print('  ' + l, file=sys.stderr)
-      for prefix in prefixes:
-        if func in self._func_dict[prefix]:
-          if (self._func_dict[prefix][func] is None or
-              str(self._func_dict[prefix][func]) != scrubbed_body or
-              self._func_dict[prefix][func].args_and_sig != args_and_sig or
-                  self._func_dict[prefix][func].attrs != attrs):
-            if (self._func_dict[prefix][func] is not None and
-                self._func_dict[prefix][func].is_same_except_arg_names(
-                scrubbed_extra,
-                args_and_sig,
-                attrs)):
-              self._func_dict[prefix][func].scrub = scrubbed_extra
-              self._func_dict[prefix][func].args_and_sig = args_and_sig
-              continue
-            else:
-              # This means a previous RUN line produced a body for this function
-              # that is different from the one produced by this current RUN line,
-              # so the body can't be common accross RUN lines. We use None to
-              # indicate that.
-              self._func_dict[prefix][func] = None
-              continue
 
+      for prefix in prefixes:
         # Replace function names matching the regex.
         for regex in self._replace_value_regex:
           # Pattern that matches capture groups in the regex in leftmost order.
@@ -395,6 +393,27 @@
             # capture groups set.
             scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}', scrubbed_body)
 
+        if func in self._func_dict[prefix]:
+          if (self._func_dict[prefix][func] is None or
+              str(self._func_dict[prefix][func]) != scrubbed_body or
+              self._func_dict[prefix][func].args_and_sig != args_and_sig or
+                  self._func_dict[prefix][func].attrs != attrs):
+            if (self._func_dict[prefix][func] is not None and
+                self._func_dict[prefix][func].is_same_except_arg_names(
+                scrubbed_extra,
+                args_and_sig,
+                attrs)):
+              self._func_dict[prefix][func].scrub = scrubbed_extra
+              self._func_dict[prefix][func].args_and_sig = args_and_sig
+              continue
+            else:
+              # This means a previous RUN line produced a body for this function
+              # that is different from the one produced by this current RUN line,
+              # so the body can't be common across RUN lines. We use None to
+              # indicate that.
+              self._func_dict[prefix][func] = None
+              continue
+
         self._func_dict[prefix][func] = function_body(
             scrubbed_body, scrubbed_extra, args_and_sig, attrs)
         self._func_order[prefix].append(func)
diff --git a/llvm/utils/update_cc_test_checks.py b/llvm/utils/update_cc_test_checks.py
--- a/llvm/utils/update_cc_test_checks.py
+++ b/llvm/utils/update_cc_test_checks.py
@@ -305,8 +305,9 @@
                                                       lambda args: ti.args.include_generated_funcs,
                                                       '--include-generated-funcs',
                                                       True)
+    total_function_order = common.get_total_function_order(filecheck_run_list, builder.func_order())
 
-    if include_generated_funcs:
+    if include_generated_funcs and not total_function_order:
       # Generate the appropriate checks for each function.  We need to emit
       # these in the order according to the generated output so that CHECK-LABEL
       # works properly.  func_order provides that.
@@ -337,6 +338,62 @@
                                check_generator(my_output_lines,
                                                prefixes, func))
     else:
+      trailing_functions = []
+      # When generated functions are included, queue each one on the line of the next source
+      # function in the total order; those after the last source function become trailing.
+      if include_generated_funcs:
+        seen_mangled = set()
+        total_order_idx = 0
+        for line in line2spell_and_mangled_list.keys():
+          to_be_added = []
+          for spell, mangled in line2spell_and_mangled_list[line]:
+            if mangled not in total_function_order or mangled in seen_mangled:
+              continue
+            seen_mangled.add(mangled)
+
+            # Queue every not-yet-consumed entry of the total order that precedes this function.
+            # NOTE(review): if index < total_order_idx, the += 1 below skips an unqueued entry.
+            index = total_function_order.index(mangled)
+            while total_order_idx < index:
+              to_be_added.append(total_function_order[total_order_idx])
+              total_order_idx += 1
+            # Account for the function itself (spell, mangled)
+            total_order_idx += 1
+          # insert(0, ...) prepends, so walk the batch back to front; it then sits in total
+          # order ahead of the functions already on this line. An empty spelling marks them.
+          to_be_added.reverse()
+          for tba in to_be_added:
+            line2spell_and_mangled_list[line].insert(0, ('', tba))
+
+        # All functions we have not handled yet are added to the end.
+        while total_order_idx < len(total_function_order):
+          func = total_function_order[total_order_idx]
+          trailing_functions.append(func)
+          total_order_idx += 1
+
+      def handle_functions_on_line(output_lines, line, args, spell, mangled, added, include_line):
+        # Emit the check lines for `mangled` and return whether the caller should
+        # still emit `line`. Nothing happens if `mangled` is already in `added` (one
+        # line may declare several functions) or its non-empty spelling is not on
+        # `line` (the line number may come from an included file).
+        if mangled in added or (spell and spell not in line):
+          return include_line
+        if args.functions is None or any(re.search(regex, spell) for regex in args.functions):
+          last_line = output_lines[-1].strip()
+          while last_line == '//':
+            # Remove the comment line since we will generate a new comment
+            # line as part of common.add_ir_checks()
+            output_lines.pop()
+            last_line = output_lines[-1].strip()
+          if added:
+            output_lines.append('//')
+          added.add(mangled)
+          common.add_ir_checks(output_lines, '//', filecheck_run_list, func_dict, mangled,
+                               False, args.function_signature, global_vars_seen_dict)
+          # A bare '//' source line must not be copied after the new checks.
+          return include_line and line.rstrip('\n') != '//'
+        return include_line
+
       # Normal mode.  Put checks before each source function.
       for line_info in ti.iterlines(output_lines):
         idx = line_info.line_number
@@ -349,31 +406,17 @@
         if idx in line2spell_and_mangled_list:
           added = set()
           for spell, mangled in line2spell_and_mangled_list[idx]:
-            # One line may contain multiple function declarations.
-            # Skip if the mangled name has been added before.
-            # The line number may come from an included file,
-            # we simply require the spelling name to appear on the line
-            # to exclude functions from other files.
-            if mangled in added or spell not in line:
-              continue
-            if args.functions is None or any(re.search(regex, spell) for regex in args.functions):
-              last_line = output_lines[-1].strip()
-              while last_line == '//':
-                # Remove the comment line since we will generate a new  comment
-                # line as part of common.add_ir_checks()
-                output_lines.pop()
-                last_line = output_lines[-1].strip()
-              if added:
-                output_lines.append('//')
-              added.add(mangled)
-              common.add_ir_checks(output_lines, '//', filecheck_run_list, func_dict, mangled,
-                                   False, args.function_signature, global_vars_seen_dict)
-              if line.rstrip('\n') == '//':
-                include_line = False
+            include_line = handle_functions_on_line(output_lines, line, args, spell, mangled, added, include_line)
 
         if include_line:
           output_lines.append(line.rstrip('\n'))
 
+      # Finally append the check lines of the generated functions that follow the last source
+      # function in the total order; no source line is pending, so include_line is a constant.
+      added = set()
+      for func in trailing_functions:
+        handle_functions_on_line(output_lines, func, args, func, func, added, True)
+
     common.debug('Writing %d lines to %s...' % (len(output_lines), ti.path))
     with open(ti.path, 'wb') as f:
       f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
diff --git a/llvm/utils/update_test_checks.py b/llvm/utils/update_test_checks.py
--- a/llvm/utils/update_test_checks.py
+++ b/llvm/utils/update_test_checks.py
@@ -138,6 +138,9 @@
                                                       '--include-generated-funcs',
                                                       True)
 
+    # TODO: use common.get_total_function_order to avoid adding new functions at the end but
+    #       instead place them in order between existing ones. See the CC script.
+
     if include_generated_funcs:
       # Generate the appropriate checks for each function.  We need to emit
       # these in the order according to the generated output so that CHECK-LABEL