diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -16315,6 +16315,21 @@
   return true;
 }
 
+static BinaryOperatorKind
+getRelatedCompoundReductionOp(BinaryOperatorKind BOK) {
+  if (BOK == BO_Add)
+    return BO_AddAssign;
+  if (BOK == BO_Mul)
+    return BO_MulAssign;
+  if (BOK == BO_And)
+    return BO_AndAssign;
+  if (BOK == BO_Or)
+    return BO_OrAssign;
+  if (BOK == BO_Xor)
+    return BO_XorAssign;
+  return BOK;
+}
+
 static bool actOnOMPReductionKindClause(
     Sema &S, DSAStackTy *Stack, OpenMPClauseKind ClauseKind,
     ArrayRef<Expr *> VarList, SourceLocation StartLoc, SourceLocation LParenLoc,
@@ -16840,25 +16855,35 @@
           CallExpr::Create(Context, OVE, Args, Context.VoidTy, VK_RValue, ELoc,
                            S.CurFPFeatureOverrides());
     } else {
-      ReductionOp = S.BuildBinOp(
-          Stack->getCurScope(), ReductionId.getBeginLoc(), BOK, LHSDRE, RHSDRE);
-      if (ReductionOp.isUsable()) {
-        if (BOK != BO_LT && BOK != BO_GT) {
-          ReductionOp =
-              S.BuildBinOp(Stack->getCurScope(), ReductionId.getBeginLoc(),
-                           BO_Assign, LHSDRE, ReductionOp.get());
-        } else {
-          auto *ConditionalOp = new (Context)
-              ConditionalOperator(ReductionOp.get(), ELoc, LHSDRE, ELoc, RHSDRE,
-                                  Type, VK_LValue, OK_Ordinary);
-          ReductionOp =
-              S.BuildBinOp(Stack->getCurScope(), ReductionId.getBeginLoc(),
-                           BO_Assign, LHSDRE, ConditionalOp);
+      BinaryOperatorKind CombBOK = getRelatedCompoundReductionOp(BOK);
+      if (Type->isRecordType() && CombBOK != BOK) {
+        Sema::TentativeAnalysisScope Trap(S);
+        ReductionOp =
+            S.BuildBinOp(Stack->getCurScope(), ReductionId.getBeginLoc(),
+                         CombBOK, LHSDRE, RHSDRE);
+      }
+      if (!ReductionOp.isUsable()) {
+        ReductionOp =
+            S.BuildBinOp(Stack->getCurScope(), ReductionId.getBeginLoc(), BOK,
+                         LHSDRE, RHSDRE);
+        if (ReductionOp.isUsable()) {
+          if (BOK != BO_LT && BOK != BO_GT) {
+            ReductionOp =
+                S.BuildBinOp(Stack->getCurScope(), ReductionId.getBeginLoc(),
+                             BO_Assign, LHSDRE, ReductionOp.get());
+          } else {
+            auto *ConditionalOp = new (Context)
+                ConditionalOperator(ReductionOp.get(), ELoc, LHSDRE, ELoc,
+                                    RHSDRE, Type, VK_LValue, OK_Ordinary);
+            ReductionOp =
+                S.BuildBinOp(Stack->getCurScope(), ReductionId.getBeginLoc(),
+                             BO_Assign, LHSDRE, ConditionalOp);
+          }
         }
-        if (ReductionOp.isUsable())
-          ReductionOp = S.ActOnFinishFullExpr(ReductionOp.get(),
-                                              /*DiscardedValue*/ false);
       }
+      if (ReductionOp.isUsable())
+        ReductionOp = S.ActOnFinishFullExpr(ReductionOp.get(),
+                                            /*DiscardedValue*/ false);
       if (!ReductionOp.isUsable())
         continue;
     }
diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_messages.cpp
--- a/clang/test/OpenMP/distribute_parallel_for_reduction_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_messages.cpp
@@ -404,7 +404,7 @@
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} expected-warning {{Type 'S4' is not trivially copyable and not guaranteed to be mapped correctly}} expected-warning {{Type 'S5' is not trivially copyable and not guaranteed to be mapped correctly}}
+#pragma omp distribute parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} expected-warning {{Type 'S4' is not trivially copyable and not guaranteed to be mapped correctly}} expected-warning {{Type 'S5' is not trivially copyable and not guaranteed to be mapped correctly}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp target
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_reduction_messages.cpp
@@ -403,7 +403,7 @@
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp distribute parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp target
diff --git a/clang/test/OpenMP/distribute_simd_reduction_messages.cpp b/clang/test/OpenMP/distribute_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/distribute_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/distribute_simd_reduction_messages.cpp
@@ -409,7 +409,7 @@
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} expected-warning {{Type 'S4' is not trivially copyable and not guaranteed to be mapped correctly}} expected-warning {{Type 'S5' is not trivially copyable and not guaranteed to be mapped correctly}}}
+#pragma omp distribute simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}} expected-warning {{Type 'S4' is not trivially copyable and not guaranteed to be mapped correctly}} expected-warning {{Type 'S5' is not trivially copyable and not guaranteed to be mapped correctly}}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp target
diff --git a/clang/test/OpenMP/for_reduction_messages.cpp b/clang/test/OpenMP/for_reduction_messages.cpp
--- a/clang/test/OpenMP/for_reduction_messages.cpp
+++ b/clang/test/OpenMP/for_reduction_messages.cpp
@@ -354,7 +354,7 @@
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp parallel
-#pragma omp for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp parallel
diff --git a/clang/test/OpenMP/for_simd_reduction_messages.cpp b/clang/test/OpenMP/for_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/for_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/for_simd_reduction_messages.cpp
@@ -345,7 +345,7 @@
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp parallel
-#pragma omp for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp parallel
diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_messages.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_messages.cpp
--- a/clang/test/OpenMP/master_taskloop_in_reduction_messages.cpp
+++ b/clang/test/OpenMP/master_taskloop_in_reduction_messages.cpp
@@ -344,7 +344,7 @@
 #pragma omp master taskloop in_reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be in_reduction}}
   for (int i = 0; i < 10; ++i)
   foo();
-#pragma omp master taskloop in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp master taskloop in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
   foo();
 #pragma omp taskgroup task_reduction(+:k)
diff --git a/clang/test/OpenMP/master_taskloop_reduction_messages.cpp b/clang/test/OpenMP/master_taskloop_reduction_messages.cpp
--- a/clang/test/OpenMP/master_taskloop_reduction_messages.cpp
+++ b/clang/test/OpenMP/master_taskloop_reduction_messages.cpp
@@ -312,7 +312,7 @@
 #pragma omp master taskloop reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp master taskloop reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp master taskloop reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp master taskloop reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_messages.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_messages.cpp
--- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_messages.cpp
+++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_messages.cpp
@@ -344,7 +344,7 @@
 #pragma omp master taskloop simd in_reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be in_reduction}}
   for (int i = 0; i < 10; ++i)
   foo();
-#pragma omp master taskloop simd in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp master taskloop simd in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
   foo();
 #pragma omp taskgroup task_reduction(+:k)
diff --git a/clang/test/OpenMP/master_taskloop_simd_reduction_messages.cpp b/clang/test/OpenMP/master_taskloop_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/master_taskloop_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/master_taskloop_simd_reduction_messages.cpp
@@ -312,7 +312,7 @@
 #pragma omp master taskloop simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp master taskloop simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp master taskloop simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp master taskloop simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
@@ -195,7 +195,7 @@
 // CHECK1-NEXT:    [[TMP20:%.*]] = bitcast float* [[REF_TMP2]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]]
 // CHECK1-NEXT:    store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA16]]
-// CHECK1-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]]
+// CHECK1-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR7:[0-9]+]]
 // CHECK1-NEXT:    [[TMP21:%.*]] = bitcast float* [[REF_TMP2]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP21]]) #[[ATTR5]]
 // CHECK1-NEXT:    [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8*
@@ -259,7 +259,7 @@
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8
-// CHECK1-NEXT:    call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]]
+// CHECK1-NEXT:    call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR7]]
 // CHECK1-NEXT:    ret void
 //
 //
@@ -289,7 +289,6 @@
 // CHECK1-NEXT:    [[REF_TMP15:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    [[REF_TMP16:%.*]] = alloca float, align 4
 // CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK1-NEXT:    [[REF_TMP21:%.*]] = alloca %"class.std::complex", align 4
 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]]
@@ -350,7 +349,7 @@
 // CHECK1-NEXT:    [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]]
 // CHECK1-NEXT:    store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA16]]
-// CHECK1-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]]
+// CHECK1-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8*
@@ -412,8 +411,8 @@
 // CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]]
 // CHECK1-NEXT:    [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float
 // CHECK1-NEXT:    store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA16]]
-// CHECK1-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]]
-// CHECK1-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]]
+// CHECK1-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR7]]
+// CHECK1-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]]
 // CHECK1-NEXT:    [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8*
@@ -454,45 +453,32 @@
 // CHECK1-NEXT:    [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1
 // CHECK1-NEXT:    br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
 // CHECK1:       .omp.reduction.then:
-// CHECK1-NEXT:    [[TMP62:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP62]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[CALL22:%.*]] = call %"class.std::complex" @_ZStplIfESt7complexIT_ERKS2_S4_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]]
-// CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[REF_TMP21]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP64:%.*]] = extractvalue %"class.std::complex" [[CALL22]], 0
-// CHECK1-NEXT:    store float [[TMP64]], float* [[TMP63]], align 4
-// CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[REF_TMP21]], i32 0, i32 1
-// CHECK1-NEXT:    [[TMP66:%.*]] = extractvalue %"class.std::complex" [[CALL22]], 1
-// CHECK1-NEXT:    store float [[TMP66]], float* [[TMP65]], align 4
-// CHECK1-NEXT:    [[TMP67:%.*]] = bitcast %"class.std::complex"* [[TMP2]] to i8*
-// CHECK1-NEXT:    [[TMP68:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP67]], i8* align 4 [[TMP68]], i64 8, i1 false), !tbaa.struct !18
-// CHECK1-NEXT:    [[TMP69:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR7]]
 // CHECK1-NEXT:    call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]])
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DONE]]
 // CHECK1:       .omp.reduction.done:
-// CHECK1-NEXT:    [[TMP70:%.*]] = bitcast i32* [[I7]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP71:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP71]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP72:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP72]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP73:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP73]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP74:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP74]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP75:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP75]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP62:%.*]] = bitcast i32* [[I7]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]]
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
-// CHECK1-NEXT:    [[TMP76:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP76]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP77:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP77]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP78:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP78]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP79:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP79]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]]
 // CHECK1-NEXT:    ret void
 //
 //
@@ -505,71 +491,20 @@
 // CHECK1-NEXT:    store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP0]]) #[[ATTR8]]
+// CHECK1-NEXT:    [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP0]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA19:![0-9]+]]
+// CHECK1-NEXT:    [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18:![0-9]+]]
 // CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK1-NEXT:    store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK1-NEXT:    [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR8]]
+// CHECK1-NEXT:    [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT:    [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA21:![0-9]+]]
+// CHECK1-NEXT:    [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20:![0-9]+]]
 // CHECK1-NEXT:    [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]]
-// CHECK1-NEXT:    store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK1-NEXT:    store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK1-NEXT:    ret %"class.std::complex"* [[THIS1]]
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@_ZStplIfESt7complexIT_ERKS2_S4_
-// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[__X:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__Y:%.*]]) #[[ATTR6:[0-9]+]] comdat {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[RETVAL:%.*]] = alloca %"class.std::complex", align 4
-// CHECK1-NEXT:    [[__T2:%.*]] = alloca %"class.std::complex", align 4
-// CHECK1-NEXT:    [[__X_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
-// CHECK1-NEXT:    [[__Y_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
-// CHECK1-NEXT:    [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
-// CHECK1-NEXT:    [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-// CHECK1-NEXT:    br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]]
-// CHECK1:       .spmd:
-// CHECK1-NEXT:    br label [[DOTEXIT:%.*]]
-// CHECK1:       .non-spmd:
-// CHECK1-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i64 8, i64 256
-// CHECK1-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0)
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty.2*
-// CHECK1-NEXT:    br label [[DOTEXIT]]
-// CHECK1:       .exit:
-// CHECK1-NEXT:    [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty.2* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ]
-// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty.2* [[_SELECT_STACK]] to %struct._globalized_locals_ty.3*
-// CHECK1-NEXT:    [[__T:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[_SELECT_STACK]], i32 0, i32 0
-// CHECK1-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CHECK1-NEXT:    [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31
-// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x %"class.std::complex"], [32 x %"class.std::complex"]* [[__T]], i32 0, i32 [[NVPTX_LANE_ID]]
-// CHECK1-NEXT:    [[__T1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP8]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], %"class.std::complex"* [[__T1]], %"class.std::complex"* [[TMP9]]
-// CHECK1-NEXT:    [[TMP11:%.*]] = select i1 [[TMP4]], %"class.std::complex"* [[__T2]], %"class.std::complex"* [[TMP10]]
-// CHECK1-NEXT:    store %"class.std::complex"* [[__X]], %"class.std::complex"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    store %"class.std::complex"* [[__Y]], %"class.std::complex"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[TMP12:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i8*
-// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 8, i1 false), !tbaa.struct !18
-// CHECK1-NEXT:    [[TMP15:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[TMP11]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP15]]) #[[ATTR8]]
-// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast %"class.std::complex"* [[RETVAL]] to i8*
-// CHECK1-NEXT:    [[TMP17:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 8, i1 false), !tbaa.struct !18
-// CHECK1-NEXT:    br i1 [[TMP4]], label [[DOTEXIT4:%.*]], label [[DOTNON_SPMD3:%.*]]
-// CHECK1:       .non-spmd3:
-// CHECK1-NEXT:    [[TMP18:%.*]] = bitcast %struct._globalized_locals_ty.2* [[_SELECT_STACK]] to i8*
-// CHECK1-NEXT:    call void @__kmpc_data_sharing_pop_stack(i8* [[TMP18]])
-// CHECK1-NEXT:    br label [[DOTEXIT4]]
-// CHECK1:       .exit4:
-// CHECK1-NEXT:    [[TMP19:%.*]] = load %"class.std::complex", %"class.std::complex"* [[RETVAL]], align 4
-// CHECK1-NEXT:    ret %"class.std::complex" [[TMP19]]
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
 // CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] {
 // CHECK1-NEXT:  entry:
@@ -639,7 +574,7 @@
 // CHECK1-NEXT:    [[TMP45:%.*]] = bitcast i8* [[TMP43]] to %"class.std::complex"*
 // CHECK1-NEXT:    [[TMP46:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8*
 // CHECK1-NEXT:    [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP44]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct !18
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct !21
 // CHECK1-NEXT:    br label [[IFCONT6:%.*]]
 // CHECK1:       else5:
 // CHECK1-NEXT:    br label [[IFCONT6]]
@@ -895,7 +830,7 @@
 // CHECK1-NEXT:    [[TMP20:%.*]] = bitcast double* [[REF_TMP2]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP20]]) #[[ATTR5]]
 // CHECK1-NEXT:    store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]]
-// CHECK1-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]]
+// CHECK1-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[TMP21:%.*]] = bitcast double* [[REF_TMP2]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]]
 // CHECK1-NEXT:    [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8*
@@ -959,7 +894,7 @@
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8
-// CHECK1-NEXT:    call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR8]]
+// CHECK1-NEXT:    call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR7]]
 // CHECK1-NEXT:    ret void
 //
 //
@@ -989,7 +924,6 @@
 // CHECK1-NEXT:    [[REF_TMP15:%.*]] = alloca double, align 8
 // CHECK1-NEXT:    [[REF_TMP16:%.*]] = alloca double, align 8
 // CHECK1-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK1-NEXT:    [[REF_TMP21:%.*]] = alloca %"class.std::complex.1", align 8
 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]]
@@ -1050,7 +984,7 @@
 // CHECK1-NEXT:    [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]]
 // CHECK1-NEXT:    store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]]
-// CHECK1-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]]
+// CHECK1-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]]
 // CHECK1-NEXT:    [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8*
@@ -1112,8 +1046,8 @@
 // CHECK1-NEXT:    [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]]
 // CHECK1-NEXT:    [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double
 // CHECK1-NEXT:    store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]]
-// CHECK1-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR8]]
-// CHECK1-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]]
+// CHECK1-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR7]]
+// CHECK1-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8*
 // CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]]
 // CHECK1-NEXT:    [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8*
@@ -1154,45 +1088,32 @@
 // CHECK1-NEXT:    [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1
 // CHECK1-NEXT:    br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
 // CHECK1:       .omp.reduction.then:
-// CHECK1-NEXT:    [[TMP62:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP62]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[CALL22:%.*]] = call %"class.std::complex.1" @_ZStplIdESt7complexIT_ERKS2_S4_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]]
-// CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[REF_TMP21]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP64:%.*]] = extractvalue %"class.std::complex.1" [[CALL22]], 0
-// CHECK1-NEXT:    store double [[TMP64]], double* [[TMP63]], align 8
-// CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[REF_TMP21]], i32 0, i32 1
-// CHECK1-NEXT:    [[TMP66:%.*]] = extractvalue %"class.std::complex.1" [[CALL22]], 1
-// CHECK1-NEXT:    store double [[TMP66]], double* [[TMP65]], align 8
-// CHECK1-NEXT:    [[TMP67:%.*]] = bitcast %"class.std::complex.1"* [[TMP2]] to i8*
-// CHECK1-NEXT:    [[TMP68:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 [[TMP68]], i64 16, i1 false), !tbaa.struct !24
-// CHECK1-NEXT:    [[TMP69:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR7]]
 // CHECK1-NEXT:    call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]])
 // CHECK1-NEXT:    br label [[DOTOMP_REDUCTION_DONE]]
 // CHECK1:       .omp.reduction.done:
-// CHECK1-NEXT:    [[TMP70:%.*]] = bitcast i32* [[I7]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP71:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP71]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP72:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP72]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP73:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP73]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP74:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP74]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP75:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP75]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP62:%.*]] = bitcast i32* [[I7]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP63:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]]
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
-// CHECK1-NEXT:    [[TMP76:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP76]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP77:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP77]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP78:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP78]]) #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP79:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
-// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP79]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
+// CHECK1-NEXT:    [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
+// CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]]
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1205,71 +1126,20 @@
 // CHECK1-NEXT:    store %"class.std::complex.1"* [[__C]], %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP0:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP0]]) #[[ATTR8]]
+// CHECK1-NEXT:    [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP0]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA25:![0-9]+]]
+// CHECK1-NEXT:    [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]]
 // CHECK1-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]]
-// CHECK1-NEXT:    store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK1-NEXT:    store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK1-NEXT:    [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]]) #[[ATTR8]]
+// CHECK1-NEXT:    [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]]) #[[ATTR7]]
 // CHECK1-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT:    [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA27:![0-9]+]]
+// CHECK1-NEXT:    [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]]
 // CHECK1-NEXT:    [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]]
-// CHECK1-NEXT:    store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK1-NEXT:    store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK1-NEXT:    ret %"class.std::complex.1"* [[THIS1]]
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@_ZStplIdESt7complexIT_ERKS2_S4_
-// CHECK1-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__X:%.*]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__Y:%.*]]) #[[ATTR6]] comdat {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[RETVAL:%.*]] = alloca %"class.std::complex.1", align 8
-// CHECK1-NEXT:    [[__T2:%.*]] = alloca %"class.std::complex.1", align 8
-// CHECK1-NEXT:    [[__X_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8
-// CHECK1-NEXT:    [[__Y_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5]])
-// CHECK1-NEXT:    [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
-// CHECK1-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
-// CHECK1-NEXT:    [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR5]]
-// CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-// CHECK1-NEXT:    br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]]
-// CHECK1:       .spmd:
-// CHECK1-NEXT:    br label [[DOTEXIT:%.*]]
-// CHECK1:       .non-spmd:
-// CHECK1-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i64 16, i64 512
-// CHECK1-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0)
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty.4*
-// CHECK1-NEXT:    br label [[DOTEXIT]]
-// CHECK1:       .exit:
-// CHECK1-NEXT:    [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty.4* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ]
-// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty.4* [[_SELECT_STACK]] to %struct._globalized_locals_ty.5*
-// CHECK1-NEXT:    [[__T:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[_SELECT_STACK]], i32 0, i32 0
-// CHECK1-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CHECK1-NEXT:    [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31
-// CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x %"class.std::complex.1"], [32 x %"class.std::complex.1"]* [[__T]], i32 0, i32 [[NVPTX_LANE_ID]]
-// CHECK1-NEXT:    [[__T1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP8]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], %"class.std::complex.1"* [[__T1]], %"class.std::complex.1"* [[TMP9]]
-// CHECK1-NEXT:    [[TMP11:%.*]] = select i1 [[TMP4]], %"class.std::complex.1"* [[__T2]], %"class.std::complex.1"* [[TMP10]]
-// CHECK1-NEXT:    store %"class.std::complex.1"* [[__X]], %"class.std::complex.1"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    store %"class.std::complex.1"* [[__Y]], %"class.std::complex.1"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[TMP12:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[TMP13:%.*]] = bitcast %"class.std::complex.1"* [[TMP11]] to i8*
-// CHECK1-NEXT:    [[TMP14:%.*]] = bitcast %"class.std::complex.1"* [[TMP12]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false), !tbaa.struct !24
-// CHECK1-NEXT:    [[TMP15:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK1-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP11]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP15]]) #[[ATTR8]]
-// CHECK1-NEXT:    [[TMP16:%.*]] = bitcast %"class.std::complex.1"* [[RETVAL]] to i8*
-// CHECK1-NEXT:    [[TMP17:%.*]] = bitcast %"class.std::complex.1"* [[TMP11]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 16, i1 false), !tbaa.struct !24
-// CHECK1-NEXT:    br i1 [[TMP4]], label [[DOTEXIT4:%.*]], label [[DOTNON_SPMD3:%.*]]
-// CHECK1:       .non-spmd3:
-// CHECK1-NEXT:    [[TMP18:%.*]] = bitcast %struct._globalized_locals_ty.4* [[_SELECT_STACK]] to i8*
-// CHECK1-NEXT:    call void @__kmpc_data_sharing_pop_stack(i8* [[TMP18]])
-// CHECK1-NEXT:    br label [[DOTEXIT4]]
-// CHECK1:       .exit4:
-// CHECK1-NEXT:    [[TMP19:%.*]] = load %"class.std::complex.1", %"class.std::complex.1"* [[RETVAL]], align 8
-// CHECK1-NEXT:    ret %"class.std::complex.1" [[TMP19]]
-//
-//
 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7
 // CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] {
 // CHECK1-NEXT:  entry:
@@ -1353,7 +1223,7 @@
 // CHECK1-NEXT:    [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.1"*
 // CHECK1-NEXT:    [[TMP54:%.*]] = bitcast %"class.std::complex.1"* [[TMP53]] to i8*
 // CHECK1-NEXT:    [[TMP55:%.*]] = bitcast %"class.std::complex.1"* [[TMP52]] to i8*
-// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !24
+// CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27
 // CHECK1-NEXT:    br label [[IFCONT6:%.*]]
 // CHECK1:       else5:
 // CHECK1-NEXT:    br label [[IFCONT6]]
@@ -1460,11 +1330,11 @@
 // CHECK1-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA16]]
-// CHECK1-NEXT:    store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK1-NEXT:    store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK1-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA16]]
-// CHECK1-NEXT:    store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK1-NEXT:    store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1475,7 +1345,7 @@
 // CHECK1-NEXT:    store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK1-NEXT:    ret float [[TMP0]]
 //
 //
@@ -1486,7 +1356,7 @@
 // CHECK1-NEXT:    store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK1-NEXT:    [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK1-NEXT:    ret float [[TMP0]]
 //
 //
@@ -1503,11 +1373,11 @@
 // CHECK1-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
 // CHECK1-NEXT:    [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]]
-// CHECK1-NEXT:    store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK1-NEXT:    store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK1-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
 // CHECK1-NEXT:    [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]]
-// CHECK1-NEXT:    store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK1-NEXT:    store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1518,7 +1388,7 @@
 // CHECK1-NEXT:    store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT:    [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK1-NEXT:    [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK1-NEXT:    ret double [[TMP0]]
 //
 //
@@ -1529,7 +1399,7 @@
 // CHECK1-NEXT:    store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK1-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK1-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT:    [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK1-NEXT:    [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK1-NEXT:    ret double [[TMP0]]
 //
 //
@@ -1695,7 +1565,7 @@
 // CHECK2-NEXT:    [[TMP20:%.*]] = bitcast float* [[REF_TMP2]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]]
 // CHECK2-NEXT:    store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA16]]
-// CHECK2-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]]
+// CHECK2-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR7:[0-9]+]]
 // CHECK2-NEXT:    [[TMP21:%.*]] = bitcast float* [[REF_TMP2]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP21]]) #[[ATTR5]]
 // CHECK2-NEXT:    [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8*
@@ -1759,7 +1629,7 @@
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8
-// CHECK2-NEXT:    call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]]
+// CHECK2-NEXT:    call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR7]]
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1789,7 +1659,6 @@
 // CHECK2-NEXT:    [[REF_TMP15:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    [[REF_TMP16:%.*]] = alloca float, align 4
 // CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK2-NEXT:    [[REF_TMP21:%.*]] = alloca %"class.std::complex", align 4
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]]
@@ -1850,7 +1719,7 @@
 // CHECK2-NEXT:    [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]]
 // CHECK2-NEXT:    store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA16]]
-// CHECK2-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]]
+// CHECK2-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]]
 // CHECK2-NEXT:    [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8*
@@ -1912,8 +1781,8 @@
 // CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]]
 // CHECK2-NEXT:    [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float
 // CHECK2-NEXT:    store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA16]]
-// CHECK2-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]]
-// CHECK2-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]]
+// CHECK2-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR7]]
+// CHECK2-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]]
 // CHECK2-NEXT:    [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8*
@@ -1954,45 +1823,32 @@
 // CHECK2-NEXT:    [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1
 // CHECK2-NEXT:    br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
 // CHECK2:       .omp.reduction.then:
-// CHECK2-NEXT:    [[TMP62:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP62]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[CALL22:%.*]] = call %"class.std::complex" @_ZStplIfESt7complexIT_ERKS2_S4_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]]
-// CHECK2-NEXT:    [[TMP63:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[REF_TMP21]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP64:%.*]] = extractvalue %"class.std::complex" [[CALL22]], 0
-// CHECK2-NEXT:    store float [[TMP64]], float* [[TMP63]], align 4
-// CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[REF_TMP21]], i32 0, i32 1
-// CHECK2-NEXT:    [[TMP66:%.*]] = extractvalue %"class.std::complex" [[CALL22]], 1
-// CHECK2-NEXT:    store float [[TMP66]], float* [[TMP65]], align 4
-// CHECK2-NEXT:    [[TMP67:%.*]] = bitcast %"class.std::complex"* [[TMP2]] to i8*
-// CHECK2-NEXT:    [[TMP68:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP67]], i8* align 4 [[TMP68]], i64 8, i1 false), !tbaa.struct !18
-// CHECK2-NEXT:    [[TMP69:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR7]]
 // CHECK2-NEXT:    call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]])
 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DONE]]
 // CHECK2:       .omp.reduction.done:
-// CHECK2-NEXT:    [[TMP70:%.*]] = bitcast i32* [[I7]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP71:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP71]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP72:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP72]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP73:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP73]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP74:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP74]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP75:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP75]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP62:%.*]] = bitcast i32* [[I7]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]]
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
-// CHECK2-NEXT:    [[TMP76:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP76]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP77:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP77]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP78:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP78]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP79:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP79]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]]
 // CHECK2-NEXT:    ret void
 //
 //
@@ -2005,71 +1861,20 @@
 // CHECK2-NEXT:    store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP0]]) #[[ATTR8]]
+// CHECK2-NEXT:    [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP0]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA19:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18:![0-9]+]]
 // CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK2-NEXT:    store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK2-NEXT:    [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR8]]
+// CHECK2-NEXT:    [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
-// CHECK2-NEXT:    [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA21:![0-9]+]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20:![0-9]+]]
 // CHECK2-NEXT:    [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]]
-// CHECK2-NEXT:    store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK2-NEXT:    store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK2-NEXT:    ret %"class.std::complex"* [[THIS1]]
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@_ZStplIfESt7complexIT_ERKS2_S4_
-// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[__X:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__Y:%.*]]) #[[ATTR6:[0-9]+]] comdat {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[RETVAL:%.*]] = alloca %"class.std::complex", align 4
-// CHECK2-NEXT:    [[__T2:%.*]] = alloca %"class.std::complex", align 4
-// CHECK2-NEXT:    [[__X_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
-// CHECK2-NEXT:    [[__Y_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
-// CHECK2-NEXT:    [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
-// CHECK2-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
-// CHECK2-NEXT:    [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-// CHECK2-NEXT:    br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]]
-// CHECK2:       .spmd:
-// CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
-// CHECK2:       .non-spmd:
-// CHECK2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i64 8, i64 256
-// CHECK2-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0)
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty.2*
-// CHECK2-NEXT:    br label [[DOTEXIT]]
-// CHECK2:       .exit:
-// CHECK2-NEXT:    [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty.2* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ]
-// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty.2* [[_SELECT_STACK]] to %struct._globalized_locals_ty.3*
-// CHECK2-NEXT:    [[__T:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[_SELECT_STACK]], i32 0, i32 0
-// CHECK2-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CHECK2-NEXT:    [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31
-// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x %"class.std::complex"], [32 x %"class.std::complex"]* [[__T]], i32 0, i32 [[NVPTX_LANE_ID]]
-// CHECK2-NEXT:    [[__T1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP8]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], %"class.std::complex"* [[__T1]], %"class.std::complex"* [[TMP9]]
-// CHECK2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP4]], %"class.std::complex"* [[__T2]], %"class.std::complex"* [[TMP10]]
-// CHECK2-NEXT:    store %"class.std::complex"* [[__X]], %"class.std::complex"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    store %"class.std::complex"* [[__Y]], %"class.std::complex"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[TMP12:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i8*
-// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 8, i1 false), !tbaa.struct !18
-// CHECK2-NEXT:    [[TMP15:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[TMP11]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP15]]) #[[ATTR8]]
-// CHECK2-NEXT:    [[TMP16:%.*]] = bitcast %"class.std::complex"* [[RETVAL]] to i8*
-// CHECK2-NEXT:    [[TMP17:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 8, i1 false), !tbaa.struct !18
-// CHECK2-NEXT:    br i1 [[TMP4]], label [[DOTEXIT4:%.*]], label [[DOTNON_SPMD3:%.*]]
-// CHECK2:       .non-spmd3:
-// CHECK2-NEXT:    [[TMP18:%.*]] = bitcast %struct._globalized_locals_ty.2* [[_SELECT_STACK]] to i8*
-// CHECK2-NEXT:    call void @__kmpc_data_sharing_pop_stack(i8* [[TMP18]])
-// CHECK2-NEXT:    br label [[DOTEXIT4]]
-// CHECK2:       .exit4:
-// CHECK2-NEXT:    [[TMP19:%.*]] = load %"class.std::complex", %"class.std::complex"* [[RETVAL]], align 4
-// CHECK2-NEXT:    ret %"class.std::complex" [[TMP19]]
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
 // CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] {
 // CHECK2-NEXT:  entry:
@@ -2139,7 +1944,7 @@
 // CHECK2-NEXT:    [[TMP45:%.*]] = bitcast i8* [[TMP43]] to %"class.std::complex"*
 // CHECK2-NEXT:    [[TMP46:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8*
 // CHECK2-NEXT:    [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP44]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct !18
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct !21
 // CHECK2-NEXT:    br label [[IFCONT6:%.*]]
 // CHECK2:       else5:
 // CHECK2-NEXT:    br label [[IFCONT6]]
@@ -2395,7 +2200,7 @@
 // CHECK2-NEXT:    [[TMP20:%.*]] = bitcast double* [[REF_TMP2]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP20]]) #[[ATTR5]]
 // CHECK2-NEXT:    store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]]
-// CHECK2-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]]
+// CHECK2-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[TMP21:%.*]] = bitcast double* [[REF_TMP2]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]]
 // CHECK2-NEXT:    [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8*
@@ -2459,7 +2264,7 @@
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8
-// CHECK2-NEXT:    call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR8]]
+// CHECK2-NEXT:    call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR7]]
 // CHECK2-NEXT:    ret void
 //
 //
@@ -2489,7 +2294,6 @@
 // CHECK2-NEXT:    [[REF_TMP15:%.*]] = alloca double, align 8
 // CHECK2-NEXT:    [[REF_TMP16:%.*]] = alloca double, align 8
 // CHECK2-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK2-NEXT:    [[REF_TMP21:%.*]] = alloca %"class.std::complex.1", align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]]
@@ -2550,7 +2354,7 @@
 // CHECK2-NEXT:    [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]]
 // CHECK2-NEXT:    store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]]
-// CHECK2-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]]
+// CHECK2-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]]
 // CHECK2-NEXT:    [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8*
@@ -2612,8 +2416,8 @@
 // CHECK2-NEXT:    [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]]
 // CHECK2-NEXT:    [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double
 // CHECK2-NEXT:    store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]]
-// CHECK2-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR8]]
-// CHECK2-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]]
+// CHECK2-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR7]]
+// CHECK2-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8*
 // CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]]
 // CHECK2-NEXT:    [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8*
@@ -2654,45 +2458,32 @@
 // CHECK2-NEXT:    [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1
 // CHECK2-NEXT:    br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
 // CHECK2:       .omp.reduction.then:
-// CHECK2-NEXT:    [[TMP62:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP62]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[CALL22:%.*]] = call %"class.std::complex.1" @_ZStplIdESt7complexIT_ERKS2_S4_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]]
-// CHECK2-NEXT:    [[TMP63:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[REF_TMP21]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP64:%.*]] = extractvalue %"class.std::complex.1" [[CALL22]], 0
-// CHECK2-NEXT:    store double [[TMP64]], double* [[TMP63]], align 8
-// CHECK2-NEXT:    [[TMP65:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[REF_TMP21]], i32 0, i32 1
-// CHECK2-NEXT:    [[TMP66:%.*]] = extractvalue %"class.std::complex.1" [[CALL22]], 1
-// CHECK2-NEXT:    store double [[TMP66]], double* [[TMP65]], align 8
-// CHECK2-NEXT:    [[TMP67:%.*]] = bitcast %"class.std::complex.1"* [[TMP2]] to i8*
-// CHECK2-NEXT:    [[TMP68:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 [[TMP68]], i64 16, i1 false), !tbaa.struct !24
-// CHECK2-NEXT:    [[TMP69:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR7]]
 // CHECK2-NEXT:    call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]])
 // CHECK2-NEXT:    br label [[DOTOMP_REDUCTION_DONE]]
 // CHECK2:       .omp.reduction.done:
-// CHECK2-NEXT:    [[TMP70:%.*]] = bitcast i32* [[I7]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP71:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP71]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP72:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP72]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP73:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP73]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP74:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP74]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP75:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP75]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP62:%.*]] = bitcast i32* [[I7]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP63:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]]
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
-// CHECK2-NEXT:    [[TMP76:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP76]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP77:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP77]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP78:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP78]]) #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP79:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
-// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP79]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
+// CHECK2-NEXT:    [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
+// CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]]
 // CHECK2-NEXT:    ret void
 //
 //
@@ -2705,71 +2496,20 @@
 // CHECK2-NEXT:    store %"class.std::complex.1"* [[__C]], %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP0:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP0]]) #[[ATTR8]]
+// CHECK2-NEXT:    [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP0]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA25:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]]
 // CHECK2-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]]
-// CHECK2-NEXT:    store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK2-NEXT:    store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK2-NEXT:    [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]]) #[[ATTR8]]
+// CHECK2-NEXT:    [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]]) #[[ATTR7]]
 // CHECK2-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
-// CHECK2-NEXT:    [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA27:![0-9]+]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]]
 // CHECK2-NEXT:    [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]]
-// CHECK2-NEXT:    store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK2-NEXT:    store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK2-NEXT:    ret %"class.std::complex.1"* [[THIS1]]
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@_ZStplIdESt7complexIT_ERKS2_S4_
-// CHECK2-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__X:%.*]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__Y:%.*]]) #[[ATTR6]] comdat {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[RETVAL:%.*]] = alloca %"class.std::complex.1", align 8
-// CHECK2-NEXT:    [[__T2:%.*]] = alloca %"class.std::complex.1", align 8
-// CHECK2-NEXT:    [[__X_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8
-// CHECK2-NEXT:    [[__Y_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5]])
-// CHECK2-NEXT:    [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
-// CHECK2-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
-// CHECK2-NEXT:    [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR5]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-// CHECK2-NEXT:    br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]]
-// CHECK2:       .spmd:
-// CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
-// CHECK2:       .non-spmd:
-// CHECK2-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i64 16, i64 512
-// CHECK2-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0)
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty.4*
-// CHECK2-NEXT:    br label [[DOTEXIT]]
-// CHECK2:       .exit:
-// CHECK2-NEXT:    [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty.4* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ]
-// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty.4* [[_SELECT_STACK]] to %struct._globalized_locals_ty.5*
-// CHECK2-NEXT:    [[__T:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[_SELECT_STACK]], i32 0, i32 0
-// CHECK2-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CHECK2-NEXT:    [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31
-// CHECK2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x %"class.std::complex.1"], [32 x %"class.std::complex.1"]* [[__T]], i32 0, i32 [[NVPTX_LANE_ID]]
-// CHECK2-NEXT:    [[__T1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP8]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], %"class.std::complex.1"* [[__T1]], %"class.std::complex.1"* [[TMP9]]
-// CHECK2-NEXT:    [[TMP11:%.*]] = select i1 [[TMP4]], %"class.std::complex.1"* [[__T2]], %"class.std::complex.1"* [[TMP10]]
-// CHECK2-NEXT:    store %"class.std::complex.1"* [[__X]], %"class.std::complex.1"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    store %"class.std::complex.1"* [[__Y]], %"class.std::complex.1"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[TMP12:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[TMP13:%.*]] = bitcast %"class.std::complex.1"* [[TMP11]] to i8*
-// CHECK2-NEXT:    [[TMP14:%.*]] = bitcast %"class.std::complex.1"* [[TMP12]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false), !tbaa.struct !24
-// CHECK2-NEXT:    [[TMP15:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK2-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP11]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP15]]) #[[ATTR8]]
-// CHECK2-NEXT:    [[TMP16:%.*]] = bitcast %"class.std::complex.1"* [[RETVAL]] to i8*
-// CHECK2-NEXT:    [[TMP17:%.*]] = bitcast %"class.std::complex.1"* [[TMP11]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 16, i1 false), !tbaa.struct !24
-// CHECK2-NEXT:    br i1 [[TMP4]], label [[DOTEXIT4:%.*]], label [[DOTNON_SPMD3:%.*]]
-// CHECK2:       .non-spmd3:
-// CHECK2-NEXT:    [[TMP18:%.*]] = bitcast %struct._globalized_locals_ty.4* [[_SELECT_STACK]] to i8*
-// CHECK2-NEXT:    call void @__kmpc_data_sharing_pop_stack(i8* [[TMP18]])
-// CHECK2-NEXT:    br label [[DOTEXIT4]]
-// CHECK2:       .exit4:
-// CHECK2-NEXT:    [[TMP19:%.*]] = load %"class.std::complex.1", %"class.std::complex.1"* [[RETVAL]], align 8
-// CHECK2-NEXT:    ret %"class.std::complex.1" [[TMP19]]
-//
-//
 // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7
 // CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] {
 // CHECK2-NEXT:  entry:
@@ -2853,7 +2593,7 @@
 // CHECK2-NEXT:    [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.1"*
 // CHECK2-NEXT:    [[TMP54:%.*]] = bitcast %"class.std::complex.1"* [[TMP53]] to i8*
 // CHECK2-NEXT:    [[TMP55:%.*]] = bitcast %"class.std::complex.1"* [[TMP52]] to i8*
-// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !24
+// CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27
 // CHECK2-NEXT:    br label [[IFCONT6:%.*]]
 // CHECK2:       else5:
 // CHECK2-NEXT:    br label [[IFCONT6]]
@@ -2960,11 +2700,11 @@
 // CHECK2-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA16]]
-// CHECK2-NEXT:    store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK2-NEXT:    store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK2-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA16]]
-// CHECK2-NEXT:    store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK2-NEXT:    store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK2-NEXT:    ret void
 //
 //
@@ -2975,7 +2715,7 @@
 // CHECK2-NEXT:    store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK2-NEXT:    ret float [[TMP0]]
 //
 //
@@ -2986,7 +2726,7 @@
 // CHECK2-NEXT:    store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
-// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK2-NEXT:    ret float [[TMP0]]
 //
 //
@@ -3003,11 +2743,11 @@
 // CHECK2-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
 // CHECK2-NEXT:    [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]]
-// CHECK2-NEXT:    store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK2-NEXT:    store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK2-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
 // CHECK2-NEXT:    [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]]
-// CHECK2-NEXT:    store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK2-NEXT:    store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK2-NEXT:    ret void
 //
 //
@@ -3018,7 +2758,7 @@
 // CHECK2-NEXT:    store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
-// CHECK2-NEXT:    [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK2-NEXT:    ret double [[TMP0]]
 //
 //
@@ -3029,7 +2769,7 @@
 // CHECK2-NEXT:    store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK2-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK2-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
-// CHECK2-NEXT:    [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK2-NEXT:    ret double [[TMP0]]
 //
 //
@@ -3195,7 +2935,7 @@
 // CHECK3-NEXT:    [[TMP20:%.*]] = bitcast float* [[REF_TMP2]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]]
 // CHECK3-NEXT:    store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA16]]
-// CHECK3-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]]
+// CHECK3-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR7:[0-9]+]]
 // CHECK3-NEXT:    [[TMP21:%.*]] = bitcast float* [[REF_TMP2]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP21]]) #[[ATTR5]]
 // CHECK3-NEXT:    [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8*
@@ -3259,7 +2999,7 @@
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8
-// CHECK3-NEXT:    call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]]
+// CHECK3-NEXT:    call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR7]]
 // CHECK3-NEXT:    ret void
 //
 //
@@ -3289,7 +3029,6 @@
 // CHECK3-NEXT:    [[REF_TMP15:%.*]] = alloca float, align 4
 // CHECK3-NEXT:    [[REF_TMP16:%.*]] = alloca float, align 4
 // CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK3-NEXT:    [[REF_TMP21:%.*]] = alloca %"class.std::complex", align 4
 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]]
@@ -3350,7 +3089,7 @@
 // CHECK3-NEXT:    [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]]
 // CHECK3-NEXT:    store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA16]]
-// CHECK3-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]]
+// CHECK3-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]]
 // CHECK3-NEXT:    [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8*
@@ -3412,8 +3151,8 @@
 // CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]]
 // CHECK3-NEXT:    [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float
 // CHECK3-NEXT:    store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA16]]
-// CHECK3-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]]
-// CHECK3-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]]
+// CHECK3-NEXT:    call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR7]]
+// CHECK3-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]]
 // CHECK3-NEXT:    [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8*
@@ -3454,45 +3193,32 @@
 // CHECK3-NEXT:    [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1
 // CHECK3-NEXT:    br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
 // CHECK3:       .omp.reduction.then:
-// CHECK3-NEXT:    [[TMP62:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP62]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[CALL22:%.*]] = call %"class.std::complex" @_ZStplIfESt7complexIT_ERKS2_S4_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]]
-// CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[REF_TMP21]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP64:%.*]] = extractvalue %"class.std::complex" [[CALL22]], 0
-// CHECK3-NEXT:    store float [[TMP64]], float* [[TMP63]], align 4
-// CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[REF_TMP21]], i32 0, i32 1
-// CHECK3-NEXT:    [[TMP66:%.*]] = extractvalue %"class.std::complex" [[CALL22]], 1
-// CHECK3-NEXT:    store float [[TMP66]], float* [[TMP65]], align 4
-// CHECK3-NEXT:    [[TMP67:%.*]] = bitcast %"class.std::complex"* [[TMP2]] to i8*
-// CHECK3-NEXT:    [[TMP68:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP67]], i8* align 4 [[TMP68]], i64 8, i1 false), !tbaa.struct !18
-// CHECK3-NEXT:    [[TMP69:%.*]] = bitcast %"class.std::complex"* [[REF_TMP21]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR7]]
 // CHECK3-NEXT:    call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]])
 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DONE]]
 // CHECK3:       .omp.reduction.done:
-// CHECK3-NEXT:    [[TMP70:%.*]] = bitcast i32* [[I7]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP71:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP71]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP72:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP72]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP73:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP73]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP74:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP74]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP75:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP75]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP62:%.*]] = bitcast i32* [[I7]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]]
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
-// CHECK3-NEXT:    [[TMP76:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP76]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP77:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP77]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP78:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP78]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP79:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP79]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]]
 // CHECK3-NEXT:    ret void
 //
 //
@@ -3505,71 +3231,20 @@
 // CHECK3-NEXT:    store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP0]]) #[[ATTR8]]
+// CHECK3-NEXT:    [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP0]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA19:![0-9]+]]
+// CHECK3-NEXT:    [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18:![0-9]+]]
 // CHECK3-NEXT:    [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]]
-// CHECK3-NEXT:    store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK3-NEXT:    store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK3-NEXT:    [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR8]]
+// CHECK3-NEXT:    [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull dereferenceable(8) [[TMP2]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
-// CHECK3-NEXT:    [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA21:![0-9]+]]
+// CHECK3-NEXT:    [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20:![0-9]+]]
 // CHECK3-NEXT:    [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]]
-// CHECK3-NEXT:    store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK3-NEXT:    store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK3-NEXT:    ret %"class.std::complex"* [[THIS1]]
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@_ZStplIfESt7complexIT_ERKS2_S4_
-// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[__X:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__Y:%.*]]) #[[ATTR6:[0-9]+]] comdat {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[RETVAL:%.*]] = alloca %"class.std::complex", align 4
-// CHECK3-NEXT:    [[__T2:%.*]] = alloca %"class.std::complex", align 4
-// CHECK3-NEXT:    [[__X_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
-// CHECK3-NEXT:    [[__Y_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
-// CHECK3-NEXT:    [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
-// CHECK3-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
-// CHECK3-NEXT:    [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-// CHECK3-NEXT:    br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]]
-// CHECK3:       .spmd:
-// CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
-// CHECK3:       .non-spmd:
-// CHECK3-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i64 8, i64 256
-// CHECK3-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0)
-// CHECK3-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty.2*
-// CHECK3-NEXT:    br label [[DOTEXIT]]
-// CHECK3:       .exit:
-// CHECK3-NEXT:    [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty.2* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ]
-// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty.2* [[_SELECT_STACK]] to %struct._globalized_locals_ty.3*
-// CHECK3-NEXT:    [[__T:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[_SELECT_STACK]], i32 0, i32 0
-// CHECK3-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CHECK3-NEXT:    [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31
-// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x %"class.std::complex"], [32 x %"class.std::complex"]* [[__T]], i32 0, i32 [[NVPTX_LANE_ID]]
-// CHECK3-NEXT:    [[__T1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP8]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], %"class.std::complex"* [[__T1]], %"class.std::complex"* [[TMP9]]
-// CHECK3-NEXT:    [[TMP11:%.*]] = select i1 [[TMP4]], %"class.std::complex"* [[__T2]], %"class.std::complex"* [[TMP10]]
-// CHECK3-NEXT:    store %"class.std::complex"* [[__X]], %"class.std::complex"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    store %"class.std::complex"* [[__Y]], %"class.std::complex"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[TMP12:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i8*
-// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i64 8, i1 false), !tbaa.struct !18
-// CHECK3-NEXT:    [[TMP15:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull dereferenceable(8) [[TMP11]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP15]]) #[[ATTR8]]
-// CHECK3-NEXT:    [[TMP16:%.*]] = bitcast %"class.std::complex"* [[RETVAL]] to i8*
-// CHECK3-NEXT:    [[TMP17:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP16]], i8* align 4 [[TMP17]], i64 8, i1 false), !tbaa.struct !18
-// CHECK3-NEXT:    br i1 [[TMP4]], label [[DOTEXIT4:%.*]], label [[DOTNON_SPMD3:%.*]]
-// CHECK3:       .non-spmd3:
-// CHECK3-NEXT:    [[TMP18:%.*]] = bitcast %struct._globalized_locals_ty.2* [[_SELECT_STACK]] to i8*
-// CHECK3-NEXT:    call void @__kmpc_data_sharing_pop_stack(i8* [[TMP18]])
-// CHECK3-NEXT:    br label [[DOTEXIT4]]
-// CHECK3:       .exit4:
-// CHECK3-NEXT:    [[TMP19:%.*]] = load %"class.std::complex", %"class.std::complex"* [[RETVAL]], align 4
-// CHECK3-NEXT:    ret %"class.std::complex" [[TMP19]]
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
 // CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
@@ -3639,7 +3314,7 @@
 // CHECK3-NEXT:    [[TMP45:%.*]] = bitcast i8* [[TMP43]] to %"class.std::complex"*
 // CHECK3-NEXT:    [[TMP46:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8*
 // CHECK3-NEXT:    [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP44]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct !18
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP46]], i8* align 4 [[TMP47]], i64 8, i1 false), !tbaa.struct !21
 // CHECK3-NEXT:    br label [[IFCONT6:%.*]]
 // CHECK3:       else5:
 // CHECK3-NEXT:    br label [[IFCONT6]]
@@ -3895,7 +3570,7 @@
 // CHECK3-NEXT:    [[TMP20:%.*]] = bitcast double* [[REF_TMP2]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP20]]) #[[ATTR5]]
 // CHECK3-NEXT:    store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]]
-// CHECK3-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]]
+// CHECK3-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[TMP21:%.*]] = bitcast double* [[REF_TMP2]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]]
 // CHECK3-NEXT:    [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8*
@@ -3959,7 +3634,7 @@
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8
-// CHECK3-NEXT:    call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR8]]
+// CHECK3-NEXT:    call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR7]]
 // CHECK3-NEXT:    ret void
 //
 //
@@ -3989,7 +3664,6 @@
 // CHECK3-NEXT:    [[REF_TMP15:%.*]] = alloca double, align 8
 // CHECK3-NEXT:    [[REF_TMP16:%.*]] = alloca double, align 8
 // CHECK3-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
-// CHECK3-NEXT:    [[REF_TMP21:%.*]] = alloca %"class.std::complex.1", align 8
 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]]
@@ -4050,7 +3724,7 @@
 // CHECK3-NEXT:    [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]]
 // CHECK3-NEXT:    store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]]
-// CHECK3-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]]
+// CHECK3-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]]
 // CHECK3-NEXT:    [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8*
@@ -4112,8 +3786,8 @@
 // CHECK3-NEXT:    [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]]
 // CHECK3-NEXT:    [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double
 // CHECK3-NEXT:    store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]]
-// CHECK3-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR8]]
-// CHECK3-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]]
+// CHECK3-NEXT:    call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR7]]
+// CHECK3-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8*
 // CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]]
 // CHECK3-NEXT:    [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8*
@@ -4154,45 +3828,32 @@
 // CHECK3-NEXT:    [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1
 // CHECK3-NEXT:    br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
 // CHECK3:       .omp.reduction.then:
-// CHECK3-NEXT:    [[TMP62:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP62]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[CALL22:%.*]] = call %"class.std::complex.1" @_ZStplIdESt7complexIT_ERKS2_S4_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]]
-// CHECK3-NEXT:    [[TMP63:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[REF_TMP21]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP64:%.*]] = extractvalue %"class.std::complex.1" [[CALL22]], 0
-// CHECK3-NEXT:    store double [[TMP64]], double* [[TMP63]], align 8
-// CHECK3-NEXT:    [[TMP65:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[REF_TMP21]], i32 0, i32 1
-// CHECK3-NEXT:    [[TMP66:%.*]] = extractvalue %"class.std::complex.1" [[CALL22]], 1
-// CHECK3-NEXT:    store double [[TMP66]], double* [[TMP65]], align 8
-// CHECK3-NEXT:    [[TMP67:%.*]] = bitcast %"class.std::complex.1"* [[TMP2]] to i8*
-// CHECK3-NEXT:    [[TMP68:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP67]], i8* align 8 [[TMP68]], i64 16, i1 false), !tbaa.struct !24
-// CHECK3-NEXT:    [[TMP69:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP21]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR7]]
 // CHECK3-NEXT:    call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]])
 // CHECK3-NEXT:    br label [[DOTOMP_REDUCTION_DONE]]
 // CHECK3:       .omp.reduction.done:
-// CHECK3-NEXT:    [[TMP70:%.*]] = bitcast i32* [[I7]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP71:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP71]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP72:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP72]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP73:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP73]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP74:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP74]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP75:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP75]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP62:%.*]] = bitcast i32* [[I7]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP63:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]]
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
-// CHECK3-NEXT:    [[TMP76:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP76]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP77:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP77]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP78:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP78]]) #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP79:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
-// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP79]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
+// CHECK3-NEXT:    [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
+// CHECK3-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]]
 // CHECK3-NEXT:    ret void
 //
 //
@@ -4205,71 +3866,20 @@
 // CHECK3-NEXT:    store %"class.std::complex.1"* [[__C]], %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP0:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP0]]) #[[ATTR8]]
+// CHECK3-NEXT:    [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP0]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA25:![0-9]+]]
+// CHECK3-NEXT:    [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]]
 // CHECK3-NEXT:    [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]]
-// CHECK3-NEXT:    store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK3-NEXT:    store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK3-NEXT:    [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]]) #[[ATTR8]]
+// CHECK3-NEXT:    [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP2]]) #[[ATTR7]]
 // CHECK3-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
-// CHECK3-NEXT:    [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA27:![0-9]+]]
+// CHECK3-NEXT:    [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]]
 // CHECK3-NEXT:    [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]]
-// CHECK3-NEXT:    store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK3-NEXT:    store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK3-NEXT:    ret %"class.std::complex.1"* [[THIS1]]
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@_ZStplIdESt7complexIT_ERKS2_S4_
-// CHECK3-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__X:%.*]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__Y:%.*]]) #[[ATTR6]] comdat {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[RETVAL:%.*]] = alloca %"class.std::complex.1", align 8
-// CHECK3-NEXT:    [[__T2:%.*]] = alloca %"class.std::complex.1", align 8
-// CHECK3-NEXT:    [[__X_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8
-// CHECK3-NEXT:    [[__Y_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5]])
-// CHECK3-NEXT:    [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB5]], i32 [[TMP0]])
-// CHECK3-NEXT:    [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0
-// CHECK3-NEXT:    [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR5]]
-// CHECK3-NEXT:    [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-// CHECK3-NEXT:    br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]]
-// CHECK3:       .spmd:
-// CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
-// CHECK3:       .non-spmd:
-// CHECK3-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], i64 16, i64 512
-// CHECK3-NEXT:    [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0)
-// CHECK3-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty.4*
-// CHECK3-NEXT:    br label [[DOTEXIT]]
-// CHECK3:       .exit:
-// CHECK3-NEXT:    [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty.4* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ]
-// CHECK3-NEXT:    [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty.4* [[_SELECT_STACK]] to %struct._globalized_locals_ty.5*
-// CHECK3-NEXT:    [[__T:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[_SELECT_STACK]], i32 0, i32 0
-// CHECK3-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-// CHECK3-NEXT:    [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31
-// CHECK3-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x %"class.std::complex.1"], [32 x %"class.std::complex.1"]* [[__T]], i32 0, i32 [[NVPTX_LANE_ID]]
-// CHECK3-NEXT:    [[__T1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP8]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], %"class.std::complex.1"* [[__T1]], %"class.std::complex.1"* [[TMP9]]
-// CHECK3-NEXT:    [[TMP11:%.*]] = select i1 [[TMP4]], %"class.std::complex.1"* [[__T2]], %"class.std::complex.1"* [[TMP10]]
-// CHECK3-NEXT:    store %"class.std::complex.1"* [[__X]], %"class.std::complex.1"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    store %"class.std::complex.1"* [[__Y]], %"class.std::complex.1"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[TMP12:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__X_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[TMP13:%.*]] = bitcast %"class.std::complex.1"* [[TMP11]] to i8*
-// CHECK3-NEXT:    [[TMP14:%.*]] = bitcast %"class.std::complex.1"* [[TMP12]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false), !tbaa.struct !24
-// CHECK3-NEXT:    [[TMP15:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__Y_ADDR]], align 8, !tbaa [[TBAA10]]
-// CHECK3-NEXT:    [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull dereferenceable(16) [[TMP11]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP15]]) #[[ATTR8]]
-// CHECK3-NEXT:    [[TMP16:%.*]] = bitcast %"class.std::complex.1"* [[RETVAL]] to i8*
-// CHECK3-NEXT:    [[TMP17:%.*]] = bitcast %"class.std::complex.1"* [[TMP11]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 16, i1 false), !tbaa.struct !24
-// CHECK3-NEXT:    br i1 [[TMP4]], label [[DOTEXIT4:%.*]], label [[DOTNON_SPMD3:%.*]]
-// CHECK3:       .non-spmd3:
-// CHECK3-NEXT:    [[TMP18:%.*]] = bitcast %struct._globalized_locals_ty.4* [[_SELECT_STACK]] to i8*
-// CHECK3-NEXT:    call void @__kmpc_data_sharing_pop_stack(i8* [[TMP18]])
-// CHECK3-NEXT:    br label [[DOTEXIT4]]
-// CHECK3:       .exit4:
-// CHECK3-NEXT:    [[TMP19:%.*]] = load %"class.std::complex.1", %"class.std::complex.1"* [[RETVAL]], align 8
-// CHECK3-NEXT:    ret %"class.std::complex.1" [[TMP19]]
-//
-//
 // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7
 // CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] {
 // CHECK3-NEXT:  entry:
@@ -4353,7 +3963,7 @@
 // CHECK3-NEXT:    [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.1"*
 // CHECK3-NEXT:    [[TMP54:%.*]] = bitcast %"class.std::complex.1"* [[TMP53]] to i8*
 // CHECK3-NEXT:    [[TMP55:%.*]] = bitcast %"class.std::complex.1"* [[TMP52]] to i8*
-// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !24
+// CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27
 // CHECK3-NEXT:    br label [[IFCONT6:%.*]]
 // CHECK3:       else5:
 // CHECK3-NEXT:    br label [[IFCONT6]]
@@ -4460,11 +4070,11 @@
 // CHECK3-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA16]]
-// CHECK3-NEXT:    store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK3-NEXT:    store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK3-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA16]]
-// CHECK3-NEXT:    store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK3-NEXT:    store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK3-NEXT:    ret void
 //
 //
@@ -4475,7 +4085,7 @@
 // CHECK3-NEXT:    store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA19]]
+// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18]]
 // CHECK3-NEXT:    ret float [[TMP0]]
 //
 //
@@ -4486,7 +4096,7 @@
 // CHECK3-NEXT:    store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
-// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA21]]
+// CHECK3-NEXT:    [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20]]
 // CHECK3-NEXT:    ret float [[TMP0]]
 //
 //
@@ -4503,11 +4113,11 @@
 // CHECK3-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
 // CHECK3-NEXT:    [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]]
-// CHECK3-NEXT:    store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK3-NEXT:    store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK3-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
 // CHECK3-NEXT:    [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]]
-// CHECK3-NEXT:    store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK3-NEXT:    store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK3-NEXT:    ret void
 //
 //
@@ -4518,7 +4128,7 @@
 // CHECK3-NEXT:    store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0
-// CHECK3-NEXT:    [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA25]]
+// CHECK3-NEXT:    [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]]
 // CHECK3-NEXT:    ret double [[TMP0]]
 //
 //
@@ -4529,6 +4139,6 @@
 // CHECK3-NEXT:    store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]]
 // CHECK3-NEXT:    [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8
 // CHECK3-NEXT:    [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1
-// CHECK3-NEXT:    [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA27]]
+// CHECK3-NEXT:    [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]]
 // CHECK3-NEXT:    ret double [[TMP0]]
 //
diff --git a/clang/test/OpenMP/parallel_for_reduction_messages.cpp b/clang/test/OpenMP/parallel_for_reduction_messages.cpp
--- a/clang/test/OpenMP/parallel_for_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_reduction_messages.cpp
@@ -301,7 +301,7 @@
 #pragma omp parallel for reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp parallel for reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/parallel_for_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/parallel_for_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_reduction_messages.cpp
@@ -293,7 +293,7 @@
 #pragma omp parallel for simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp parallel for simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/parallel_master_reduction_messages.cpp b/clang/test/OpenMP/parallel_master_reduction_messages.cpp
--- a/clang/test/OpenMP/parallel_master_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_reduction_messages.cpp
@@ -349,7 +349,7 @@
   {
     foo();
   }
-#pragma omp parallel master reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp parallel master reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   {
     foo();
   }
diff --git a/clang/test/OpenMP/parallel_master_taskloop_reduction_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_reduction_messages.cpp
--- a/clang/test/OpenMP/parallel_master_taskloop_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_reduction_messages.cpp
@@ -312,7 +312,7 @@
 #pragma omp parallel master taskloop reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp parallel master taskloop reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp parallel master taskloop reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp parallel master taskloop reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp
@@ -312,7 +312,7 @@
 #pragma omp parallel master taskloop simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp parallel master taskloop simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp parallel master taskloop simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp parallel master taskloop simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/parallel_reduction_messages.cpp b/clang/test/OpenMP/parallel_reduction_messages.cpp
--- a/clang/test/OpenMP/parallel_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_reduction_messages.cpp
@@ -245,7 +245,7 @@
   foo();
 #pragma omp parallel reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   foo();
-#pragma omp parallel reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp parallel reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   foo();
 #pragma omp parallel reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   foo();
diff --git a/clang/test/OpenMP/parallel_sections_reduction_messages.cpp b/clang/test/OpenMP/parallel_sections_reduction_messages.cpp
--- a/clang/test/OpenMP/parallel_sections_reduction_messages.cpp
+++ b/clang/test/OpenMP/parallel_sections_reduction_messages.cpp
@@ -349,7 +349,7 @@
   {
     foo();
   }
-#pragma omp parallel sections reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp parallel sections reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   {
     foo();
   }
diff --git a/clang/test/OpenMP/reduction_compound_op.cpp b/clang/test/OpenMP/reduction_compound_op.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/OpenMP/reduction_compound_op.cpp
@@ -0,0 +1,2558 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
+//RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -DNORM \
+//RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix NORM
+
+//RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -DCOMP \
+//RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix COMP
+
+// Prefer compound operators since that is what the spec seems to say.
+//RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -DNORM -DCOMP \
+//RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix COMP
+
+//RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp-simd -DNORM \
+//RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix SIMD-ONLY
+
+//RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp-simd -DCOMP \
+//RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix SIMD-ONLY
+
+//RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp-simd -DNORM -DCOMP \
+//RUN:  -emit-llvm -o - %s | FileCheck %s --check-prefix SIMD-ONLY
+
+// SIMD-ONLY-NOT: {{__kmpc|__tgt}}
+
+struct Point {
+  int x = 0;
+  int y = 0;
+#if NORM
+  Point operator+(Point const &other) const;
+  Point operator-(Point const &other) const;
+  Point operator*(Point const &other) const;
+  Point operator&(Point const &other) const;
+  Point operator|(Point const &other) const;
+  Point operator^(Point const &other) const;
+#endif
+  Point operator&&(Point const &other) const;
+  Point operator||(Point const &other) const;
+  Point &operator=(Point const &other);
+#if COMP
+  Point &operator+=(Point const &other);
+  Point &operator*=(Point const &other);
+  Point &operator&=(Point const &other);
+  Point &operator|=(Point const &other);
+  Point &operator^=(Point const &other);
+#endif
+};
+
+void work(Point &P, int N, Point const *Points);
+
+void foo(int N, Point const *Points) {
+  Point Red;
+  #pragma omp parallel for reduction(+: Red)
+  for (unsigned I = 0; I < N; ++I)
+    work(Red, I, Points);
+
+  #pragma omp parallel for reduction(-: Red)
+  for (unsigned I = 0; I < N; ++I)
+    work(Red, I, Points);
+
+  #pragma omp parallel for reduction(*: Red)
+  for (unsigned I = 0; I < N; ++I)
+    work(Red, I, Points);
+
+  #pragma omp parallel for reduction(&: Red)
+  for (unsigned I = 0; I < N; ++I)
+    work(Red, I, Points);
+
+  #pragma omp parallel for reduction(|: Red)
+  for (unsigned I = 0; I < N; ++I)
+    work(Red, I, Points);
+
+  #pragma omp parallel for reduction(^: Red)
+  for (unsigned I = 0; I < N; ++I)
+    work(Red, I, Points);
+
+  #pragma omp parallel for reduction(&&: Red)
+  for (unsigned I = 0; I < N; ++I)
+    work(Red, I, Points);
+
+  #pragma omp parallel for reduction(||: Red)
+  for (unsigned I = 0; I < N; ++I)
+    work(Red, I, Points);
+}
+// NORM-LABEL: define {{[^@]+}}@_Z3fooiPK5Point
+// NORM-SAME: (i32 [[N:%.*]], %struct.Point* [[POINTS:%.*]]) #[[ATTR0:[0-9]+]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
+// NORM-NEXT:    store %struct.Point* [[POINTS]], %struct.Point** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED]]) #[[ATTR4:[0-9]+]]
+// NORM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// NORM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// NORM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// NORM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// NORM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// NORM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// NORM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// NORM-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@_ZN5PointC1Ev
+// NORM-SAME: (%struct.Point* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    store %struct.Point* [[THIS]], %struct.Point** [[THIS_ADDR]], align 8
+// NORM-NEXT:    [[THIS1:%.*]] = load %struct.Point*, %struct.Point** [[THIS_ADDR]], align 8
+// NORM-NEXT:    call void @_ZN5PointC2Ev(%struct.Point* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR4]]
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp_outlined.
+// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2:[0-9]+]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// NORM-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// NORM-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// NORM-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// NORM-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// NORM-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// NORM-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 0, i32* [[I]], align 4
+// NORM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// NORM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// NORM:       omp.precond.then:
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// NORM-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NORM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// NORM-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// NORM:       cond.true:
+// NORM-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    br label [[COND_END:%.*]]
+// NORM:       cond.false:
+// NORM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    br label [[COND_END]]
+// NORM:       cond.end:
+// NORM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// NORM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// NORM:       omp.inner.for.cond:
+// NORM-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// NORM-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// NORM-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// NORM:       omp.inner.for.body:
+// NORM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// NORM-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// NORM-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// NORM-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// NORM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// NORM:       omp.body.continue:
+// NORM-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// NORM:       omp.inner.for.inc:
+// NORM-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// NORM-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// NORM:       omp.inner.for.end:
+// NORM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// NORM:       omp.loop.exit:
+// NORM-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// NORM-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// NORM-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// NORM-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// NORM-NEXT:    ]
+// NORM:       .omp.reduction.case1:
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.case2:
+// NORM-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// NORM-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// NORM-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.default:
+// NORM-NEXT:    br label [[OMP_PRECOND_END]]
+// NORM:       omp.precond.end:
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
+// NORM-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// NORM-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// NORM-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// NORM-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// NORM-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp_outlined..1
+// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// NORM-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// NORM-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// NORM-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// NORM-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// NORM-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// NORM-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 0, i32* [[I]], align 4
+// NORM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// NORM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// NORM:       omp.precond.then:
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// NORM-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NORM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// NORM-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// NORM:       cond.true:
+// NORM-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    br label [[COND_END:%.*]]
+// NORM:       cond.false:
+// NORM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    br label [[COND_END]]
+// NORM:       cond.end:
+// NORM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// NORM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// NORM:       omp.inner.for.cond:
+// NORM-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// NORM-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// NORM-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// NORM:       omp.inner.for.body:
+// NORM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// NORM-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// NORM-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// NORM-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// NORM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// NORM:       omp.body.continue:
+// NORM-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// NORM:       omp.inner.for.inc:
+// NORM-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// NORM-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// NORM:       omp.inner.for.end:
+// NORM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// NORM:       omp.loop.exit:
+// NORM-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// NORM-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// NORM-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// NORM-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// NORM-NEXT:    ]
+// NORM:       .omp.reduction.case1:
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.case2:
+// NORM-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// NORM-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// NORM-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.default:
+// NORM-NEXT:    br label [[OMP_PRECOND_END]]
+// NORM:       omp.precond.end:
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
+// NORM-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// NORM-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// NORM-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// NORM-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// NORM-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp_outlined..3
+// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// NORM-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// NORM-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// NORM-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// NORM-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// NORM-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// NORM-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 0, i32* [[I]], align 4
+// NORM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// NORM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// NORM:       omp.precond.then:
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// NORM-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NORM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// NORM-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// NORM:       cond.true:
+// NORM-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    br label [[COND_END:%.*]]
+// NORM:       cond.false:
+// NORM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    br label [[COND_END]]
+// NORM:       cond.end:
+// NORM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// NORM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// NORM:       omp.inner.for.cond:
+// NORM-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// NORM-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// NORM-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// NORM:       omp.inner.for.body:
+// NORM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// NORM-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// NORM-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// NORM-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// NORM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// NORM:       omp.body.continue:
+// NORM-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// NORM:       omp.inner.for.inc:
+// NORM-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// NORM-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// NORM:       omp.inner.for.end:
+// NORM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// NORM:       omp.loop.exit:
+// NORM-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// NORM-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// NORM-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// NORM-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// NORM-NEXT:    ]
+// NORM:       .omp.reduction.case1:
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.case2:
+// NORM-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointmlERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// NORM-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// NORM-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.default:
+// NORM-NEXT:    br label [[OMP_PRECOND_END]]
+// NORM:       omp.precond.end:
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.4
+// NORM-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// NORM-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// NORM-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// NORM-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// NORM-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp_outlined..5
+// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// NORM-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// NORM-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// NORM-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// NORM-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// NORM-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// NORM-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 0, i32* [[I]], align 4
+// NORM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// NORM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// NORM:       omp.precond.then:
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// NORM-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NORM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// NORM-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// NORM:       cond.true:
+// NORM-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    br label [[COND_END:%.*]]
+// NORM:       cond.false:
+// NORM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    br label [[COND_END]]
+// NORM:       cond.end:
+// NORM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// NORM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// NORM:       omp.inner.for.cond:
+// NORM-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// NORM-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// NORM-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// NORM:       omp.inner.for.body:
+// NORM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// NORM-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// NORM-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// NORM-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// NORM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// NORM:       omp.body.continue:
+// NORM-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// NORM:       omp.inner.for.inc:
+// NORM-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// NORM-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// NORM:       omp.inner.for.end:
+// NORM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// NORM:       omp.loop.exit:
+// NORM-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// NORM-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// NORM-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// NORM-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// NORM-NEXT:    ]
+// NORM:       .omp.reduction.case1:
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.case2:
+// NORM-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointanERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// NORM-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// NORM-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.default:
+// NORM-NEXT:    br label [[OMP_PRECOND_END]]
+// NORM:       omp.precond.end:
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.6
+// NORM-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// NORM-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// NORM-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// NORM-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// NORM-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp_outlined..7
+// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// NORM-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// NORM-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// NORM-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// NORM-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// NORM-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// NORM-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 0, i32* [[I]], align 4
+// NORM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// NORM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// NORM:       omp.precond.then:
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// NORM-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NORM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// NORM-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// NORM:       cond.true:
+// NORM-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    br label [[COND_END:%.*]]
+// NORM:       cond.false:
+// NORM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    br label [[COND_END]]
+// NORM:       cond.end:
+// NORM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// NORM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// NORM:       omp.inner.for.cond:
+// NORM-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// NORM-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// NORM-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// NORM:       omp.inner.for.body:
+// NORM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// NORM-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// NORM-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// NORM-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// NORM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// NORM:       omp.body.continue:
+// NORM-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// NORM:       omp.inner.for.inc:
+// NORM-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// NORM-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// NORM:       omp.inner.for.end:
+// NORM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// NORM:       omp.loop.exit:
+// NORM-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// NORM-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// NORM-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// NORM-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// NORM-NEXT:    ]
+// NORM:       .omp.reduction.case1:
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.case2:
+// NORM-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointorERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// NORM-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// NORM-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.default:
+// NORM-NEXT:    br label [[OMP_PRECOND_END]]
+// NORM:       omp.precond.end:
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.8
+// NORM-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// NORM-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// NORM-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// NORM-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// NORM-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp_outlined..9
+// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// NORM-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// NORM-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// NORM-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// NORM-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// NORM-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// NORM-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 0, i32* [[I]], align 4
+// NORM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// NORM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// NORM:       omp.precond.then:
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// NORM-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NORM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// NORM-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// NORM:       cond.true:
+// NORM-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    br label [[COND_END:%.*]]
+// NORM:       cond.false:
+// NORM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    br label [[COND_END]]
+// NORM:       cond.end:
+// NORM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// NORM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// NORM:       omp.inner.for.cond:
+// NORM-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// NORM-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// NORM-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// NORM:       omp.inner.for.body:
+// NORM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// NORM-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// NORM-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// NORM-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// NORM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// NORM:       omp.body.continue:
+// NORM-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// NORM:       omp.inner.for.inc:
+// NORM-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// NORM-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// NORM:       omp.inner.for.end:
+// NORM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// NORM:       omp.loop.exit:
+// NORM-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// NORM-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// NORM-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// NORM-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// NORM-NEXT:    ]
+// NORM:       .omp.reduction.case1:
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.case2:
+// NORM-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointeoERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// NORM-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// NORM-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.default:
+// NORM-NEXT:    br label [[OMP_PRECOND_END]]
+// NORM:       omp.precond.end:
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.10
+// NORM-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// NORM-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// NORM-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// NORM-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// NORM-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp_outlined..11
+// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// NORM-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// NORM-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// NORM-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// NORM-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// NORM-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// NORM-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 0, i32* [[I]], align 4
+// NORM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// NORM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// NORM:       omp.precond.then:
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// NORM-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NORM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// NORM-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// NORM:       cond.true:
+// NORM-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    br label [[COND_END:%.*]]
+// NORM:       cond.false:
+// NORM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    br label [[COND_END]]
+// NORM:       cond.end:
+// NORM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// NORM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// NORM:       omp.inner.for.cond:
+// NORM-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// NORM-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// NORM-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// NORM:       omp.inner.for.body:
+// NORM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// NORM-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// NORM-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// NORM-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// NORM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// NORM:       omp.body.continue:
+// NORM-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// NORM:       omp.inner.for.inc:
+// NORM-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// NORM-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// NORM:       omp.inner.for.end:
+// NORM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// NORM:       omp.loop.exit:
+// NORM-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// NORM-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// NORM-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// NORM-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// NORM-NEXT:    ]
+// NORM:       .omp.reduction.case1:
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.case2:
+// NORM-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// NORM-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// NORM-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.default:
+// NORM-NEXT:    br label [[OMP_PRECOND_END]]
+// NORM:       omp.precond.end:
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.12
+// NORM-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// NORM-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// NORM-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// NORM-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// NORM-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp_outlined..13
+// NORM-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// NORM-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// NORM-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[I:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// NORM-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// NORM-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// NORM-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// NORM-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// NORM-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// NORM-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// NORM-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// NORM-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 0, i32* [[I]], align 4
+// NORM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// NORM-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// NORM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// NORM:       omp.precond.then:
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// NORM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// NORM-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// NORM-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// NORM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// NORM-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// NORM:       cond.true:
+// NORM-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// NORM-NEXT:    br label [[COND_END:%.*]]
+// NORM:       cond.false:
+// NORM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    br label [[COND_END]]
+// NORM:       cond.end:
+// NORM-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// NORM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// NORM-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// NORM:       omp.inner.for.cond:
+// NORM-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// NORM-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// NORM-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// NORM-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// NORM:       omp.inner.for.body:
+// NORM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// NORM-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// NORM-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// NORM-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// NORM-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// NORM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// NORM:       omp.body.continue:
+// NORM-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// NORM:       omp.inner.for.inc:
+// NORM-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// NORM-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// NORM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// NORM:       omp.inner.for.end:
+// NORM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// NORM:       omp.loop.exit:
+// NORM-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// NORM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// NORM-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// NORM-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// NORM-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// NORM-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// NORM-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// NORM-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// NORM-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// NORM-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// NORM-NEXT:    ]
+// NORM:       .omp.reduction.case1:
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// NORM-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.case2:
+// NORM-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// NORM-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// NORM-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// NORM-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// NORM-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// NORM-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// NORM-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// NORM-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// NORM:       .omp.reduction.default:
+// NORM-NEXT:    br label [[OMP_PRECOND_END]]
+// NORM:       omp.precond.end:
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.14
+// NORM-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// NORM-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// NORM-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// NORM-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// NORM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// NORM-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// NORM-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// NORM-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// NORM-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// NORM-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// NORM-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// NORM-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// NORM-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// NORM-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// NORM-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// NORM-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// NORM-NEXT:    ret void
+//
+//
+// NORM-LABEL: define {{[^@]+}}@_ZN5PointC2Ev
+// NORM-SAME: (%struct.Point* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// NORM-NEXT:  entry:
+// NORM-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Point*, align 8
+// NORM-NEXT:    store %struct.Point* [[THIS]], %struct.Point** [[THIS_ADDR]], align 8
+// NORM-NEXT:    [[THIS1:%.*]] = load %struct.Point*, %struct.Point** [[THIS_ADDR]], align 8
+// NORM-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_POINT:%.*]], %struct.Point* [[THIS1]], i32 0, i32 0
+// NORM-NEXT:    store i32 0, i32* [[X]], align 4
+// NORM-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_POINT]], %struct.Point* [[THIS1]], i32 0, i32 1
+// NORM-NEXT:    store i32 0, i32* [[Y]], align 4
+// NORM-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@_Z3fooiPK5Point
+// COMP-SAME: (i32 [[N:%.*]], %struct.Point* [[POINTS:%.*]]) #[[ATTR0:[0-9]+]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    store i32 [[N]], i32* [[N_ADDR]], align 4
+// COMP-NEXT:    store %struct.Point* [[POINTS]], %struct.Point** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED]]) #[[ATTR4:[0-9]+]]
+// COMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// COMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// COMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// COMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// COMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// COMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// COMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// COMP-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, %struct.Point*, %struct.Point**)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i32* [[N_ADDR]], %struct.Point* [[RED]], %struct.Point** [[POINTS_ADDR]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@_ZN5PointC1Ev
+// COMP-SAME: (%struct.Point* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    store %struct.Point* [[THIS]], %struct.Point** [[THIS_ADDR]], align 8
+// COMP-NEXT:    [[THIS1:%.*]] = load %struct.Point*, %struct.Point** [[THIS_ADDR]], align 8
+// COMP-NEXT:    call void @_ZN5PointC2Ev(%struct.Point* nonnull dereferenceable(8) [[THIS1]]) #[[ATTR4]]
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp_outlined.
+// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2:[0-9]+]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// COMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// COMP-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// COMP-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// COMP-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// COMP-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// COMP-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// COMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 0, i32* [[I]], align 4
+// COMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// COMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// COMP:       omp.precond.then:
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// COMP-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// COMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// COMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// COMP:       cond.true:
+// COMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    br label [[COND_END:%.*]]
+// COMP:       cond.false:
+// COMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    br label [[COND_END]]
+// COMP:       cond.end:
+// COMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// COMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// COMP:       omp.inner.for.cond:
+// COMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// COMP-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// COMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// COMP:       omp.inner.for.body:
+// COMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// COMP-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// COMP-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// COMP-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// COMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// COMP:       omp.body.continue:
+// COMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// COMP:       omp.inner.for.inc:
+// COMP-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// COMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// COMP:       omp.inner.for.end:
+// COMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// COMP:       omp.loop.exit:
+// COMP-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// COMP-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// COMP-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// COMP-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// COMP-NEXT:    ]
+// COMP:       .omp.reduction.case1:
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.case2:
+// COMP-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.default:
+// COMP-NEXT:    br label [[OMP_PRECOND_END]]
+// COMP:       omp.precond.end:
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func
+// COMP-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// COMP-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// COMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// COMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp_outlined..1
+// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// COMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// COMP-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// COMP-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// COMP-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// COMP-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// COMP-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// COMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 0, i32* [[I]], align 4
+// COMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// COMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// COMP:       omp.precond.then:
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// COMP-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// COMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// COMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// COMP:       cond.true:
+// COMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    br label [[COND_END:%.*]]
+// COMP:       cond.false:
+// COMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    br label [[COND_END]]
+// COMP:       cond.end:
+// COMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// COMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// COMP:       omp.inner.for.cond:
+// COMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// COMP-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// COMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// COMP:       omp.inner.for.body:
+// COMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// COMP-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// COMP-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// COMP-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// COMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// COMP:       omp.body.continue:
+// COMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// COMP:       omp.inner.for.inc:
+// COMP-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// COMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// COMP:       omp.inner.for.end:
+// COMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// COMP:       omp.loop.exit:
+// COMP-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// COMP-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// COMP-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// COMP-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.2, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// COMP-NEXT:    ]
+// COMP:       .omp.reduction.case1:
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.case2:
+// COMP-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.default:
+// COMP-NEXT:    br label [[OMP_PRECOND_END]]
+// COMP:       omp.precond.end:
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.2
+// COMP-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// COMP-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// COMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// COMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointpLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp_outlined..3
+// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// COMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// COMP-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// COMP-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// COMP-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// COMP-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// COMP-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// COMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 0, i32* [[I]], align 4
+// COMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// COMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// COMP:       omp.precond.then:
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// COMP-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// COMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// COMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// COMP:       cond.true:
+// COMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    br label [[COND_END:%.*]]
+// COMP:       cond.false:
+// COMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    br label [[COND_END]]
+// COMP:       cond.end:
+// COMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// COMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// COMP:       omp.inner.for.cond:
+// COMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// COMP-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// COMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// COMP:       omp.inner.for.body:
+// COMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// COMP-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// COMP-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// COMP-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// COMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// COMP:       omp.body.continue:
+// COMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// COMP:       omp.inner.for.inc:
+// COMP-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// COMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// COMP:       omp.inner.for.end:
+// COMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// COMP:       omp.loop.exit:
+// COMP-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// COMP-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// COMP-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// COMP-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.4, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// COMP-NEXT:    ]
+// COMP:       .omp.reduction.case1:
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointmLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.case2:
+// COMP-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointmLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.default:
+// COMP-NEXT:    br label [[OMP_PRECOND_END]]
+// COMP:       omp.precond.end:
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.4
+// COMP-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// COMP-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// COMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// COMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointmLERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp_outlined..5
+// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// COMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// COMP-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// COMP-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// COMP-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// COMP-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// COMP-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// COMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 0, i32* [[I]], align 4
+// COMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// COMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// COMP:       omp.precond.then:
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// COMP-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// COMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// COMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// COMP:       cond.true:
+// COMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    br label [[COND_END:%.*]]
+// COMP:       cond.false:
+// COMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    br label [[COND_END]]
+// COMP:       cond.end:
+// COMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// COMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// COMP:       omp.inner.for.cond:
+// COMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// COMP-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// COMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// COMP:       omp.inner.for.body:
+// COMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// COMP-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// COMP-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// COMP-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// COMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// COMP:       omp.body.continue:
+// COMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// COMP:       omp.inner.for.inc:
+// COMP-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// COMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// COMP:       omp.inner.for.end:
+// COMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// COMP:       omp.loop.exit:
+// COMP-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// COMP-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// COMP-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// COMP-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.6, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// COMP-NEXT:    ]
+// COMP:       .omp.reduction.case1:
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaNERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.case2:
+// COMP-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaNERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.default:
+// COMP-NEXT:    br label [[OMP_PRECOND_END]]
+// COMP:       omp.precond.end:
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.6
+// COMP-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// COMP-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// COMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// COMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaNERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp_outlined..7
+// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// COMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// COMP-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// COMP-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// COMP-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// COMP-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// COMP-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// COMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 0, i32* [[I]], align 4
+// COMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// COMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// COMP:       omp.precond.then:
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// COMP-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// COMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// COMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// COMP:       cond.true:
+// COMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    br label [[COND_END:%.*]]
+// COMP:       cond.false:
+// COMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    br label [[COND_END]]
+// COMP:       cond.end:
+// COMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// COMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// COMP:       omp.inner.for.cond:
+// COMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// COMP-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// COMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// COMP:       omp.inner.for.body:
+// COMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// COMP-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// COMP-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// COMP-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// COMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// COMP:       omp.body.continue:
+// COMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// COMP:       omp.inner.for.inc:
+// COMP-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// COMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// COMP:       omp.inner.for.end:
+// COMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// COMP:       omp.loop.exit:
+// COMP-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// COMP-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// COMP-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// COMP-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.8, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// COMP-NEXT:    ]
+// COMP:       .omp.reduction.case1:
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointoRERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.case2:
+// COMP-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointoRERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.default:
+// COMP-NEXT:    br label [[OMP_PRECOND_END]]
+// COMP:       omp.precond.end:
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.8
+// COMP-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// COMP-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// COMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// COMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointoRERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp_outlined..9
+// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// COMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// COMP-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// COMP-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// COMP-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// COMP-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// COMP-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// COMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 0, i32* [[I]], align 4
+// COMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// COMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// COMP:       omp.precond.then:
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// COMP-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// COMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// COMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// COMP:       cond.true:
+// COMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    br label [[COND_END:%.*]]
+// COMP:       cond.false:
+// COMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    br label [[COND_END]]
+// COMP:       cond.end:
+// COMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// COMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// COMP:       omp.inner.for.cond:
+// COMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// COMP-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// COMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// COMP:       omp.inner.for.body:
+// COMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// COMP-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// COMP-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// COMP-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// COMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// COMP:       omp.body.continue:
+// COMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// COMP:       omp.inner.for.inc:
+// COMP-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// COMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// COMP:       omp.inner.for.end:
+// COMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// COMP:       omp.loop.exit:
+// COMP-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// COMP-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// COMP-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// COMP-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.10, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// COMP-NEXT:    ]
+// COMP:       .omp.reduction.case1:
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointeOERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.case2:
+// COMP-NEXT:    [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointeOERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.default:
+// COMP-NEXT:    br label [[OMP_PRECOND_END]]
+// COMP:       omp.precond.end:
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.10
+// COMP-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// COMP-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// COMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// COMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// COMP-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointeOERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp_outlined..11
+// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// COMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// COMP-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// COMP-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// COMP-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// COMP-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// COMP-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// COMP-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// COMP-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// COMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 0, i32* [[I]], align 4
+// COMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// COMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// COMP:       omp.precond.then:
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// COMP-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// COMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// COMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// COMP:       cond.true:
+// COMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    br label [[COND_END:%.*]]
+// COMP:       cond.false:
+// COMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    br label [[COND_END]]
+// COMP:       cond.end:
+// COMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// COMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// COMP:       omp.inner.for.cond:
+// COMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// COMP-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// COMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// COMP:       omp.inner.for.body:
+// COMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// COMP-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// COMP-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// COMP-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// COMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// COMP:       omp.body.continue:
+// COMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// COMP:       omp.inner.for.inc:
+// COMP-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// COMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// COMP:       omp.inner.for.end:
+// COMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// COMP:       omp.loop.exit:
+// COMP-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// COMP-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// COMP-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// COMP-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.12, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// COMP-NEXT:    ]
+// COMP:       .omp.reduction.case1:
+// COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// COMP-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.case2:
+// COMP-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// COMP-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// COMP-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// COMP-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.default:
+// COMP-NEXT:    br label [[OMP_PRECOND_END]]
+// COMP:       omp.precond.end:
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.12
+// COMP-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// COMP-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// COMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// COMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// COMP-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// COMP-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// COMP-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp_outlined..13
+// COMP-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[N:%.*]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED:%.*]], %struct.Point** nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[N_ADDR:%.*]] = alloca i32*, align 8
+// COMP-NEXT:    [[RED_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    [[POINTS_ADDR:%.*]] = alloca %struct.Point**, align 8
+// COMP-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[I:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    [[I4:%.*]] = alloca i32, align 4
+// COMP-NEXT:    [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8
+// COMP-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4
+// COMP-NEXT:    [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4
+// COMP-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// COMP-NEXT:    store i32* [[N]], i32** [[N_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point* [[RED]], %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    store %struct.Point** [[POINTS]], %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[N_ADDR]], align 8
+// COMP-NEXT:    [[TMP1:%.*]] = load %struct.Point*, %struct.Point** [[RED_ADDR]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load %struct.Point**, %struct.Point*** [[POINTS_ADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
+// COMP-NEXT:    store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[SUB:%.*]] = sub i32 [[TMP4]], 0
+// COMP-NEXT:    [[DIV:%.*]] = udiv i32 [[SUB]], 1
+// COMP-NEXT:    [[SUB2:%.*]] = sub i32 [[DIV]], 1
+// COMP-NEXT:    store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 0, i32* [[I]], align 4
+// COMP-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4
+// COMP-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP5]]
+// COMP-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// COMP:       omp.precond.then:
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+// COMP-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// COMP-NEXT:    call void @_ZN5PointC1Ev(%struct.Point* nonnull dereferenceable(8) [[RED3]]) #[[ATTR4]]
+// COMP-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
+// COMP-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]]
+// COMP-NEXT:    br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// COMP:       cond.true:
+// COMP-NEXT:    [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4
+// COMP-NEXT:    br label [[COND_END:%.*]]
+// COMP:       cond.false:
+// COMP-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    br label [[COND_END]]
+// COMP:       cond.end:
+// COMP-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
+// COMP-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+// COMP-NEXT:    store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// COMP:       omp.inner.for.cond:
+// COMP-NEXT:    [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+// COMP-NEXT:    [[ADD:%.*]] = add i32 [[TMP15]], 1
+// COMP-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]]
+// COMP-NEXT:    br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// COMP:       omp.inner.for.body:
+// COMP-NEXT:    [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[MUL:%.*]] = mul i32 [[TMP16]], 1
+// COMP-NEXT:    [[ADD7:%.*]] = add i32 0, [[MUL]]
+// COMP-NEXT:    store i32 [[ADD7]], i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP17:%.*]] = load i32, i32* [[I4]], align 4
+// COMP-NEXT:    [[TMP18:%.*]] = load %struct.Point*, %struct.Point** [[TMP2]], align 8
+// COMP-NEXT:    call void @_Z4workR5PointiPKS_(%struct.Point* nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], %struct.Point* [[TMP18]])
+// COMP-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// COMP:       omp.body.continue:
+// COMP-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// COMP:       omp.inner.for.inc:
+// COMP-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    [[ADD8:%.*]] = add i32 [[TMP19]], 1
+// COMP-NEXT:    store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4
+// COMP-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// COMP:       omp.inner.for.end:
+// COMP-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// COMP:       omp.loop.exit:
+// COMP-NEXT:    [[TMP20:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
+// COMP-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP21]])
+// COMP-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// COMP-NEXT:    [[TMP23:%.*]] = bitcast %struct.Point* [[RED3]] to i8*
+// COMP-NEXT:    store i8* [[TMP23]], i8** [[TMP22]], align 8
+// COMP-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
+// COMP-NEXT:    [[TMP26:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
+// COMP-NEXT:    [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, i8* [[TMP26]], void (i8*, i8*)* @.omp.reduction.reduction_func.14, [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [
+// COMP-NEXT:    i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]]
+// COMP-NEXT:    i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]]
+// COMP-NEXT:    ]
+// COMP:       .omp.reduction.case1:
+// COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    [[TMP28:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// COMP-NEXT:    store i64 [[CALL]], i64* [[TMP28]], align 4
+// COMP-NEXT:    [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// COMP-NEXT:    call void @__kmpc_end_reduce_nowait(%struct.ident_t* @[[GLOB2]], i32 [[TMP25]], [8 x i32]* @.gomp_critical_user_.reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.case2:
+// COMP-NEXT:    [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// COMP-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
+// COMP-NEXT:    call void @__kmpc_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[RED3]])
+// COMP-NEXT:    [[TMP31:%.*]] = bitcast %struct.Point* [[REF_TMP10]] to i64*
+// COMP-NEXT:    store i64 [[CALL11]], i64* [[TMP31]], align 4
+// COMP-NEXT:    [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP1]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP10]])
+// COMP-NEXT:    call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], [8 x i32]* @.gomp_critical_user_.atomic_reduction.var)
+// COMP-NEXT:    br label [[DOTOMP_REDUCTION_DEFAULT]]
+// COMP:       .omp.reduction.default:
+// COMP-NEXT:    br label [[OMP_PRECOND_END]]
+// COMP:       omp.precond.end:
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func.14
+// COMP-SAME: (i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) #[[ATTR5]] {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[DOTADDR:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
+// COMP-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4
+// COMP-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
+// COMP-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
+// COMP-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
+// COMP-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]*
+// COMP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0
+// COMP-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// COMP-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.Point*
+// COMP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP3]], i64 0, i64 0
+// COMP-NEXT:    [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// COMP-NEXT:    [[TMP11:%.*]] = bitcast i8* [[TMP10]] to %struct.Point*
+// COMP-NEXT:    [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[TMP8]])
+// COMP-NEXT:    [[TMP12:%.*]] = bitcast %struct.Point* [[REF_TMP]] to i64*
+// COMP-NEXT:    store i64 [[CALL]], i64* [[TMP12]], align 4
+// COMP-NEXT:    [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) %struct.Point* @_ZN5PointaSERKS_(%struct.Point* nonnull dereferenceable(8) [[TMP11]], %struct.Point* nonnull align 4 dereferenceable(8) [[REF_TMP]])
+// COMP-NEXT:    ret void
+//
+//
+// COMP-LABEL: define {{[^@]+}}@_ZN5PointC2Ev
+// COMP-SAME: (%struct.Point* nonnull dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// COMP-NEXT:  entry:
+// COMP-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.Point*, align 8
+// COMP-NEXT:    store %struct.Point* [[THIS]], %struct.Point** [[THIS_ADDR]], align 8
+// COMP-NEXT:    [[THIS1:%.*]] = load %struct.Point*, %struct.Point** [[THIS_ADDR]], align 8
+// COMP-NEXT:    [[X:%.*]] = getelementptr inbounds [[STRUCT_POINT:%.*]], %struct.Point* [[THIS1]], i32 0, i32 0
+// COMP-NEXT:    store i32 0, i32* [[X]], align 4
+// COMP-NEXT:    [[Y:%.*]] = getelementptr inbounds [[STRUCT_POINT]], %struct.Point* [[THIS1]], i32 0, i32 1
+// COMP-NEXT:    store i32 0, i32* [[Y]], align 4
+// COMP-NEXT:    ret void
+//
diff --git a/clang/test/OpenMP/sections_reduction_messages.cpp b/clang/test/OpenMP/sections_reduction_messages.cpp
--- a/clang/test/OpenMP/sections_reduction_messages.cpp
+++ b/clang/test/OpenMP/sections_reduction_messages.cpp
@@ -401,7 +401,7 @@
     foo();
   }
 #pragma omp parallel
-#pragma omp sections reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp sections reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   {
     foo();
   }
diff --git a/clang/test/OpenMP/simd_reduction_messages.cpp b/clang/test/OpenMP/simd_reduction_messages.cpp
--- a/clang/test/OpenMP/simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/simd_reduction_messages.cpp
@@ -296,7 +296,7 @@
 #pragma omp simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/target_parallel_for_reduction_messages.cpp b/clang/test/OpenMP/target_parallel_for_reduction_messages.cpp
--- a/clang/test/OpenMP/target_parallel_for_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_reduction_messages.cpp
@@ -303,7 +303,7 @@
 #pragma omp target parallel for reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp target parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp target parallel for reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_reduction_messages.cpp
@@ -305,7 +305,7 @@
 #pragma omp target parallel for simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp target parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp target parallel for simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/target_parallel_reduction_messages.cpp b/clang/test/OpenMP/target_parallel_reduction_messages.cpp
--- a/clang/test/OpenMP/target_parallel_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_reduction_messages.cpp
@@ -256,7 +256,7 @@
   foo();
 #pragma omp target parallel reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   foo();
-#pragma omp target parallel reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target parallel reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   foo();
 #pragma omp target parallel reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   foo();
diff --git a/clang/test/OpenMP/target_reduction_messages.cpp b/clang/test/OpenMP/target_reduction_messages.cpp
--- a/clang/test/OpenMP/target_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_reduction_messages.cpp
@@ -253,7 +253,7 @@
   foo();
 #pragma omp target reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   foo();
-#pragma omp target reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   foo();
 #pragma omp target reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   foo();
diff --git a/clang/test/OpenMP/target_simd_reduction_messages.cpp b/clang/test/OpenMP/target_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/target_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_simd_reduction_messages.cpp
@@ -303,7 +303,7 @@
 #pragma omp target simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp target simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp target simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_messages.cpp
@@ -240,7 +240,7 @@
   for (int j=0; j<100; j++) foo();
 #pragma omp target teams distribute parallel for reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
-#pragma omp target teams distribute parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target teams distribute parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target teams distribute parallel for reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_messages.cpp
@@ -241,7 +241,7 @@
   for (int j=0; j<100; j++) foo();
 #pragma omp target teams distribute parallel for simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
-#pragma omp target teams distribute parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target teams distribute parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target teams distribute parallel for simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
diff --git a/clang/test/OpenMP/target_teams_distribute_reduction_messages.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_messages.cpp
--- a/clang/test/OpenMP/target_teams_distribute_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_reduction_messages.cpp
@@ -245,7 +245,7 @@
   for (int j=0; j<100; j++) foo();
 #pragma omp target teams distribute reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
-#pragma omp target teams distribute reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target teams distribute reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target teams distribute reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
diff --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_messages.cpp
@@ -241,7 +241,7 @@
   for (int j=0; j<100; j++) foo();
 #pragma omp target teams distribute simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
-#pragma omp target teams distribute simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target teams distribute simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target teams distribute simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
diff --git a/clang/test/OpenMP/target_teams_reduction_messages.cpp b/clang/test/OpenMP/target_teams_reduction_messages.cpp
--- a/clang/test/OpenMP/target_teams_reduction_messages.cpp
+++ b/clang/test/OpenMP/target_teams_reduction_messages.cpp
@@ -256,7 +256,7 @@
   foo();
 #pragma omp target teams reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   foo();
-#pragma omp target teams reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp target teams reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   foo();
 #pragma omp target teams reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   foo();
diff --git a/clang/test/OpenMP/task_in_reduction_message.cpp b/clang/test/OpenMP/task_in_reduction_message.cpp
--- a/clang/test/OpenMP/task_in_reduction_message.cpp
+++ b/clang/test/OpenMP/task_in_reduction_message.cpp
@@ -287,7 +287,7 @@
   foo();
 #pragma omp task in_reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be in_reduction}}
   foo();
-#pragma omp task in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp task in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   foo();
 #pragma omp taskgroup task_reduction(+:k)
 #pragma omp task in_reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/taskgroup_task_reduction_messages.cpp b/clang/test/OpenMP/taskgroup_task_reduction_messages.cpp
--- a/clang/test/OpenMP/taskgroup_task_reduction_messages.cpp
+++ b/clang/test/OpenMP/taskgroup_task_reduction_messages.cpp
@@ -228,7 +228,7 @@
   foo();
 #pragma omp taskgroup task_reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be task_reduction}}
   foo();
-#pragma omp taskgroup task_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp taskgroup task_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   foo();
 #pragma omp taskgroup task_reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
   foo();
diff --git a/clang/test/OpenMP/taskloop_in_reduction_messages.cpp b/clang/test/OpenMP/taskloop_in_reduction_messages.cpp
--- a/clang/test/OpenMP/taskloop_in_reduction_messages.cpp
+++ b/clang/test/OpenMP/taskloop_in_reduction_messages.cpp
@@ -344,7 +344,7 @@
 #pragma omp taskloop in_reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be in_reduction}}
   for (int i = 0; i < 10; ++i)
   foo();
-#pragma omp taskloop in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp taskloop in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
   foo();
 #pragma omp taskgroup task_reduction(+:k)
diff --git a/clang/test/OpenMP/taskloop_reduction_messages.cpp b/clang/test/OpenMP/taskloop_reduction_messages.cpp
--- a/clang/test/OpenMP/taskloop_reduction_messages.cpp
+++ b/clang/test/OpenMP/taskloop_reduction_messages.cpp
@@ -312,7 +312,7 @@
 #pragma omp taskloop reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp taskloop reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp taskloop reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp taskloop reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_messages.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_messages.cpp
--- a/clang/test/OpenMP/taskloop_simd_in_reduction_messages.cpp
+++ b/clang/test/OpenMP/taskloop_simd_in_reduction_messages.cpp
@@ -344,7 +344,7 @@
 #pragma omp taskloop simd in_reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be in_reduction}}
   for (int i = 0; i < 10; ++i)
   foo();
-#pragma omp taskloop simd in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{nvalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp taskloop simd in_reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
   foo();
 #pragma omp taskgroup task_reduction(+:k)
diff --git a/clang/test/OpenMP/taskloop_simd_reduction_messages.cpp b/clang/test/OpenMP/taskloop_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/taskloop_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/taskloop_simd_reduction_messages.cpp
@@ -312,7 +312,7 @@
 #pragma omp taskloop simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int i = 0; i < 10; ++i)
     foo();
-#pragma omp taskloop simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp taskloop simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int i = 0; i < 10; ++i)
     foo();
 #pragma omp taskloop simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_messages.cpp
@@ -284,7 +284,7 @@
 #pragma omp teams distribute parallel for reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target
-#pragma omp teams distribute parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp teams distribute parallel for reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target
 #pragma omp teams distribute parallel for reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_messages.cpp
@@ -284,7 +284,7 @@
 #pragma omp teams distribute parallel for simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target
-#pragma omp teams distribute parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp teams distribute parallel for simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target
 #pragma omp teams distribute parallel for simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/teams_distribute_reduction_messages.cpp b/clang/test/OpenMP/teams_distribute_reduction_messages.cpp
--- a/clang/test/OpenMP/teams_distribute_reduction_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_reduction_messages.cpp
@@ -290,7 +290,7 @@
 #pragma omp teams distribute reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target
-#pragma omp teams distribute reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp teams distribute reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target
 #pragma omp teams distribute reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/teams_distribute_simd_reduction_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_messages.cpp
--- a/clang/test/OpenMP/teams_distribute_simd_reduction_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_reduction_messages.cpp
@@ -284,7 +284,7 @@
 #pragma omp teams distribute simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target
-#pragma omp teams distribute simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp teams distribute simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   for (int j=0; j<100; j++) foo();
 #pragma omp target
 #pragma omp teams distribute simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
diff --git a/clang/test/OpenMP/teams_reduction_messages.cpp b/clang/test/OpenMP/teams_reduction_messages.cpp
--- a/clang/test/OpenMP/teams_reduction_messages.cpp
+++ b/clang/test/OpenMP/teams_reduction_messages.cpp
@@ -303,7 +303,7 @@
 #pragma omp teams reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
   foo();
 #pragma omp target
-#pragma omp teams reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+#pragma omp teams reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
   foo();
 #pragma omp target
 #pragma omp teams reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}