Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -886,6 +886,11 @@ bool DynamicWithOrderedClause = Dynamic && S.getSingleClause(OMPC_ordered) != nullptr; SourceLocation Loc = S.getLocStart(); + // Generate !llvm.mem.parallel_loop_access metadata for loads and stores + // for loops with dynamic/guided scheduling and without ordered clause. + LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic || + ScheduleKind == OMPC_SCHEDULE_guided) && + !DynamicWithOrderedClause); EmitOMPInnerLoop( S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false), S.getInc(), Index: test/OpenMP/for_codegen.cpp =================================================================== --- test/OpenMP/for_codegen.cpp +++ test/OpenMP/for_codegen.cpp @@ -41,6 +41,7 @@ // ... loop body ... // End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access a[i] = b[i] * c[i] * d[i]; // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}} // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1 @@ -81,6 +82,7 @@ // ... loop body ... // End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access a[i] = b[i] * c[i] * d[i]; // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}} // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1 @@ -129,6 +131,7 @@ // ... loop body ... // End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access a[i] = b[i] * c[i] * d[i]; // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}} // CHECK-NEXT: [[ADD1_2:%.+]] = add i32 [[IV1_2]], 1 @@ -180,7 +183,7 @@ // CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]] // ... loop body ... 
// End of body: store into a[i]: -// CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK: store float [[RESULT:%.+]], float* {{%.+}}!llvm.mem.parallel_loop_access a[i] = b[i] * c[i] * d[i]; // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}} // CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1 @@ -221,7 +224,7 @@ // CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]] // ... loop body ... // End of body: store into a[i]: -// CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK: store float [[RESULT:%.+]], float* {{%.+}}!llvm.mem.parallel_loop_access a[i] = b[i] * c[i] * d[i]; // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}} // CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1 @@ -266,6 +269,7 @@ // ... loop body ... // End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access a[i] = b[i] * c[i] * d[i]; // CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}} // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1 @@ -307,6 +311,7 @@ // ... loop body ... // End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access a[i] = b[i] * c[i] * d[i]; // CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}} // CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1 Index: test/OpenMP/ordered_codegen.cpp =================================================================== --- test/OpenMP/ordered_codegen.cpp +++ test/OpenMP/ordered_codegen.cpp @@ -39,6 +39,7 @@ // ... loop body ... // End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ... end of ordered region ... #pragma omp ordered @@ -86,6 +87,7 @@ // ... loop body ... 
// End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ... end of ordered region ... #pragma omp ordered @@ -139,6 +141,7 @@ // ... loop body ... // End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ... end of ordered region ... #pragma omp ordered @@ -189,6 +192,7 @@ // ... loop body ... // End of body: store into a[i]: // CHECK: store float [[RESULT:%.+]], float* {{%.+}} +// CHECK-NOT: !llvm.mem.parallel_loop_access // CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) // ... end of ordered region ... #pragma omp ordered