Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -279,6 +279,10 @@
   ///
   llvm::Value *getCriticalRegionLock(StringRef CriticalName);
 
+  /// \brief Emit barrier call with the specified flags \a Flags.
+  virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+                               OpenMPLocationFlags Flags);
+
 public:
   explicit CGOpenMPRuntime(CodeGenModule &CGM);
   virtual ~CGOpenMPRuntime() {}
@@ -358,11 +362,17 @@
                                     ArrayRef<const Expr *> DstExprs,
                                     ArrayRef<const Expr *> AssignmentOps);
 
-  /// \brief Emits explicit barrier for OpenMP threads.
-  /// \param IsExplicit true, if it is explicitly specified barrier.
+  /// \brief Emit an explicit barrier for OpenMP threads.
   ///
-  virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
-                               bool IsExplicit = true);
+  virtual void emitExplicitBarrierCall(CodeGenFunction &CGF,
+                                       SourceLocation Loc);
+
+  /// \brief Emit an implicit barrier for OpenMP threads.
+  /// \param Kind Directive for which this implicit barrier call must be
+  /// generated.
+  ///
+  virtual void emitImplicitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
+                                       OpenMPDirectiveKind Kind = OMPD_unknown);
 
   /// \brief Check if the specified \a ScheduleKind is static non-chunked.
   /// This kind of worksharing directive is emitted without outer loop.
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -178,8 +178,8 @@
     CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
     if (PrivateScope.Privatize())
       // Emit implicit barrier to synchronize threads and avoid data races.
-      CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, Directive.getLocStart(),
-                                                 /*IsExplicit=*/false);
+      CGF.CGM.getOpenMPRuntime().emitImplicitBarrierCall(CGF,
+                                                         Directive.getLocStart());
     CGCapturedStmtInfo::EmitBody(CGF, S);
   }
@@ -1138,11 +1138,7 @@
 }
 
 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
-                                      bool IsExplicit) {
-  // Build call __kmpc_cancel_barrier(loc, thread_id);
-  auto Flags = static_cast<OpenMPLocationFlags>(
-      OMP_IDENT_KMPC |
-      (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL));
+                                      OpenMPLocationFlags Flags) {
+  // Build call __kmpc_cancel_barrier(loc, thread_id);
   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
   // one provides the same functionality and adds initial support for
@@ -1154,6 +1150,34 @@
   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
 }
 
+void CGOpenMPRuntime::emitExplicitBarrierCall(CodeGenFunction &CGF,
+                                              SourceLocation Loc) {
+  // Build call __kmpc_cancel_barrier(loc, thread_id);
+  OpenMPLocationFlags Flags =
+      static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_IDENT_BARRIER_EXPL);
+  emitBarrierCall(CGF, Loc, Flags);
+}
+
+void CGOpenMPRuntime::emitImplicitBarrierCall(CodeGenFunction &CGF,
+                                              SourceLocation Loc,
+                                              OpenMPDirectiveKind Kind) {
+  // Build call __kmpc_cancel_barrier(loc, thread_id);
+  OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
+  if (Kind == OMPD_for) {
+    Flags = static_cast<OpenMPLocationFlags>(Flags |
+                                             OMP_IDENT_BARRIER_IMPL_FOR);
+  } else if (Kind == OMPD_sections) {
+    Flags = static_cast<OpenMPLocationFlags>(Flags |
+                                             OMP_IDENT_BARRIER_IMPL_SECTIONS);
+  } else if (Kind == OMPD_single) {
+    Flags =
+        static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
+  } else {
+    Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
+  }
+  emitBarrierCall(CGF, Loc, Flags);
+}
+
 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
 /// the enum sched_type in kmp.h).
 enum OpenMPSchedType {
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -785,8 +785,9 @@
   EmitOMPWorksharingLoop(S);
 
   // Emit an implicit barrier at the end.
-  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
-                                         /*IsExplicit*/ false);
+  if (!S.getSingleClause(OMPC_nowait))
+    CGM.getOpenMPRuntime().emitImplicitBarrierCall(*this, S.getLocStart(),
+                                                   OMPD_for);
 }
 
 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
@@ -887,8 +888,9 @@
 
   // Emit an implicit barrier at the end.
   if (!S.getSingleClause(OMPC_nowait))
-    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
-                                           /*IsExplicit=*/false);
+    CGM.getOpenMPRuntime().emitImplicitBarrierCall(
+        *this, S.getLocStart(),
+        (CS && CS->size() > 1) ? OMPD_sections : OMPD_single);
 }
 
 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
@@ -928,8 +930,8 @@
       },
       S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
   // Emit an implicit barrier at the end.
   if (!S.getSingleClause(OMPC_nowait))
-    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
-                                           /*IsExplicit=*/false);
+    CGM.getOpenMPRuntime().emitImplicitBarrierCall(*this, S.getLocStart(),
+                                                   OMPD_single);
 }
 
 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
@@ -1001,7 +1003,7 @@
 }
 
 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
-  CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart());
+  CGM.getOpenMPRuntime().emitExplicitBarrierCall(*this, S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &) {
Index: test/OpenMP/for_codegen.cpp
===================================================================
--- test/OpenMP/for_codegen.cpp
+++ test/OpenMP/for_codegen.cpp
@@ -8,10 +8,11 @@
 #define HEADER
 
 // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
 // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}})
 void without_schedule_clause(float *a, float *b, float *c, float *d) {
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]])
-  #pragma omp for
+  #pragma omp for nowait
 // CHECK: call void @__kmpc_for_static_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 34, i32* [[IS_LAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]], i32 1, i32 1)
 // UB = min(UB, GlobalUB)
 // CHECK-NEXT: [[UB:%.+]] = load i32, i32* [[OMP_UB]]
@@ -44,7 +45,7 @@
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK-NOT: __kmpc_cancel_barrier
 // CHECK: ret void
 }
 
@@ -84,7 +85,7 @@
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -143,7 +144,7 @@
 // CHECK: [[O_LOOP1_END]]
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -184,7 +185,7 @@
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -225,7 +226,7 @@
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -269,7 +270,7 @@
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
 
@@ -310,7 +311,7 @@
   }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
+// CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // CHECK: ret void
 }
Index: test/OpenMP/sections_codegen.cpp
===================================================================
--- test/OpenMP/sections_codegen.cpp
+++ test/OpenMP/sections_codegen.cpp
@@ -5,7 +5,8 @@
 #ifndef HEADER
 #define HEADER
-
+// CHECK: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
+// CHECK: [[IMPLICIT_BARRIER_SINGLE_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8*
 // CHECK-LABEL: foo
 void foo() {};
 // CHECK-LABEL: bar
@@ -26,7 +27,7 @@
   float l = 0.0; // Used as a base point in checks.
 // CHECK: [[GTID:%.+]] = call{{.*}} i32 @__kmpc_global_thread_num({{.*}})
 // CHECK: store float
-#pragma omp sections nowait
+#pragma omp sections
   {
 // CHECK: store i32 0, i32* [[LB_PTR:%.+]],
 // CHECK: store i32 1, i32* [[UB_PTR:%.+]],
@@ -71,6 +72,13 @@
 // CHECK: [[INNER_LOOP_END]]
   }
 // CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_SECTIONS_LOC]],
+#pragma omp sections nowait
+  {
+    foo();
+#pragma omp section
+    bar();
+  }
 // CHECK-NOT: __kmpc_cancel_barrier
   return tmain();
 }
@@ -87,7 +95,7 @@
 // CHECK: call void @__kmpc_end_single(
 // CHECK-NEXT: br label %[[END]]
 // CHECK: [[END]]
-// CHECK-NEXT: call i32 @__kmpc_cancel_barrier(
+// CHECK-NEXT: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_SINGLE_LOC]],
 // CHECK-NEXT: ret
 // CHECK: [[TERM_LPAD]]
 // CHECK: call void @__clang_call_terminate(i8*
Index: test/OpenMP/single_codegen.cpp
===================================================================
--- test/OpenMP/single_codegen.cpp
+++ test/OpenMP/single_codegen.cpp
@@ -18,6 +18,7 @@
 // CHECK-DAG: [[TEST_CLASS_TY:%.+]] = type { i{{[0-9]+}} }
 
 // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// CHECK: [[IMPLICIT_BARRIER_SINGLE_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8*
 
 // CHECK: define void [[FOO:@.+]]()
@@ -47,6 +48,7 @@
 // CHECK-NEXT: call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
 // CHECK-NEXT: br label {{%?}}[[EXIT]]
 // CHECK: [[EXIT]]
+// CHECK-NOT: __kmpc_cancel_barrier
 #pragma omp single nowait
   a = 2;
 // CHECK: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
@@ -73,7 +75,7 @@
 // CHECK: [[COPY_LIST_VOID_PTR:%.+]] = bitcast [3 x i8*]* [[COPY_LIST]] to i8*
 // CHECK: [[DID_IT_VAL:%.+]] = load i32, i32* [[DID_IT]],
 // CHECK: call void @__kmpc_copyprivate([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 24, i8* [[COPY_LIST_VOID_PTR]], void (i8*, i8*)* [[COPY_FUNC:@.+]], i32 [[DID_IT_VAL]])
-// CHECK: call{{.*}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* {{@.+}}, i32 [[GTID]])
+// CHECK: call{{.*}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_SINGLE_LOC]], i32 [[GTID]])
 #pragma omp single copyprivate(a, c, tc)
   foo();
 // CHECK-NOT: call i32 @__kmpc_single
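
Note on the ident_t flag constants matched by the new CHECK lines (66, 194, 322): they are the result of OR-ing OMP_IDENT_KMPC with the directive-specific implicit-barrier flag selected in emitImplicitBarrierCall. The standalone sketch below reproduces that arithmetic. It is illustrative only: the enumerator values are an assumption taken from the OpenMP runtime's kmp.h (KMP_IDENT_*), not from this patch; only the resulting constants 66, 194 and 322 are confirmed by the tests above, and the sketch::* names are hypothetical.

#include <cstdio>

namespace sketch {
// Hypothetical mirror of the OpenMPLocationFlags enumerators used in the patch;
// values assumed from KMP_IDENT_* in the OpenMP runtime's kmp.h.
enum LocationFlags : unsigned {
  IDENT_KMPC = 0x02,                  // compiled by a KMPC-compatible compiler
  IDENT_BARRIER_IMPL = 0x40,          // generic implicit barrier
  IDENT_BARRIER_IMPL_FOR = 0x40,      // implicit barrier after 'omp for'
  IDENT_BARRIER_IMPL_SECTIONS = 0xC0, // implicit barrier after 'omp sections'
  IDENT_BARRIER_IMPL_SINGLE = 0x140   // implicit barrier after 'omp single'
};

enum DirectiveKind { DK_unknown, DK_for, DK_sections, DK_single };

// Same flag-selection logic as emitImplicitBarrierCall above.
unsigned implicitBarrierFlags(DirectiveKind Kind) {
  switch (Kind) {
  case DK_for:      return IDENT_KMPC | IDENT_BARRIER_IMPL_FOR;      // 0x42
  case DK_sections: return IDENT_KMPC | IDENT_BARRIER_IMPL_SECTIONS; // 0xC2
  case DK_single:   return IDENT_KMPC | IDENT_BARRIER_IMPL_SINGLE;   // 0x142
  default:          return IDENT_KMPC | IDENT_BARRIER_IMPL;          // 0x42
  }
}
} // namespace sketch

int main() {
  using namespace sketch;
  // These are the values in the second i32 field of the ident_t globals
  // matched by the [[IMPLICIT_BARRIER*_LOC]] patterns in the tests.
  std::printf("for:      %u\n", implicitBarrierFlags(DK_for));      // 66
  std::printf("sections: %u\n", implicitBarrierFlags(DK_sections)); // 194
  std::printf("single:   %u\n", implicitBarrierFlags(DK_single));   // 322
  return 0;
}

Encoding the originating construct in the flags field is what lets the runtime (and tools reading the ident_t location records) tell which worksharing directive an implicit barrier belongs to, which is the motivation for splitting emitBarrierCall into explicit and implicit variants.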