Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -282,9 +282,10 @@
   }
 }
 
-void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S,
-                                       OMPPrivateScope &LoopScope,
-                                       bool SeparateIter) {
+void CodeGenFunction::EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
+                                       const Expr *LoopCond,
+                                       const Expr *IncExpr,
+                                       const std::function<void()> &BodyGen) {
   auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
   auto Cnt = getPGORegionCounter(&S);
 
@@ -296,17 +297,13 @@
   // If there are any cleanups between here and the loop-exit scope,
   // create a block to stage a loop exit along.
   auto ExitBlock = LoopExit.getBlock();
-  if (LoopScope.requiresCleanups())
+  if (RequiresCleanup)
     ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
   auto LoopBody = createBasicBlock("omp.inner.for.body");
 
-  // Emit condition: "IV < LastIteration + 1 [ - 1]"
-  // ("- 1" when lastprivate clause is present - separate one iteration).
-  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter));
-  Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock,
-                       PGO.createLoopWeights(S.getCond(SeparateIter), Cnt));
-
+  // Emit condition.
+  EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, Cnt.getCount());
   if (ExitBlock != LoopExit.getBlock()) {
     EmitBlock(ExitBlock);
     EmitBranchThroughCleanup(LoopExit);
   }
@@ -319,12 +316,11 @@
   auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  EmitOMPLoopBody(S);
-  EmitStopPoint(&S);
+  BodyGen();
 
   // Emit "IV = IV + 1" and a back-edge to the condition block.
   EmitBlock(Continue.getBlock());
-  EmitIgnoredExpr(S.getInc());
+  EmitIgnoredExpr(IncExpr);
   BreakContinueStack.pop_back();
   EmitBranch(CondBlock);
   LoopStack.pop();
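Note: EmitOMPInnerLoop no longer takes the whole loop directive plus a private
scope; it is generalized to take the loop condition, the increment expression,
and a callback that emits the body, so non-loop constructs such as 'sections'
can reuse it. As a rough source-level sketch of the control flow it emits (the
innerLoop helper below is hypothetical, purely an illustration of the emitted
blocks, not part of the patch):

    #include <functional>

    // Moral equivalent of what EmitOMPInnerLoop emits; the comments name the
    // basic blocks created by the codegen ("omp.inner.for.*").
    static void innerLoop(const std::function<bool()> &LoopCond,
                          const std::function<void()> &IncExpr,
                          const std::function<void()> &BodyGen) {
      while (LoopCond()) { // omp.inner.for.cond: EmitBranchOnBoolExpr
        BodyGen();         // omp.inner.for.body: caller-supplied emission
        IncExpr();         // omp.inner.for.inc: "IV = IV + 1", back-edge
      }
    }                      // omp.inner.for.end

The hunks that follow update the EmitOMPInnerLoop call sites to this interface.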
@@ -460,7 +456,12 @@
   {
     OMPPrivateScope LoopScope(*this);
     EmitPrivateLoopCounters(*this, LoopScope, S.counters());
-    EmitOMPInnerLoop(S, LoopScope, /* SeparateIter */ true);
+    EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                     S.getCond(/*SeparateIter=*/true), S.getInc(),
+                     [&S, this]() {
+                       EmitOMPLoopBody(S);
+                       EmitStopPoint(&S);
+                     });
     EmitOMPLoopBody(S, /* SeparateIter */ true);
   }
   EmitOMPSimdFinal(S);
@@ -471,7 +472,12 @@
   {
     OMPPrivateScope LoopScope(*this);
     EmitPrivateLoopCounters(*this, LoopScope, S.counters());
-    EmitOMPInnerLoop(S, LoopScope);
+    EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                     S.getCond(/*SeparateIter=*/false), S.getInc(),
+                     [&S, this]() {
+                       EmitOMPLoopBody(S);
+                       EmitStopPoint(&S);
+                     });
   }
   EmitOMPSimdFinal(S);
 }
@@ -543,7 +549,11 @@
   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
-  EmitOMPInnerLoop(S, LoopScope);
+  EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                   S.getCond(/*SeparateIter=*/false), S.getInc(), [&S, this]() {
+    EmitOMPLoopBody(S);
+    EmitStopPoint(&S);
+  });
 
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
@@ -638,7 +648,12 @@
     // IV = LB;
     EmitIgnoredExpr(S.getInit());
     // while (idx <= UB) { BODY; ++idx; }
-    EmitOMPInnerLoop(S, LoopScope);
+    EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                     S.getCond(/*SeparateIter=*/false), S.getInc(),
+                     [&S, this]() {
+                       EmitOMPLoopBody(S);
+                       EmitStopPoint(&S);
+                     });
     // Tell the runtime we are done.
     RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
   } else {
@@ -669,12 +684,108 @@
   llvm_unreachable("CodeGen for 'omp for simd' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &) {
-  llvm_unreachable("CodeGen for 'omp sections' is not supported yet.");
+static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
+                                const Twine &Name,
+                                llvm::Value *Init = nullptr) {
+  auto LVal = CGF.MakeNaturalAlignAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
+  if (Init)
+    CGF.EmitScalarInit(Init, LVal);
+  return LVal;
+}
+
+void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
+
+  auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
+  auto *CS = dyn_cast<CompoundStmt>(Stmt);
+  if (CS && CS->size() > 1) {
+    auto &C = CGM.getContext();
+    auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+    // Emit helper vars inits.
+    LValue LB = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.lb.",
+                                  Builder.getInt32(0));
+    auto *GlobalUBVal = Builder.getInt32(CS->size() - 1);
+    LValue UB =
+        createSectionLVal(*this, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
+    LValue ST = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.st.",
+                                  Builder.getInt32(1));
+    LValue IL = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.il.",
+                                  Builder.getInt32(0));
+    // Loop counter.
+    LValue IV = createSectionLVal(*this, KmpInt32Ty, ".omp.sections.iv.");
+    OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+    OpaqueValueMapping OpaqueIV(*this, &IVRefExpr, IV);
+    OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
+    OpaqueValueMapping OpaqueUB(*this, &UBRefExpr, UB);
+    // Generate condition for loop.
+    BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
+                        OK_Ordinary, S.getLocStart(), /*fpContractable=*/false);
+    // Increment for loop counter.
+    UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
+                      S.getLocStart());
+    auto BodyGen = [this, CS, &S, &IV]() {
+      // Iterate through all sections and emit a switch construct:
+      // switch (IV) {
+      //   case 0:
+      //     <SectionStmt[0]>;
+      //     break;
+      // ...
+      //   case <NumSection> - 1:
+      //     <SectionStmt[<NumSection> - 1]>;
+      //     break;
+      // }
+      // .omp.sections.exit:
+      auto *ExitBB = createBasicBlock(".omp.sections.exit");
+      auto *SwitchStmt = Builder.CreateSwitch(
+          EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
+          CS->size());
+      unsigned CaseNumber = 0;
+      for (auto C = CS->children(); C; ++C, ++CaseNumber) {
+        auto CaseBB = createBasicBlock(".omp.sections.case");
+        EmitBlock(CaseBB);
+        SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
+        EmitStmt(*C);
+        EmitBranch(ExitBB);
+      }
+      EmitBlock(ExitBB, /*IsFinished=*/true);
+    };
+    // Emit static non-chunked loop.
+    CGM.getOpenMPRuntime().emitForInit(
+        *this, S.getLocStart(), OMPC_SCHEDULE_static, /*IVSize=*/32,
+        /*IVSigned=*/true, IL.getAddress(), LB.getAddress(), UB.getAddress(),
+        ST.getAddress());
+    // UB = min(UB, GlobalUB);
+    auto *UBVal = EmitLoadOfScalar(UB, S.getLocStart());
+    auto *MinUBGlobalUB = Builder.CreateSelect(
+        Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
+    EmitStoreOfScalar(MinUBGlobalUB, UB);
+    // IV = LB;
+    EmitStoreOfScalar(EmitLoadOfScalar(LB, S.getLocStart()), IV);
+    // while (idx <= UB) { BODY; ++idx; }
+    EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen);
+    // Tell the runtime we are done.
+    CGM.getOpenMPRuntime().emitForFinish(*this, S.getLocStart(),
+                                         OMPC_SCHEDULE_static);
+  } else {
+    // If only one section is found - no need to generate loop, emit as a single
+    // region.
+    CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
+      InlinedOpenMPRegionScopeRAII Region(*this, S);
+      EmitStmt(Stmt);
+      EnsureInsertPoint();
+    }, S.getLocStart());
+  }
+
+  // Emit an implicit barrier at the end.
+  if (!S.getSingleClause(OMPC_nowait))
+    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+                                           /*IsExplicit=*/false);
 }
 
-void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &) {
-  llvm_unreachable("CodeGen for 'omp section' is not supported yet.");
+void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
+  InlinedOpenMPRegionScopeRAII Region(*this, S);
+  EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+  EnsureInsertPoint();
 }
 
 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
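With more than one section, EmitOMPSectionsDirective above lowers the construct
to a statically scheduled worksharing loop over section indices whose body is a
switch. A rough source-level sketch, for illustration only: runAssignedSections
and its parameters are hypothetical names, and in the real codegen the chunk
bounds come from the __kmpc_for_static_init_4 runtime call rather than from
function arguments:

    #include <algorithm>

    // Conceptual expansion for one thread whose static chunk is [LB, UB];
    // NumSections is the number of '#pragma omp section' blocks.
    void runAssignedSections(int LB, int UB, int NumSections) {
      UB = std::min(UB, NumSections - 1);  // UB = min(UB, GlobalUB);
      for (int IV = LB; IV <= UB; ++IV) {  // while (IV <= UB) { BODY; ++IV; }
        switch (IV) {                      // BodyGen: one case per section
        case 0: /* <SectionStmt[0]> */ break;
        /* ... */
        default: break;                    // .omp.sections.exit
        }
      }
    }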
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2094,8 +2094,9 @@
   /// Helpers for the OpenMP loop directives.
   void EmitOMPLoopBody(const OMPLoopDirective &Directive,
                        bool SeparateIter = false);
-  void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
-                        bool SeparateIter = false);
+  void EmitOMPInnerLoop(const Stmt &S, bool RequiresCleanup,
+                        const Expr *LoopCond, const Expr *IncExpr,
+                        const std::function<void()> &BodyGen);
   void EmitOMPSimdFinal(const OMPLoopDirective &S);
   void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
   void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
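For orientation before the test: the runtime entry point the new code targets
is the static-loop init call from the LLVM OpenMP runtime (libiomp5). The
declaration shape below is quoted from the runtime's kmp.h as background, not
as part of this patch; the literal 34 seen in the test's checks is
kmp_sch_static, i.e. non-chunked static scheduling:

    // Assumed libiomp5 declaration, shown for reference only:
    void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  kmp_int32 *plower, kmp_int32 *pupper,
                                  kmp_int32 *pstride, kmp_int32 incr,
                                  kmp_int32 chunk);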
Index: test/OpenMP/sections_codegen.cpp
===================================================================
--- test/OpenMP/sections_codegen.cpp
+++ test/OpenMP/sections_codegen.cpp
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -emit-llvm -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -include-pch %t -verify %s -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: foo
+void foo() {};
+// CHECK-LABEL: bar
+void bar() {};
+
+template <class T>
+T tmain() {
+#pragma omp parallel
+#pragma omp sections
+  {
+    foo();
+  }
+  return T();
+}
+
+// CHECK-LABEL: @main
+int main() {
+  float l = 0.0; // Used as a base point in checks.
+// CHECK: [[GTID:%.+]] = call{{.*}} i32 @__kmpc_global_thread_num({{.*}})
+// CHECK: store float
+#pragma omp sections nowait
+  {
+// CHECK: store i32 0, i32* [[LB_PTR:%.+]],
+// CHECK: store i32 1, i32* [[UB_PTR:%.+]],
+// CHECK: call void @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_PTR:%.+]], i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[STRIDE_PTR:%.+]], i32 1, i32 1)
+// <<UB = min(UB, GlobalUB);>>
+// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]]
+// CHECK: [[CMP:%.+]] = icmp slt i32 [[UB]], 1
+// CHECK: [[MIN_UB_GLOBALUB:%.+]] = select i1 [[CMP]], i32 [[UB]], i32 1
+// CHECK: store i32 [[MIN_UB_GLOBALUB]], i32* [[UB_PTR]]
+// <<IV = LB;>>
+// CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]]
+// CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]]
+// CHECK: br label %[[INNER_FOR_COND:.+]]
+// CHECK: [[INNER_FOR_COND]]
+// <<IV <= UB?>>
+// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]]
+// CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]]
+// CHECK: [[CMP:%.+]] = icmp sle i32 [[IV]], [[UB]]
+// CHECK: br i1 [[CMP]], label %[[INNER_LOOP_BODY:.+]], label %[[INNER_LOOP_END:.+]]
+// CHECK: [[INNER_LOOP_BODY]]
+// <<switch(IV);>>
+// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]]
+// CHECK: switch i32 [[IV]], label %[[SECTIONS_EXIT:.+]] [
+// CHECK-NEXT: i32 0, label %[[SECTIONS_CASE0:.+]]
+// CHECK-NEXT: i32 1, label %[[SECTIONS_CASE1:.+]]
+#pragma omp section
+// CHECK: [[SECTIONS_CASE0]]
+// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}()
+// CHECK: br label %[[SECTIONS_EXIT]]
+    foo();
+#pragma omp section
+// CHECK: [[SECTIONS_CASE1]]
+// CHECK-NEXT: invoke void @{{.*}}bar{{.*}}()
+// CHECK: br label %[[SECTIONS_EXIT]]
+    bar();
+// CHECK: [[SECTIONS_EXIT]]
+// <<++IV;>>
+// CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]]
+// CHECK-NEXT: [[INC:%.+]] = add nsw i32 [[IV]], 1
+// CHECK-NEXT: store i32 [[INC]], i32* [[IV_PTR]]
+// CHECK-NEXT: br label %[[INNER_FOR_COND]]
+// CHECK: [[INNER_LOOP_END]]
+  }
+// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
+// CHECK-NOT: __kmpc_cancel_barrier
+  return tmain<int>();
+}
+
+// CHECK-LABEL: tmain
+// CHECK: call void {{.*}} @__kmpc_fork_call(
+// CHECK-NOT: __kmpc_global_thread_num
+// CHECK: [[RES:%.+]] = call i32 @__kmpc_single(
+// CHECK-NEXT: [[BOOLRES:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT: br i1 [[BOOLRES]], label %[[THEN:.+]], label %[[END:.+]]
+// CHECK: [[THEN]]
+// CHECK-NEXT: invoke void @{{.*}}foo{{.*}}()
+// CHECK-NEXT: unwind label %[[TERM_LPAD:.+]]
+// CHECK: call void @__kmpc_end_single(
+// CHECK-NEXT: br label %[[END]]
+// CHECK: [[END]]
+// CHECK-NEXT: call i32 @__kmpc_cancel_barrier(
+// CHECK-NEXT: ret
+// CHECK: [[TERM_LPAD]]
+// CHECK: call void @__clang_call_terminate(i8*
+// CHECK-NEXT: unreachable
+
+#endif
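In tmain the associated block holds a single section, so the CS->size() > 1
test in EmitOMPSectionsDirective fails and the directive takes the
single-region path; the tmain checks above match that shape. Sketch of the
emitted sequence, assembled from the checks as commented pseudocode
(illustration only, not literal IR):

    // if (__kmpc_single(loc, gtid)) {    // only one thread enters
    //   foo();                           // the lone section body
    //   __kmpc_end_single(loc, gtid);
    // }
    // __kmpc_cancel_barrier(loc, gtid);  // implicit barrier: no 'nowait' in tmain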