Index: clang/lib/AST/StmtOpenMP.cpp =================================================================== --- clang/lib/AST/StmtOpenMP.cpp +++ clang/lib/AST/StmtOpenMP.cpp @@ -2374,6 +2374,10 @@ Dir->setNextLowerBound(Exprs.NLB); Dir->setNextUpperBound(Exprs.NUB); Dir->setNumIterations(Exprs.NumIterations); + Dir->setPrevLowerBoundVariable(Exprs.PrevLB); + Dir->setPrevUpperBoundVariable(Exprs.PrevUB); + Dir->setDistInc(Exprs.DistInc); + Dir->setPrevEnsureUpperBound(Exprs.PrevEUB); Dir->setCounters(Exprs.Counters); Dir->setPrivateCounters(Exprs.PrivateCounters); Dir->setInits(Exprs.Inits); @@ -2383,6 +2387,15 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); + Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); + Dir->setCombinedEnsureUpperBound(Exprs.DistCombinedFields.EUB); + Dir->setCombinedInit(Exprs.DistCombinedFields.Init); + Dir->setCombinedCond(Exprs.DistCombinedFields.Cond); + Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB); + Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); + Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); + Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); return Dir; } @@ -2418,6 +2431,10 @@ Dir->setNextLowerBound(Exprs.NLB); Dir->setNextUpperBound(Exprs.NUB); Dir->setNumIterations(Exprs.NumIterations); + Dir->setPrevLowerBoundVariable(Exprs.PrevLB); + Dir->setPrevUpperBoundVariable(Exprs.PrevUB); + Dir->setDistInc(Exprs.DistInc); + Dir->setPrevEnsureUpperBound(Exprs.PrevEUB); Dir->setCounters(Exprs.Counters); Dir->setPrivateCounters(Exprs.PrivateCounters); Dir->setInits(Exprs.Inits); @@ -2427,6 +2444,15 @@ Dir->setDependentInits(Exprs.DependentInits); Dir->setFinalsConditions(Exprs.FinalsConditions); Dir->setPreInits(Exprs.PreInits); + Dir->setCombinedLowerBoundVariable(Exprs.DistCombinedFields.LB); + 
Dir->setCombinedUpperBoundVariable(Exprs.DistCombinedFields.UB); + Dir->setCombinedEnsureUpperBound(Exprs.DistCombinedFields.EUB); + Dir->setCombinedInit(Exprs.DistCombinedFields.Init); + Dir->setCombinedCond(Exprs.DistCombinedFields.Cond); + Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB); + Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB); + Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond); + Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond); return Dir; } Index: clang/lib/Basic/OpenMPKinds.cpp =================================================================== --- clang/lib/Basic/OpenMPKinds.cpp +++ clang/lib/Basic/OpenMPKinds.cpp @@ -588,7 +588,9 @@ DKind == OMPD_teams_distribute_parallel_for_simd || DKind == OMPD_teams_distribute_parallel_for || DKind == OMPD_target_teams_distribute_parallel_for || - DKind == OMPD_target_teams_distribute_parallel_for_simd; + DKind == OMPD_target_teams_distribute_parallel_for_simd || + DKind == OMPD_parallel_loop || DKind == OMPD_teams_loop || + DKind == OMPD_target_parallel_loop || DKind == OMPD_target_teams_loop; } bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) { @@ -617,7 +619,8 @@ DKind == OMPD_parallel_master_taskloop_simd || DKind == OMPD_parallel_masked_taskloop || DKind == OMPD_parallel_masked_taskloop_simd || - DKind == OMPD_parallel_loop || DKind == OMPD_target_parallel_loop; + DKind == OMPD_parallel_loop || DKind == OMPD_target_parallel_loop || + DKind == OMPD_teams_loop; } bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) { @@ -714,7 +717,8 @@ Kind == OMPD_teams_distribute_parallel_for_simd || Kind == OMPD_teams_distribute_parallel_for || Kind == OMPD_target_teams_distribute_parallel_for || - Kind == OMPD_target_teams_distribute_parallel_for_simd; + Kind == OMPD_target_teams_distribute_parallel_for_simd || + Kind == OMPD_teams_loop || Kind == OMPD_target_teams_loop; } bool 
clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) { @@ -751,7 +755,6 @@ case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - case OMPD_target_teams_loop: CaptureRegions.push_back(OMPD_task); CaptureRegions.push_back(OMPD_target); CaptureRegions.push_back(OMPD_teams); @@ -766,6 +769,7 @@ CaptureRegions.push_back(OMPD_task); CaptureRegions.push_back(OMPD_target); break; + case OMPD_teams_loop: case OMPD_teams_distribute_parallel_for: case OMPD_teams_distribute_parallel_for_simd: CaptureRegions.push_back(OMPD_teams); @@ -800,6 +804,7 @@ CaptureRegions.push_back(OMPD_parallel); CaptureRegions.push_back(OMPD_taskloop); break; + case OMPD_target_teams_loop: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: CaptureRegions.push_back(OMPD_task); @@ -807,9 +812,6 @@ CaptureRegions.push_back(OMPD_teams); CaptureRegions.push_back(OMPD_parallel); break; - case OMPD_teams_loop: - CaptureRegions.push_back(OMPD_teams); - break; case OMPD_nothing: CaptureRegions.push_back(OMPD_nothing); break; Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2778,7 +2778,7 @@ const StaticRTInput &Values) { OpenMPSchedType ScheduleNum = getRuntimeSchedule( ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); - assert(isOpenMPWorksharingDirective(DKind) && + assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) && "Expected loop-based or sections-based directive."); llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind) @@ -6216,6 +6216,7 @@ DefaultVal = -1; return nullptr; } + case OMPD_target_teams_loop: case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: @@ -6235,12 +6236,14 @@ case OMPD_target_parallel: case 
OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: + case OMPD_target_parallel_loop: case OMPD_target_simd: DefaultVal = 1; return nullptr; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_loop: case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: @@ -6457,6 +6460,8 @@ return ThreadLimit; } return nullptr; + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -6659,6 +6664,8 @@ getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal)) return NumThreads; return Bld.getInt32(0); + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: @@ -9315,7 +9322,8 @@ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - if (isOpenMPDistributeDirective(DKind)) + // For now, just treat 'target teams loop' as if it's distributed. + if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop) return NestedDir; if (DKind == OMPD_teams) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( @@ -9798,7 +9806,8 @@ OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; // Get nested teams distribute kind directive, if any. 
- if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) + if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) && + Kind != OMPD_target_teams_loop) TD = getNestedDistributeDirective(CGM.getContext(), D); if (!TD) return llvm::ConstantInt::get(CGF.Int64Ty, 0); @@ -10174,6 +10183,14 @@ CGM, ParentName, cast(E)); break; + case OMPD_target_teams_loop: + CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( + CGM, ParentName, cast(E)); + break; + case OMPD_target_parallel_loop: + CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CGM, ParentName, cast(E)); + break; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -655,6 +655,8 @@ case OMPD_target: case OMPD_target_teams: return hasNestedSPMDDirective(Ctx, D); + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: Index: clang/lib/CodeGen/CGStmt.cpp =================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -416,16 +416,19 @@ EmitOMPGenericLoopDirective(cast(*S)); break; case Stmt::OMPTeamsGenericLoopDirectiveClass: - llvm_unreachable("teams loop directive not supported yet."); + EmitOMPTeamsGenericLoopDirective(cast(*S)); break; case Stmt::OMPTargetTeamsGenericLoopDirectiveClass: - llvm_unreachable("target teams loop directive not supported yet."); + EmitOMPTargetTeamsGenericLoopDirective( + cast(*S)); break; case Stmt::OMPParallelGenericLoopDirectiveClass: - llvm_unreachable("parallel loop directive not supported yet."); + EmitOMPParallelGenericLoopDirective( + cast(*S)); break; case Stmt::OMPTargetParallelGenericLoopDirectiveClass: - llvm_unreachable("target 
parallel loop directive not supported yet."); + EmitOMPTargetParallelGenericLoopDirective( + cast(*S)); break; case Stmt::OMPParallelMaskedDirectiveClass: llvm_unreachable("parallel masked directive not supported yet."); Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -7807,6 +7807,148 @@ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen); } +void CodeGenFunction::EmitOMPParallelGenericLoopDirective( + const OMPLoopDirective &S) { + // Emit combined directive as if its constituent constructs are 'parallel' + // and 'for'. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + emitOMPCopyinClause(CGF, S); + (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); + }; + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); +} + +void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( + const OMPTeamsGenericLoopDirective &S) { + // To be consistent with current behavior of 'target teams loop', emit + // 'teams loop' as if its constituent constructs are 'distribute', + // 'parallel', and 'for'. + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. 
+ auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, + CodeGenDistribute); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); +} + +static void +emitTargetTeamsGenericLoopRegion(CodeGenFunction &CGF, + const OMPTargetTeamsGenericLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit 'teams loop' as if its constituent constructs are 'distribute', + // 'parallel', and 'for'. + auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; + + // Emit teams region as a standalone region. + auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitInlinedDirective( + CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); + }; + + emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, + CodeGenTeams); + emitPostUpdateForReductionClause(CGF, S, + [](CodeGenFunction &) { return nullptr; }); +} + +/// Emit combined directive 'target teams loop' as if its constituent +/// constructs are 'target', 'teams', 'distribute', 'parallel', and 'for'. 
+void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective( + const OMPTargetTeamsGenericLoopDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsGenericLoopRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsGenericLoopDirective &S) { + // Emit SPMD target teams loop region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsGenericLoopRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && + "Target device function emission failed for 'target teams loop'."); +} + +static void emitTargetParallelGenericLoopRegion( + CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, + PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit as 'parallel for'. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); +} + +void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelGenericLoopDirective &S) { + // Emit target parallel loop region as a standalone region. 
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelGenericLoopRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +/// Emit combined directive 'target parallel loop' as if its constituent +/// constructs are 'target', 'parallel', and 'for'. +void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( + const OMPTargetParallelGenericLoopDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelGenericLoopRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { if (const auto *SD = dyn_cast(&D)) { Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -3584,7 +3584,13 @@ void EmitOMPTargetTeamsDistributeSimdDirective( const OMPTargetTeamsDistributeSimdDirective &S); void EmitOMPGenericLoopDirective(const OMPGenericLoopDirective &S); + void EmitOMPParallelGenericLoopDirective(const OMPLoopDirective &S); + void EmitOMPTargetParallelGenericLoopDirective( + const OMPTargetParallelGenericLoopDirective &S); + void EmitOMPTargetTeamsGenericLoopDirective( + const OMPTargetTeamsGenericLoopDirective &S); void EmitOMPInteropDirective(const OMPInteropDirective &S); + void EmitOMPTeamsGenericLoopDirective(const OMPTeamsGenericLoopDirective &S); /// Emit device code for the target directive. 
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, @@ -3623,6 +3629,16 @@ CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S); + /// Emit device code for the target teams loop directive. + static void EmitOMPTargetTeamsGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsGenericLoopDirective &S); + + /// Emit device code for the target parallel loop directive. + static void EmitOMPTargetParallelGenericLoopDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelGenericLoopDirective &S); + static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction( CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S); Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -4196,7 +4196,6 @@ case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: - case OMPD_target_teams_loop: case OMPD_target_parallel_loop: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: { @@ -4450,6 +4449,7 @@ Params); break; } + case OMPD_target_teams_loop: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst(); @@ -4509,22 +4509,7 @@ break; } - case OMPD_teams_loop: { - QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst(); - QualType KmpInt32PtrTy = - Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); - - Sema::CapturedParamNameType ParamsTeams[] = { - std::make_pair(".global_tid.", KmpInt32PtrTy), - std::make_pair(".bound_tid.", KmpInt32PtrTy), - std::make_pair(StringRef(), QualType()) // __context with shared vars - }; - // Start a captured region for 'teams'. 
- ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsTeams, /*OpenMPCaptureLevel=*/0); - break; - } - + case OMPD_teams_loop: case OMPD_teams_distribute_parallel_for: case OMPD_teams_distribute_parallel_for_simd: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1).withConst(); Index: clang/test/OpenMP/generic_loop_codegen.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/generic_loop_codegen.cpp @@ -0,0 +1,117 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR + +// Check same results after serialization round-trip +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-pch -o %t %s +// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR-PCH + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void foo(int t) { + + int i, j, z; + #pragma omp loop collapse(2) reduction(+:z) lastprivate(j) bind(thread) + for (int i = 0; i