Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -951,7 +951,39 @@ return CGF.EmitLValue(Helper); } -bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { +static llvm::Value *emitScheduleClause(CodeGenFunction &CGF, + const OMPLoopDirective &S) { + // Detect the loop schedule kind and chunk. + auto ScheduleKind = OMPC_SCHEDULE_unknown; + llvm::Value *Chunk = nullptr; + if (auto *C = + cast_or_null<OMPScheduleClause>(S.getSingleClause(OMPC_schedule))) { + ScheduleKind = C->getScheduleKind(); + if (auto *Ch = C->getChunkSize()) { + Chunk = CGF.EmitScalarExpr(Ch); + Chunk = CGF.EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType()); + } + } + if (Chunk && !isa<llvm::ConstantInt>(Chunk)) { + auto *LoopChunk = new llvm::GlobalVariable( + CGF.CGM.getModule(), Chunk->getType(), /*isConstant=*/false, + llvm::GlobalValue::PrivateLinkage, + llvm::Constant::getNullValue(Chunk->getType())); + auto Alignment = + CGF.CGM.getContext() + .getTypeAlignInChars(S.getIterationVariable()->getType()) + .getQuantity(); + LoopChunk->setAlignment(Alignment); + LoopChunk->setUnnamedAddr(true); + CGF.Builder.CreateAlignedStore(Chunk, LoopChunk, Alignment); + return LoopChunk; + } + return Chunk; +} + +bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S, + llvm::Value *Chunk) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -1014,14 +1046,12 @@ // Detect the loop schedule kind and chunk. 
auto ScheduleKind = OMPC_SCHEDULE_unknown; - llvm::Value *Chunk = nullptr; if (auto C = cast_or_null<OMPScheduleClause>( S.getSingleClause(OMPC_schedule))) { ScheduleKind = C->getScheduleKind(); - if (auto Ch = C->getChunkSize()) { - Chunk = EmitScalarExpr(Ch); - Chunk = EmitScalarConversion(Chunk, Ch->getType(), - S.getIterationVariable()->getType()); + if (Chunk && Chunk->getType()->isPointerTy()) { + Chunk = Builder.CreateAlignedLoad( + Chunk, cast<llvm::GlobalVariable>(Chunk)->getAlignment()); } } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); @@ -1075,8 +1105,9 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { LexicalScope Scope(*this, S.getSourceRange()); bool HasLastprivates = false; - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + auto *Chunk = emitScheduleClause(*this, S); + auto &&CodeGen = [&S, &HasLastprivates, Chunk](CodeGenFunction &CGF) { + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, Chunk); }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen); @@ -1329,8 +1360,9 @@ // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CGF.EmitOMPWorksharingLoop(S); + auto *Chunk = emitScheduleClause(*this, S); + auto &&CodeGen = [&S, Chunk](CodeGenFunction &CGF) { + CGF.EmitOMPWorksharingLoop(S, Chunk); // Emit implicit barrier at the end of parallel region, but this barrier // is at the end of 'for' directive, so emit it as the implicit barrier for // this 'for' directive. Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -2193,9 +2193,12 @@ bool SeparateIter = false); void EmitOMPSimdFinal(const OMPLoopDirective &S); /// \brief Emit code for the worksharing loop-based directive. 
+ /// \param Chunk Chunk value for 'schedule' clause. May be a value (if it is a + /// constant), pointer to global variable with calculated schedule (if it is + /// not a constant) or nullptr (if not specified). /// \return true, if this construct has any lastprivate clause, false - /// otherwise. - bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); + bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, llvm::Value *Chunk); void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, llvm::Value *LB, Index: test/OpenMP/for_codegen.cpp =================================================================== --- test/OpenMP/for_codegen.cpp +++ test/OpenMP/for_codegen.cpp @@ -12,6 +12,21 @@ // CHECK-DAG: [[I:@.+]] = global i8 1, // CHECK-DAG: [[J:@.+]] = global i8 2, // CHECK-DAG: [[K:@.+]] = global i8 3, +// CHECK-DAG: [[CHUNK:@.+]] = {{.+}}global i64 + +// CHECK-LABEL: with_var_schedule +void with_var_schedule() { + int a = 5; +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:[@%].+]]) +// CHECK: store i64 %{{.+}}, i64* [[CHUNK]] +// CHECK: [[CHUNK_SIZE:%.+]] = load i64, i64* [[CHUNK]], +// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: __kmpc_cancel_barrier +#pragma omp for schedule(static, a) + for (unsigned long long i = 1; i < 2; ++i) { + } +} // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) void without_schedule_clause(float *a, float *b, float *c, float *d) { Index: test/OpenMP/parallel_for_codegen.cpp =================================================================== --- 
test/OpenMP/parallel_for_codegen.cpp +++ test/OpenMP/parallel_for_codegen.cpp @@ -8,6 +8,22 @@ #define HEADER // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[CHUNK:@.+]] = {{.+}}global i64 + +// CHECK-LABEL: with_var_schedule +void with_var_schedule() { + int a = 5; +// CHECK: store i64 %{{.+}}, i64* [[CHUNK]] +// CHECK: call void {{.+}} @__kmpc_fork_call( +// CHECK: [[CHUNK_SIZE:%.+]] = load i64, i64* [[CHUNK]], +// CHECK: call void @__kmpc_for_static_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC:@[^,]+]], i32 [[GTID:%[^,]+]], i32 33, i32* [[IS_LAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]], i64 1, i64 [[CHUNK_SIZE]]) +// CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]]) +// CHECK: __kmpc_cancel_barrier +#pragma omp parallel for schedule(static, a) + for (unsigned long long i = 1; i < 2; ++i) { + } +} + // CHECK-LABEL: define {{.*void}} @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) void without_schedule_clause(float *a, float *b, float *c, float *d) { #pragma omp parallel for