Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -1490,6 +1490,12 @@
                                        const VarDecl *NativeParam,
                                        const VarDecl *TargetParam) const;
 
+  /// Choose default schedule type and chunk value for the
+  /// dist_schedule clause.
+  virtual void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+      llvm::Value *&Chunk) const {}
+
   /// Emits call of the outlined function with the provided arguments,
   /// translating these arguments to correct target-specific arguments.
   virtual void
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -340,6 +340,11 @@
   ///
   void functionFinished(CodeGenFunction &CGF) override;
 
+  /// Choose a default value for the dist_schedule clause.
+  void getDefaultDistScheduleAndChunk(CodeGenFunction &CGF,
+      const OMPLoopDirective &S, OpenMPDistScheduleClauseKind &ScheduleKind,
+      llvm::Value *&Chunk) const override;
+
 private:
   /// Track the execution mode when codegening directives within a target
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4081,3 +4081,15 @@
   FunctionGlobalizedDecls.erase(CGF.CurFn);
   CGOpenMPRuntime::functionFinished(CGF);
 }
+
+void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    OpenMPDistScheduleClauseKind &ScheduleKind,
+    llvm::Value *&Chunk) const {
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
+    ScheduleKind = OMPC_DIST_SCHEDULE_static;
+    Chunk = CGF.EmitScalarConversion(getNVPTXNumThreads(CGF),
+        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+        S.getIterationVariable()->getType(), S.getBeginLoc());
+  }
+}
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -3325,6 +3325,10 @@
                                      S.getIterationVariable()->getType(),
                                      S.getBeginLoc());
       }
+    } else {
+      // Default behaviour for dist_schedule clause.
+      CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
+          *this, S, ScheduleKind, Chunk);
     }
     const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
     const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
Index: test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
===================================================================
--- test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -35,7 +35,7 @@
     l = i;
   }
 
-  #pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
+#pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64)
   for(int i = 0; i < n; i++) {
     aa[i] += 1;
   }
@@ -87,7 +87,7 @@
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL2:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -101,7 +101,7 @@
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL3:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -117,7 +117,7 @@
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
 // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
 // CHECK: {{call|invoke}} void [[OUTL4:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
Index: test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
===================================================================
--- test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -33,7 +33,7 @@
     l = i;
   }
 
-  #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
+  #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64)
   for(int i = 0; i < n; i++) {
     aa[i] += 1;
   }
@@ -82,7 +82,7 @@
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL2:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -96,7 +96,7 @@
 // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}(
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92,
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91,
 // CHECK: {{call|invoke}} void [[OUTL3:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
@@ -112,7 +112,7 @@
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
 // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
-// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
+// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
 // CHECK: {{call|invoke}} void [[OUTL4:@.+]](
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
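
Note on the test updates: with this patch, a distribute loop without an explicit dist_schedule clause that is code-generated in SPMD mode on NVPTX defaults to dist_schedule(static, <number of threads>) instead of an unchunked static schedule, so the schedule-type argument of __kmpc_for_static_init_4 in the CHECK lines moves from 92 (unchunked distribute-static) to 91 (chunked distribute-static). The snippet below is not part of the patch; it is a minimal sketch of the kind of combined directive affected, with illustrative names (foo, aa, N) only.

// Minimal sketch, not part of the patch. Compiled for an NVPTX target in SPMD
// mode, the distribute loop below now gets the default schedule
// dist_schedule(static, <num-threads>), so the emitted __kmpc_for_static_init_4
// call uses schedule type 91 (chunked) rather than 92 (unchunked).
#define N 1000

int aa[N];

void foo() {
  #pragma omp target teams distribute parallel for map(tofrom: aa)
  for (int i = 0; i < N; i++)
    aa[i] += 1;
}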