diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -3500,7 +3500,8 @@ return; if (auto *VD = dyn_cast(E->getDecl())) { // Check the datasharing rules for the expressions in the clauses. - if (!CS) { + if (!CS || (isa(VD) && !CS->capturesVariable(VD) && + !Stack->getTopDSA(VD, /*FromParent=*/false).RefExpr)) { if (auto *CED = dyn_cast(VD)) if (!CED->hasAttr()) { Visit(CED->getInit()); @@ -3819,6 +3820,10 @@ } void VisitOMPExecutableDirective(OMPExecutableDirective *S) { for (OMPClause *C : S->clauses()) { + // Skip analysis of arguments of private clauses for task|target + // directives. + if (isa_and_nonnull(C)) + continue; // Skip analysis of arguments of implicitly defined firstprivate clause // for task|target directives. // Skip analysis of arguments of implicitly defined map clause for target @@ -3841,6 +3846,15 @@ VisitStmt(S); } + void VisitCallExpr(CallExpr *S) { + for (Stmt *C : S->arguments()) { + if (C) { + // Check implicitly captured variables in the task-based directives to + // check if they must be firstprivatized. + Visit(C); + } + } + } void VisitStmt(Stmt *S) { for (Stmt *C : S->children()) { if (C) { diff --git a/clang/test/Analysis/cfg-openmp.cpp b/clang/test/Analysis/cfg-openmp.cpp --- a/clang/test/Analysis/cfg-openmp.cpp +++ b/clang/test/Analysis/cfg-openmp.cpp @@ -598,19 +598,19 @@ void tdpf(int argc) { int x, cond, fp, rd, lin, step, map; // CHECK-DAG: [B1] -// CHECK-DAG: [[#TDPF:]]: cond -// CHECK-DAG: [[#TDPF+1]]: [B1.[[#TDPF]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-DAG: [[#TDPF:]]: [B1.{{.+}}] +// CHECK-DAG: [[#TDPF+1]]: [B1.[[#TDPF+6]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-DAG: [[#TDPF+2]]: [B1.[[#TDPF+1]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-DAG: [[#TDPF+3]]: [B1.[[#TDPF+6]]] -// CHECK-DAG: [[#TDPF+4]]: [B1.[[#TDPF+7]]] +// CHECK-DAG: [[#TDPF+3]]: [B1.[[#TDPF+7]]] +// CHECK-DAG: [[#TDPF+4]]: [B1.[[#TDPF+8]]] // CHECK-DAG: [[#TDPF+5]]: #pragma omp teams distribute parallel for if(cond) firstprivate(fp) reduction(+: rd) // CHECK-DAG: for (int i = 0; // CHECK-DAG: [B3.[[#TDPFB:]]]; -// CHECK-DAG: [[#TDPF+6]]: fp -// CHECK-DAG: [[#TDPF+7]]: rd -// CHECK-DAG: [[#TDPF+8]]: argc -// CHECK-DAG: [[#TDPF+9]]: x -// CHECK-DAG: [[#TDPF+10]]: cond +// CHECK-DAG: [[#TDPF+6]]: cond +// CHECK-DAG: [[#TDPF+7]]: fp +// CHECK-DAG: [[#TDPF+8]]: rd +// CHECK-DAG: [[#TDPF+9]]: argc +// CHECK-DAG: [[#TDPF+10]]: x // CHECK-DAG: [[#TDPF+11]]: #pragma omp target // CHECK-DAG: [B3] // CHECK-DAG: [[#TDPFB-3]]: x @@ -627,19 +627,19 @@ void tdpfs(int argc) { int x, cond, fp, rd, lin, step, map; // CHECK-DAG: [B1] -// CHECK-DAG: [[#TDPFS:]]: cond -// CHECK-DAG: [[#TDPFS+1]]: [B1.[[#TDPFS]]] (ImplicitCastExpr, LValueToRValue, int) +// CHECK-DAG: [[#TDPFS:]]: [B1.{{.+}}] +// CHECK-DAG: [[#TDPFS+1]]: [B1.[[#TDPFS+6]]] (ImplicitCastExpr, LValueToRValue, int) // CHECK-DAG: [[#TDPFS+2]]: [B1.[[#TDPFS+1]]] (ImplicitCastExpr, IntegralToBoolean, _Bool) -// CHECK-DAG: [[#TDPFS+3]]: [B1.[[#TDPFS+6]]] -// CHECK-DAG: [[#TDPFS+4]]: [B1.[[#TDPFS+7]]] +// CHECK-DAG: [[#TDPFS+3]]: [B1.[[#TDPFS+7]]] +// CHECK-DAG: [[#TDPFS+4]]: [B1.[[#TDPFS+8]]] // CHECK-DAG: [[#TDPFS+5]]: #pragma omp teams distribute parallel for simd if(cond) firstprivate(fp) reduction(+: rd) // CHECK-DAG: for (int i = 0; // CHECK-DAG: [B3.[[#TDPFSB:]]]; -// CHECK-DAG: [[#TDPFS+6]]: fp -// CHECK-DAG: [[#TDPFS+7]]: rd -// CHECK-DAG: [[#TDPFS+8]]: argc -// CHECK-DAG: [[#TDPFS+9]]: x -// CHECK-DAG: [[#TDPFS+10]]: cond +// CHECK-DAG: [[#TDPFS+6]]: cond +// CHECK-DAG: [[#TDPFS+7]]: fp +// CHECK-DAG: [[#TDPFS+8]]: rd +// CHECK-DAG: [[#TDPFS+9]]: argc +// CHECK-DAG: [[#TDPFS+10]]: x // CHECK-DAG: [[#TDPFS+11]]: #pragma omp target // CHECK-DAG: [B3] // CHECK-DAG: [[#TDPFSB-3]]: x diff --git a/clang/test/OpenMP/task_target_device_codegen.c b/clang/test/OpenMP/task_target_device_codegen.c new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/task_target_device_codegen.c @@ -0,0 +1,105 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[ .].+[.|,]" --prefix-filecheck-ir-name _ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-version=50 -x c -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +void test_task_affinity() { + int t; +#pragma omp task + { +#pragma omp target device(t) + ; + } +} +#endif +// CHECK-LABEL: define {{[^@]+}}@test_task_affinity +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[T:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct.kmp_task_t_with_privates* +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP2]], i32 0, i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[T]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], i32* [[TMP5]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i8* [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_task_affinity_l18 +// CHECK-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map. +// CHECK-SAME: (%struct..kmp_privates.t* noalias [[TMP0:%.*]], i32** noalias [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP4:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32* [[TMP3]], i32** [[TMP4]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry. +// CHECK-SAME: (i32 [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTCAPTURE_EXPR__I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* +// CHECK-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK-NEXT: store i32 [[TMP17]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !12 +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_task_affinity_l18() #[[ATTR4]] +// CHECK-NEXT: ret i32 0 +// diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -238,8 +238,8 @@ // CHECK-LABEL: taskloop_with_class void taskloop_with_class() { St s1; - // CHECK: [[TD:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID:%.+]], i32 1, i64 88, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[TD_TYPE:%.+]]*)* @{{.+}} to i32 (i32, i8*)*)) - // CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[TD]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[TD_TYPE]]*, [[TD_TYPE]]*, i32)* @{{.+}} to i8*)) + // CHECK: [[TD:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID:%.+]], i32 1, i64 80, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[TD_TYPE:%.+]]*)* @{{.+}} to i32 (i32, i8*)*)) + // CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[TD]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) #pragma omp taskloop for (St s = St(); s < s1; s += 1) { }