diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -2343,7 +2343,7 @@ Selector.Kind) && "Ill-formed construct selector!"); - VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); + // VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); } } } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -310,6 +310,8 @@ /// Vector of previously encountered target directives SmallVector TargetLocations; SourceLocation AtomicLocation; + /// Vector of declare variant construct traits. + SmallVector ConstructTraits; public: explicit DSAStackTy(Sema &S) : SemaRef(S) {} @@ -726,6 +728,14 @@ OMPC_DEFAULTMAP_MODIFIER_unknown; } + ArrayRef getConstructTraits() { + return ConstructTraits; + } + void addConstructTrait(llvm::omp::TraitProperty Property) { + ConstructTraits.push_back(Property); + } + void eraseConstructTrait() { ConstructTraits.clear(); } + DefaultDataSharingAttributes getDefaultDSA(unsigned Level) const { return getStackSize() <= Level ? DSA_unspecified : getStackElemAtLevel(Level).DefaultAttr; @@ -3871,6 +3881,23 @@ }; } // namespace +static void addDeclareVariantConstructTrait(DSAStackTy *Stack, + OpenMPDirectiveKind DKind) { + if (isOpenMPTargetExecutionDirective(DKind)) + Stack->addConstructTrait(llvm::omp::TraitProperty::construct_target_target); + if (isOpenMPTeamsDirective(DKind)) + Stack->addConstructTrait(llvm::omp::TraitProperty::construct_teams_teams); + if (isOpenMPParallelDirective(DKind)) + Stack->addConstructTrait( + llvm::omp::TraitProperty::construct_parallel_parallel); + // TODO check this! + if (isOpenMPWorksharingDirective(DKind)) { + Stack->addConstructTrait(llvm::omp::TraitProperty::construct_for_for); + } + if (isOpenMPSimdDirective(DKind)) + Stack->addConstructTrait(llvm::omp::TraitProperty::construct_simd_simd); +} + void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { switch (DKind) { case OMPD_parallel: @@ -4285,6 +4312,7 @@ llvm_unreachable("Unknown OpenMP directive"); } DSAStack->setContext(CurContext); + addDeclareVariantConstructTrait(DSAStack, DKind); } int Sema::getNumberOfConstructScopes(unsigned Level) const { @@ -4460,6 +4488,7 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, ArrayRef Clauses) { + DSAStack->eraseConstructTrait(); if (DSAStack->getCurrentDirective() == OMPD_atomic || DSAStack->getCurrentDirective() == OMPD_critical || DSAStack->getCurrentDirective() == OMPD_section || @@ -6805,6 +6834,8 @@ }; TargetOMPContext OMPCtx(Context, std::move(DiagUnknownTrait), getCurFunctionDecl()); + for (auto Property : DSAStack->getConstructTraits()) + OMPCtx.addTrait(Property); QualType CalleeFnType = CalleeFnDecl->getType(); diff --git a/clang/test/OpenMP/declare_variant_construct_codegen_1.c b/clang/test/OpenMP/declare_variant_construct_codegen_1.c new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -0,0 +1,87 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1 + +// RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#define N 100 + +void p_vxv(int *v1, int *v2, int *v3, int n); +void t_vxv(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(t_vxv) match(construct={target}) +#pragma omp declare variant(p_vxv) match(construct={parallel}) +void vxv(int *v1, int *v2, int *v3, int n) { + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; +} +// CK1: define dso_local void @vxv + +void p_vxv(int *v1, int *v2, int *v3, int n) { +#pragma omp for + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; +} +// CK1: define dso_local void @p_vxv + +#pragma omp declare target +void t_vxv(int *v1, int *v2, int *v3, int n) { +#pragma distribute simd + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; +} +#pragma omp end declare target +// CK1: define dso_local void @t_vxv + + +// CK1-LABEL: define {{[^@]+}}@main +int main() { + int v1[N], v2[N], v3[N]; + + // init + for (int i = 0; i < N; i++) { + v1[i] = (i + 1); + v2[i] = -(i + 1); + v3[i] = 0; + } + +#pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N]) + { + vxv(v1, v2, v3, N); + } +// CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}}) + + vxv(v1, v2, v3, N); +// CK1: call void @vxv + +#pragma omp parallel + { + vxv(v1, v2, v3, N); + } +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void + + return 0; +} + +// CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) +// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void + +// CK1: define internal void [[TARGET_REGION]]( +// CK1: call void @t_vxv + +// CK1: define internal void [[PARALLEL_REGION]]( +// CK1: call void @p_vxv + +#endif // HEADER