diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -8547,10 +8547,11 @@ /// Clang specific specialization of the OMPContext to lookup target features. struct TargetOMPContext final : public llvm::omp::OMPContext { - TargetOMPContext(ASTContext &ASTCtx, std::function &&DiagUnknownTrait, - const FunctionDecl *CurrentFunctionDecl); + const FunctionDecl *CurrentFunctionDecl, + ArrayRef ConstructTraits); + virtual ~TargetOMPContext() = default; /// See llvm::omp::OMPContext::matchesISATrait diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -2342,8 +2342,6 @@ getOpenMPContextTraitPropertyForSelector( Selector.Kind) && "Ill-formed construct selector!"); - - VMI.ConstructTraits.push_back(Selector.Properties.front().Kind); } } } @@ -2474,7 +2472,8 @@ TargetOMPContext::TargetOMPContext( ASTContext &ASTCtx, std::function &&DiagUnknownTrait, - const FunctionDecl *CurrentFunctionDecl) + const FunctionDecl *CurrentFunctionDecl, + ArrayRef ConstructTraits) : OMPContext(ASTCtx.getLangOpts().OpenMPIsDevice, ASTCtx.getTargetInfo().getTriple()), FeatureValidityCheck([&](StringRef FeatureName) { @@ -2482,6 +2481,9 @@ }), DiagUnknownTrait(std::move(DiagUnknownTrait)) { ASTCtx.getFunctionFeatureMap(FeatureMap, CurrentFunctionDecl); + + for (llvm::omp::TraitProperty Property : ConstructTraits) + addTrait(Property); } bool TargetOMPContext::matchesISATrait(StringRef RawString) const { diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -828,7 +828,7 @@ static StringRef getNameFromIdOrString(Parser &P, Token &Tok, OMPContextLvl Lvl) { - if (Tok.is(tok::identifier)) { + if (Tok.is(tok::identifier) || Tok.is(tok::kw_for)) { llvm::SmallString<16> Buffer; StringRef Name = P.getPreprocessor().getSpelling(Tok, Buffer); (void)P.ConsumeToken(); @@ -2046,8 +2046,10 @@ // improve the diagnostic location. Diag(Loc, diag::warn_unknown_begin_declare_variant_isa_trait) << ISATrait; }; - TargetOMPContext OMPCtx(ASTCtx, std::move(DiagUnknownTrait), - /* CurrentFunctionDecl */ nullptr); + TargetOMPContext OMPCtx( + ASTCtx, std::move(DiagUnknownTrait), + /* CurrentFunctionDecl */ nullptr, + /* ConstructTraits */ ArrayRef()); if (isVariantApplicableInContext(VMI, OMPCtx, /* DeviceSetOnly */ true)) { Actions.ActOnOpenMPBeginDeclareVariant(Loc, TI); diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -310,6 +310,8 @@ /// Vector of previously encountered target directives SmallVector TargetLocations; SourceLocation AtomicLocation; + /// Vector of declare variant construct traits. + SmallVector ConstructTraits; public: explicit DSAStackTy(Sema &S) : SemaRef(S) {} @@ -726,6 +728,20 @@ OMPC_DEFAULTMAP_MODIFIER_unknown; } + ArrayRef getConstructTraits() { + return ConstructTraits; + } + void handleConstructTrait(ArrayRef Traits, + bool ScopeEntry) { + if (ScopeEntry) + ConstructTraits.append(Traits.begin(), Traits.end()); + else + for (llvm::omp::TraitProperty Trait : llvm::reverse(Traits)) { + llvm::omp::TraitProperty Top = ConstructTraits.pop_back_val(); + assert(Top == Trait && "Something left a trait on the stack!"); + } + } + DefaultDataSharingAttributes getDefaultDSA(unsigned Level) const { return getStackSize() <= Level ? DSA_unspecified : getStackElemAtLevel(Level).DefaultAttr; @@ -3871,6 +3887,23 @@ }; } // namespace +static void handleDeclareVariantConstructTrait(DSAStackTy *Stack, + OpenMPDirectiveKind DKind, + bool ScopeEntry) { + SmallVector Traits; + if (isOpenMPTargetExecutionDirective(DKind)) + Traits.emplace_back(llvm::omp::TraitProperty::construct_target_target); + if (isOpenMPTeamsDirective(DKind)) + Traits.emplace_back(llvm::omp::TraitProperty::construct_teams_teams); + if (isOpenMPParallelDirective(DKind)) + Traits.emplace_back(llvm::omp::TraitProperty::construct_parallel_parallel); + if (isOpenMPWorksharingDirective(DKind)) + Traits.emplace_back(llvm::omp::TraitProperty::construct_for_for); + if (isOpenMPSimdDirective(DKind)) + Traits.emplace_back(llvm::omp::TraitProperty::construct_simd_simd); + Stack->handleConstructTrait(Traits, ScopeEntry); +} + void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { switch (DKind) { case OMPD_parallel: @@ -4285,6 +4318,7 @@ llvm_unreachable("Unknown OpenMP directive"); } DSAStack->setContext(CurContext); + handleDeclareVariantConstructTrait(DSAStack, DKind, /* ScopeEntry */ true); } int Sema::getNumberOfConstructScopes(unsigned Level) const { @@ -4460,6 +4494,8 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S, ArrayRef Clauses) { + handleDeclareVariantConstructTrait(DSAStack, DSAStack->getCurrentDirective(), + /* ScopeEntry */ false); if (DSAStack->getCurrentDirective() == OMPD_atomic || DSAStack->getCurrentDirective() == OMPD_critical || DSAStack->getCurrentDirective() == OMPD_section || @@ -6804,7 +6840,7 @@ << ISATrait; }; TargetOMPContext OMPCtx(Context, std::move(DiagUnknownTrait), - getCurFunctionDecl()); + getCurFunctionDecl(), DSAStack->getConstructTraits()); QualType CalleeFnType = CalleeFnDecl->getType(); diff --git a/clang/test/OpenMP/declare_variant_ast_print.c b/clang/test/OpenMP/declare_variant_ast_print.c --- a/clang/test/OpenMP/declare_variant_ast_print.c +++ b/clang/test/OpenMP/declare_variant_ast_print.c @@ -6,6 +6,12 @@ int foo(void); +#pragma omp declare variant(foo) match(construct={target}) +#pragma omp declare variant(foo) match(construct={teams}) +#pragma omp declare variant(foo) match(construct={parallel}) +#pragma omp declare variant(foo) match(construct={for}) +#pragma omp declare variant(foo) match(construct={simd}) +#pragma omp declare variant(foo) match(construct={target,teams,parallel,for,simd}) #pragma omp declare variant(foo) match(xxx={}, yyy={ccc}) #pragma omp declare variant(foo) match(xxx={vvv}) #pragma omp declare variant(foo) match(implementation={vendor(score(0):llvm)}, device={kind(fpga)}) @@ -29,4 +35,10 @@ // CHECK-NEXT: #pragma omp declare variant(foo) match(implementation={vendor(unknown)}, device={kind(gpu)}) // CHECK-NEXT: #pragma omp declare variant(foo) match(implementation={vendor(llvm)}) // CHECK-NEXT: #pragma omp declare variant(foo) match(implementation={vendor(score(0): llvm)}, device={kind(fpga)}) +// CHECK-NEXT: #pragma omp declare variant(foo) match(construct={target, teams, parallel, for, simd}) +// CHECK-NEXT: #pragma omp declare variant(foo) match(construct={simd}) +// CHECK-NEXT: #pragma omp declare variant(foo) match(construct={for}) +// CHECK-NEXT: #pragma omp declare variant(foo) match(construct={parallel}) +// CHECK-NEXT: #pragma omp declare variant(foo) match(construct={teams}) +// CHECK-NEXT: #pragma omp declare variant(foo) match(construct={target}) // CHECK-NEXT: int bar(); diff --git a/clang/test/OpenMP/declare_variant_ast_print.cpp b/clang/test/OpenMP/declare_variant_ast_print.cpp --- a/clang/test/OpenMP/declare_variant_ast_print.cpp +++ b/clang/test/OpenMP/declare_variant_ast_print.cpp @@ -17,7 +17,9 @@ // CHECK-NEXT: return int(); // CHECK-NEXT: } -// CHECK: #pragma omp declare variant(foofoo) match(implementation={vendor(score(5): ibm)}, device={kind(fpga)}) +// CHECK: #pragma omp declare variant(foofoo) match(construct={target}) +// CHECK-NEXT: #pragma omp declare variant(foofoo) match(construct={simd}) +// CHECK-NEXT: #pragma omp declare variant(foofoo) match(implementation={vendor(score(5): ibm)}, device={kind(fpga)}) // CHECK-NEXT: #pragma omp declare variant(foofoo) match(implementation={vendor(unknown)}) // CHECK-NEXT: #pragma omp declare variant(foofoo) match(implementation={vendor(score(0): llvm)}, device={kind(cpu)}) // CHECK-NEXT: int bar(); @@ -26,6 +28,8 @@ #pragma omp declare variant(foofoo ) match(implementation = {vendor(score(0): "llvm"), xxx}, device = {kind(cpu)}) #pragma omp declare variant(foofoo ) match(implementation = {vendor("unknown")}) #pragma omp declare variant(foofoo ) match(implementation = {vendor(score(5): ibm)}, device = {kind(fpga)}) +#pragma omp declare variant(foofoo ) match(construct = {simd}) +#pragma omp declare variant(foofoo ) match(construct = {target}) int bar(); // CHECK: #pragma omp declare variant(foofoo) match(implementation={vendor(score(C + 5): ibm)}, device={kind(cpu, host)}) diff --git a/clang/test/OpenMP/declare_variant_construct_codegen_1.c b/clang/test/OpenMP/declare_variant_construct_codegen_1.c new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -0,0 +1,334 @@ +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1 +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1 + +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +#ifdef CK1 + +#define N 100 + +void p_vxv(int *v1, int *v2, int *v3, int n); +void t_vxv(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(t_vxv) match(construct={target}) +#pragma omp declare variant(p_vxv) match(construct={parallel}) +void vxv(int *v1, int *v2, int *v3, int n) { + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; +} +// CK1: define dso_local void @vxv + +void p_vxv(int *v1, int *v2, int *v3, int n) { +#pragma omp for + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; +} +// CK1: define dso_local void @p_vxv + +#pragma omp declare target +void t_vxv(int *v1, int *v2, int *v3, int n) { +#pragma distribute simd + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; +} +#pragma omp end declare target +// CK1: define dso_local void @t_vxv + + +// CK1-LABEL: define {{[^@]+}}@test +int test() { + int v1[N], v2[N], v3[N]; + + // init + for (int i = 0; i < N; i++) { + v1[i] = (i + 1); + v2[i] = -(i + 1); + v3[i] = 0; + } + +#pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N]) + { + vxv(v1, v2, v3, N); + } +// CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}}) + + vxv(v1, v2, v3, N); +// CK1: call void @vxv + +#pragma omp parallel + { + vxv(v1, v2, v3, N); + } +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void + + return 0; +} + +// CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) +// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void +// CK1: define internal void [[TARGET_REGION]]( +// CK1: call void @t_vxv + +// CK1: define internal void [[PARALLEL_REGION]]( +// CK1: call void @p_vxv +#endif // CK1 + +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK2 +// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2 +// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2 +// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK2 + +// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +#ifdef CK2 + +void test_teams(int ***v1, int ***v2, int ***v3, int n); +void test_target(int ***v1, int ***v2, int ***v3, int n); +void test_parallel(int ***v1, int ***v2, int ***v3, int n); + +#pragma omp declare variant(test_teams) match(construct = {teams}) +#pragma omp declare variant(test_target) match(construct = {target}) +#pragma omp declare variant(test_parallel) match(construct = {parallel}) +void test_base(int ***v1, int ***v2, int ***v3, int n) { + for (int i = 0; i < n; i++) + for (int j = 0; j < n; ++j) + for (int k = 0; k < n; ++k) + v3[i][j][k] = v1[i][j][k] * v2[i][j][k]; +} + +#pragma omp declare target +void test_teams(int ***v1, int ***v2, int ***v3, int n) { +#pragma omp distribute parallel for simd collapse(2) + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + for (int k = 0; k < n; ++k) + v3[i][j][k] = v1[i][j][k] * v2[i][j][k]; +} +#pragma omp end declare target + +#pragma omp declare target +void test_target(int ***v1, int ***v2, int ***v3, int n) { +#pragma omp parallel for simd collapse(3) + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + for (int k = 0; k < n; ++k) + v3[i][j][k] = v1[i][j][k] * v2[i][j][k]; +} +#pragma omp end declare target + +void test_parallel(int ***v1, int ***v2, int ***v3, int n) { +#pragma omp for collapse(3) + for (int i = 0; i < n; ++i) + for (int j = 0; j < n; ++j) + for (int k = 0; k < n; ++k) + v3[i][j][k] = v1[i][j][k] * v2[i][j][k]; +} + +// CK2-LABEL: define {{[^@]+}}@test +void test(int ***v1, int ***v2, int ***v3, int n) { + int i; + +#pragma omp target +#pragma omp teams + { + test_base(v1, v2, v3, 0); + } +// CK2: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}}) + +#pragma omp target + { + test_base(v1, v2, v3, 0); + } +// CK2: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}}) + +#pragma omp parallel + { + test_base(v1, v2, v3, 0); + } +// CK2: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[PARALLEL_REGION:@.+]] to void +} + +// CK2: define internal void @__omp_offloading_[[OFFLOAD_1]]({{.+}}) +// CK2: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[TARGET_REGION_1:@.+]] to void +// CK2: define internal void [[TARGET_REGION_1]]( +// CK2: call void @test_teams + +// CK2: define internal void @__omp_offloading_[[OFFLOAD_2]]({{.+}}) +// CK2: call void @test_target + +// CK2: define internal void [[PARALLEL_REGION]]( +// CK2: call void @test_parallel + +#endif // CK2 + +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK3 +// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3 +// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3 +// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK3 + +// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +#ifdef CK3 + +#define N 100 + +int t_for(int *v1, int *v2, int *v3, int n); +int t_simd(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(t_simd) match(construct = {simd}) +#pragma omp declare variant(t_for) match(construct = {for}) +int t(int *v1, int *v2, int *v3, int idx) { + return v1[idx] * v2[idx]; +} + +int t_for(int *v1, int *v2, int *v3, int idx) { + return v1[idx] * v2[idx]; +} + +#pragma omp declare simd +int t_simd(int *v1, int *v2, int *v3, int idx) { + return v1[idx] * v2[idx]; +} + +// CK3-LABEL: define {{[^@]+}}@test +void test() { + int v1[N], v2[N], v3[N]; + + // init + for (int i = 0; i < N; i++) { + v1[i] = (i + 1); + v2[i] = -(i + 1); + v3[i] = 0; + } + +#pragma omp simd + for (int i = 0; i < N; i++) { + v3[i] = t(v1, v2, v3, i); + } +// CK3: call = call i32 @t_simd + + +#pragma omp for + for (int i = 0; i < N; i++) { + v3[i] = t(v1, v2, v3, i); + } +// CK3: call{{.+}} = call i32 @t_for +} + +#endif // CK3 + +// RUN: %clang_cc1 -DCK4 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4 +// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK4 +// RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4 +// RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4 +// RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK4 + +// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK4 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}" + +#ifdef CK4 + +#define N 100 + +void not_selected_vxv(int *v1, int *v2, int *v3, int n); +void combined_vxv(int *v1, int *v2, int *v3, int n); +void all_vxv(int *v1, int *v2, int *v3, int n); + +#pragma omp declare variant(all_vxv) match(construct={target,teams,parallel,for,simd}) +#pragma omp declare variant(combined_vxv) match(construct={target,teams,parallel,for}) +#pragma omp declare variant(not_selected_vxv) match(construct={parallel,for}) +void vxv(int *v1, int *v2, int *v3, int n) { + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i]; +} + +void not_selected_vxv(int *v1, int *v2, int *v3, int n) { + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3; +} + +#pragma omp declare target +void combined_vxv(int *v1, int *v2, int *v3, int n) { + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2; +} +#pragma omp end declare target + +#pragma omp declare target +void all_vxv(int *v1, int *v2, int *v3, int n) { + for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 4; +} +#pragma omp end declare target + +// CK4-LABEL: define {{[^@]+}}@test +void test() { + int v1[N], v2[N], v3[N]; + + //init + for (int i = 0; i < N; i++) { + v1[i] = (i + 1); + v2[i] = -(i + 1); + v3[i] = 0; + } + +#pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N]) + { +#pragma omp parallel for + for (int i = 0; i < N; i++) + vxv(v1, v2, v3, N); + } +// CK4: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}}) + +#pragma omp simd + for (int i = 0; i < N; i++) + vxv(v1, v2, v3, N); +// CK4: call void @vxv + +#pragma omp target teams distribute parallel for simd map(from: v3[:N]) + for (int i = 0; i < N; i++) + for (int i = 0; i < N; i++) + for (int i = 0; i < N; i++) + vxv(v1, v2, v3, N); +// CK4: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}}) +} +// CK4-DAG: call void @all_vxv +// CK4-DAG: call void @combined_vxv + +#endif // CK4 + +#endif // HEADER diff --git a/clang/test/OpenMP/declare_variant_messages.c b/clang/test/OpenMP/declare_variant_messages.c --- a/clang/test/OpenMP/declare_variant_messages.c +++ b/clang/test/OpenMP/declare_variant_messages.c @@ -57,6 +57,12 @@ #pragma omp declare variant(foo) match(user = {condition()}) // expected-error {{expected expression}} expected-error {{use of undeclared identifier 'expr'}} expected-error {{expected expression}} expected-note {{the ignored selector spans until here}} int score_and_cond_non_const(); +#pragma omp declare variant(foo) match(construct={teams,parallel,for,simd}) +#pragma omp declare variant(foo) match(construct={target teams}) // expected-error {{expected ')'}} expected-warning {{expected '}' after the context selectors for the context set "construct"; '}' assumed}} expected-note {{to match this '('}} +#pragma omp declare variant(foo) match(construct={parallel for}) // expected-error {{expected ')'}} expected-warning {{expected '}' after the context selectors for the context set "construct"; '}' assumed}} expected-note {{to match this '('}} +#pragma omp declare variant(foo) match(construct={for simd}) // expected-error {{expected ')'}} expected-warning {{expected '}' after the context selectors for the context set "construct"; '}' assumed}} expected-note {{to match this '('}} +int construct(void); + #pragma omp declare variant(foo) match(xxx={}) // expected-warning {{'xxx' is not a valid context set in a `declare variant`; set ignored}} expected-note {{context set options are: 'construct' 'device' 'implementation' 'user'}} expected-note {{the ignored set spans until here}} int a; // expected-error {{'#pragma omp declare variant' can only be applied to functions}}