Index: clang/include/clang/Sema/Sema.h =================================================================== --- clang/include/clang/Sema/Sema.h +++ clang/include/clang/Sema/Sema.h @@ -8611,9 +8611,17 @@ S.VTableUses.swap(SavedVTableUses); // Restore the set of pending implicit instantiations. - assert(S.PendingInstantiations.empty() && - "PendingInstantiations should be empty before it is discarded."); - S.PendingInstantiations.swap(SavedPendingInstantiations); + if (S.TUKind != TU_Prefix) { + assert(S.PendingInstantiations.empty() && + "PendingInstantiations should be empty before it is discarded."); + S.PendingInstantiations.swap(SavedPendingInstantiations); + } else { + // Template instantiations in the PCH may be delayed until the TU. + S.PendingInstantiations.swap(SavedPendingInstantiations); + S.PendingInstantiations.insert(S.PendingInstantiations.end(), + SavedPendingInstantiations.begin(), + SavedPendingInstantiations.end()); + } } private: Index: clang/lib/Sema/Sema.cpp =================================================================== --- clang/lib/Sema/Sema.cpp +++ clang/lib/Sema/Sema.cpp @@ -1009,6 +1009,11 @@ LateParsedInstantiations.begin(), LateParsedInstantiations.end()); LateParsedInstantiations.clear(); + + { + llvm::TimeTraceScope TimeScope("PerformPendingInstantiations"); + PerformPendingInstantiations(); + } } DiagnoseUnterminatedPragmaPack(); Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp =================================================================== --- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -5912,6 +5912,7 @@ /// Performs template instantiation for all implicit template /// instantiations we have seen until this point. 
void Sema::PerformPendingInstantiations(bool LocalOnly) { + std::deque<PendingImplicitInstantiation> delayedPCHInstantiations; while (!PendingLocalImplicitInstantiations.empty() || (!LocalOnly && !PendingInstantiations.empty())) { PendingImplicitInstantiation Inst; @@ -5942,6 +5943,9 @@ if (Function->isDefined()) Function->setInstantiationIsPending(false); } + // Definition of a PCH'd template declaration may be available only in the TU. + if(!LocalOnly && TUKind == TU_Prefix && Function->instantiationIsPending()) + delayedPCHInstantiations.push_back(Inst); continue; } @@ -5987,6 +5991,9 @@ InstantiateVariableDefinition(/*FIXME:*/ Inst.second, Var, true, DefinitionRequired, true); } + + if(!LocalOnly) + PendingInstantiations.swap(delayedPCHInstantiations); } void Sema::PerformDependentDiagnostics(const DeclContext *Pattern, Index: clang/test/CodeGenCXX/vla-lambda-capturing.cpp =================================================================== --- clang/test/CodeGenCXX/vla-lambda-capturing.cpp +++ clang/test/CodeGenCXX/vla-lambda-capturing.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - | FileCheck --check-prefixes=CHECK,NOPCH %s // RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t -// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -emit-llvm -o - | FileCheck --check-prefixes=CHECK,PCH %s #ifndef HEADER #define HEADER @@ -111,14 +111,14 @@ // CHECK: call void @llvm.stackrestore( // CHECK: ret void -// CHECK: define linkonce_odr{{.*}} void [[F_INT_LAMBDA]]([[CAP_TYPE2]]* -// CHECK: [[THIS:%.+]] = load [[CAP_TYPE2]]*, [[CAP_TYPE2]]** -// CHECK: [[SIZE_REF:%.+]] = getelementptr inbounds [[CAP_TYPE2]], [[CAP_TYPE2]]* [[THIS]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[SIZE:%.+]] = load [[INTPTR_T]], [[INTPTR_T]]* [[SIZE_REF]] -// CHECK: call i{{.+}}* @llvm.stacksave() -// CHECK: alloca [[INTPTR_T]], [[INTPTR_T]] [[SIZE]] -// CHECK: call void 
@llvm.stackrestore( -// CHECK: ret void +// NOPCH: define linkonce_odr{{.*}} void [[F_INT_LAMBDA]]([[CAP_TYPE2]]* +// NOPCH: [[THIS:%.+]] = load [[CAP_TYPE2]]*, [[CAP_TYPE2]]** +// NOPCH: [[SIZE_REF:%.+]] = getelementptr inbounds [[CAP_TYPE2]], [[CAP_TYPE2]]* [[THIS]], i{{.+}} 0, i{{.+}} 0 +// NOPCH: [[SIZE:%.+]] = load [[INTPTR_T]], [[INTPTR_T]]* [[SIZE_REF]] +// NOPCH: call i{{.+}}* @llvm.stacksave() +// NOPCH: alloca [[INTPTR_T]], [[INTPTR_T]] [[SIZE]] +// NOPCH: call void @llvm.stackrestore( +// NOPCH: ret void // CHECK: define linkonce_odr{{.*}} void [[B_INT_LAMBDA]]([[CAP_TYPE3]]* // CHECK: [[SIZE2_REF:%.+]] = getelementptr inbounds [[CAP_TYPE3]], [[CAP_TYPE3]]* [[THIS:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 @@ -168,4 +168,14 @@ // CHECK: [[MUL:%.+]] = mul {{.*}} i{{[0-9]+}} [[SIZE2]], [[SIZE1]] // CHECK: mul {{.*}} i{{[0-9]+}} {{[0-9]+}}, [[MUL]] // CHECK: ret void + +// PCH: define linkonce_odr{{.*}} void [[F_INT_LAMBDA]]([[CAP_TYPE2]]* +// PCH: [[THIS:%.+]] = load [[CAP_TYPE2]]*, [[CAP_TYPE2]]** +// PCH: [[SIZE_REF:%.+]] = getelementptr inbounds [[CAP_TYPE2]], [[CAP_TYPE2]]* [[THIS]], i{{.+}} 0, i{{.+}} 0 +// PCH: [[SIZE:%.+]] = load [[INTPTR_T]], [[INTPTR_T]]* [[SIZE_REF]] +// PCH: call i{{.+}}* @llvm.stacksave() +// PCH: alloca [[INTPTR_T]], [[INTPTR_T]] [[SIZE]] +// PCH: call void @llvm.stackrestore( +// PCH: ret void + #endif Index: clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp =================================================================== --- clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm 
%s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes CHECK,CHECK-NOPCH // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s @@ -94,6 +94,22 @@ // CHECK: call i{{[0-9]+}} @__tgt_target_teams( // CHECK: call void [[T_OFFLOADING_FUN_3:@.+]]( +// CHECK-PCH: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK-PCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + +// CHECK-PCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK-PCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK-PCH: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK-PCH: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK-PCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK-PCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]( +// CHECK-PCH-DAG: [[CALL_RES:%.+]] = invoke{{.*}} 
i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}}) +// CHECK-PCH-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}} +// CHECK-PCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]]) +// CHECK-PCH: call {{.*}}void {{.*}} @__kmpc_fork_call( + // CHECK: define internal void [[T_OFFLOADING_FUN_0]]( // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) @@ -108,19 +124,19 @@ // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( -// CHECK: define internal void [[T_OFFLOADING_FUN_2]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) +// CHECK-NOPCH: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK-NOPCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// CHECK-NOPCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK-NOPCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK-NOPCH: call {{.*}}void {{.*}} @__kmpc_fork_call( -// CHECK: define internal void [[T_OFFLOADING_FUN_3]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) +// CHECK-NOPCH: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK-NOPCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]( -// CHECK-DAG: [[CALL_RES:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}}) -// CHECK-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 
[[CALL_RES]] to {{.+}} -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]]) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// CHECK-NOPCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]( +// CHECK-NOPCH-DAG: [[CALL_RES:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}}) +// CHECK-NOPCH-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}} +// CHECK-NOPCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]]) +// CHECK-NOPCH: call {{.*}}void {{.*}} @__kmpc_fork_call( #endif Index: clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp =================================================================== --- clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp +++ clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes CHECK,CHECK-NOPCH // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck 
%s --check-prefixes CHECK,CHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s @@ -94,6 +94,22 @@ // CHECK: call i{{[0-9]+}} @__tgt_target_teams( // CHECK: call void [[T_OFFLOADING_FUN_3:@.+]]( +// CHECK-PCH: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK-PCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + +// CHECK-PCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK-PCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK-PCH: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK-PCH: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK-PCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK-PCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]( +// CHECK-PCH-DAG: [[CALL_RES:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}}) +// CHECK-PCH-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}} +// CHECK-PCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]]) +// CHECK-PCH: call {{.*}}void {{.*}} @__kmpc_fork_call( + // CHECK: define internal void [[T_OFFLOADING_FUN_0]]( // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) @@ -108,19 +124,19 @@ // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( -// CHECK: define internal void [[T_OFFLOADING_FUN_2]]( -// CHECK: call 
{{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) +// CHECK-NOPCH: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK-NOPCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// CHECK-NOPCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK-NOPCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK-NOPCH: call {{.*}}void {{.*}} @__kmpc_fork_call( -// CHECK: define internal void [[T_OFFLOADING_FUN_3]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) +// CHECK-NOPCH: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK-NOPCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]( -// CHECK-DAG: [[CALL_RES:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}}) -// CHECK-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}} -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]]) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// CHECK-NOPCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]( +// CHECK-NOPCH-DAG: [[CALL_RES:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP:@.+]]([[S_TY]]* {{.+}}) +// CHECK-NOPCH-DAG: [[CALL_RES_SEXT:%.+]] = sext i8 [[CALL_RES]] to {{.+}} +// CHECK-NOPCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[CALL_RES_SEXT]]) +// CHECK-NOPCH: call {{.*}}void {{.*}} @__kmpc_fork_call( #endif Index: clang/test/OpenMP/single_codegen.cpp 
=================================================================== --- clang/test/OpenMP/single_codegen.cpp +++ clang/test/OpenMP/single_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefixes=CHECK,NOPCH %s // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,PCH %s // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -std=c++11 -fopenmp -fnoopenmp-use-tls -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s @@ -230,6 +230,58 @@ // ARRAY: store %struct.St* %{{.+}}, %struct.St** %{{.+}}, #endif +// PCH-LABEL: @_ZN3SSTIdEC2Ev +// PCH: getelementptr inbounds [[SST_TY]], [[SST_TY]]* %{{.+}}, i32 0, i32 0 +// PCH-NEXT: store double 0.000000e+00, double* % +// PCH-NEXT: getelementptr inbounds [[SST_TY]], [[SST_TY]]* %{{.+}}, i32 0, i32 0 +// PCH-NEXT: store double* %{{.+}}, double** % +// PCH-NEXT: load double*, double** % +// PCH-NEXT: load double, double* % +// PCH-NEXT: bitcast i64* %{{.+}} to double* +// PCH-NEXT: store double 
%{{.+}}, double* % +// PCH-NEXT: load i64, i64* % +// PCH-NEXT: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[SST_TY]]*, i64)* [[SST_MICROTASK:@.+]] to void +// PCH-NEXT: ret void + +// PCH: define internal void [[SST_MICROTASK]](i32* {{[^,]+}}, i32* {{[^,]+}}, [[SST_TY]]* {{.+}}, i64 {{.+}}) +// PCH: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// PCH-NEXT: icmp ne i32 [[RES]], 0 +// PCH-NEXT: br i1 + +// PCH: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 +// PCH-NEXT: load double*, double** % +// PCH-NEXT: store double* % +// PCH-LABEL: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv( + +// PCH: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// PCH-NEXT: store i32 1, i32* [[DID_IT]], +// PCH-NEXT: br label + +// PCH: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// PCH-NEXT: br label + +// PCH: getelementptr inbounds [1 x i8*], [1 x i8*]* [[LIST:%.+]], i64 0, i64 0 +// PCH: load double*, double** % +// PCH-NEXT: bitcast double* % +// PCH-NEXT: store i8* % +// PCH-NEXT: bitcast [1 x i8*]* [[LIST]] to i8* +// PCH-NEXT: load i32, i32* [[DID_IT]], +// PCH-NEXT: call void @__kmpc_copyprivate([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}, i64 8, i8* %{{.+}}, void (i8*, i8*)* [[COPY_FUNC:@[^,]+]], i32 %{{.+}}) +// PCH-NEXT: ret void + +// PCH-LABEL: @_ZZN3SSTIdEC1EvENKUlvE_clEv( +// PCH: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 +// PCH-NEXT: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 +// PCH-NEXT: load double*, double** % +// PCH-NEXT: store double* % +// PCH-LABEL: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv( +// PCH-NEXT: ret void + +// PCH: define internal void [[COPY_FUNC]](i8* %0, i8* %1) +// PCH: ret void + +// PCH-LABEL: @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv( + // CHECK-LABEL:@_ZN2SSC2ERi( // CHECK: call void 
([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[SS_TY]]*, i64, i64, i64)* [[SS_MICROTASK:@.+]] to void // CHECK-NEXT: ret void @@ -385,54 +437,54 @@ // CHECK: define internal void [[COPY_FUNC]](i8* %0, i8* %1) // CHECK: ret void -// CHECK-LABEL: @_ZN3SSTIdEC2Ev -// CHECK: getelementptr inbounds [[SST_TY]], [[SST_TY]]* %{{.+}}, i32 0, i32 0 -// CHECK-NEXT: store double 0.000000e+00, double* % -// CHECK-NEXT: getelementptr inbounds [[SST_TY]], [[SST_TY]]* %{{.+}}, i32 0, i32 0 -// CHECK-NEXT: store double* %{{.+}}, double** % -// CHECK-NEXT: load double*, double** % -// CHECK-NEXT: load double, double* % -// CHECK-NEXT: bitcast i64* %{{.+}} to double* -// CHECK-NEXT: store double %{{.+}}, double* % -// CHECK-NEXT: load i64, i64* % -// CHECK-NEXT: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call([[IDENT_T_TY]]* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[SST_TY]]*, i64)* [[SST_MICROTASK:@.+]] to void -// CHECK-NEXT: ret void - -// CHECK: define internal void [[SST_MICROTASK]](i32* {{[^,]+}}, i32* {{[^,]+}}, [[SST_TY]]* {{.+}}, i64 {{.+}}) -// CHECK: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) -// CHECK-NEXT: icmp ne i32 [[RES]], 0 -// CHECK-NEXT: br i1 - -// CHECK: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 -// CHECK-NEXT: load double*, double** % -// CHECK-NEXT: store double* % -// CHECK-LABEL: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv( - -// CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) -// CHECK-NEXT: store i32 1, i32* [[DID_IT]], -// CHECK-NEXT: br label - -// CHECK: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) -// CHECK-NEXT: br label - -// CHECK: getelementptr inbounds [1 x i8*], [1 x i8*]* [[LIST:%.+]], i64 0, i64 0 -// CHECK: load double*, double** % -// CHECK-NEXT: bitcast double* % -// CHECK-NEXT: 
store i8* % -// CHECK-NEXT: bitcast [1 x i8*]* [[LIST]] to i8* -// CHECK-NEXT: load i32, i32* [[DID_IT]], -// CHECK-NEXT: call void @__kmpc_copyprivate([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}, i64 8, i8* %{{.+}}, void (i8*, i8*)* [[COPY_FUNC:@[^,]+]], i32 %{{.+}}) -// CHECK-NEXT: ret void - -// CHECK-LABEL: @_ZZN3SSTIdEC1EvENKUlvE_clEv( -// CHECK: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 -// CHECK-NEXT: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 -// CHECK-NEXT: load double*, double** % -// CHECK-NEXT: store double* % -// CHECK-LABEL: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv( -// CHECK-NEXT: ret void - -// CHECK: define internal void [[COPY_FUNC]](i8* %0, i8* %1) -// CHECK: ret void - -// CHECK-LABEL: @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv( +// NOPCH-LABEL: @_ZN3SSTIdEC2Ev +// NOPCH: getelementptr inbounds [[SST_TY]], [[SST_TY]]* %{{.+}}, i32 0, i32 0 +// NOPCH-NEXT: store double 0.000000e+00, double* % +// NOPCH-NEXT: getelementptr inbounds [[SST_TY]], [[SST_TY]]* %{{.+}}, i32 0, i32 0 +// NOPCH-NEXT: store double* %{{.+}}, double** % +// NOPCH-NEXT: load double*, double** % +// NOPCH-NEXT: load double, double* % +// NOPCH-NEXT: bitcast i64* %{{.+}} to double* +// NOPCH-NEXT: store double %{{.+}}, double* % +// NOPCH-NEXT: load i64, i64* % +// NOPCH-NEXT: call void ([[IDENT_T_TY]]*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call([[IDENT_T_TY]]* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [[SST_TY]]*, i64)* [[SST_MICROTASK:@.+]] to void +// NOPCH-NEXT: ret void + +// NOPCH: define internal void [[SST_MICROTASK]](i32* {{[^,]+}}, i32* {{[^,]+}}, [[SST_TY]]* {{.+}}, i64 {{.+}}) +// NOPCH: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// NOPCH-NEXT: icmp ne i32 [[RES]], 0 +// NOPCH-NEXT: br i1 + +// NOPCH: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 +// NOPCH-NEXT: load double*, double** % +// NOPCH-NEXT: store double* % +// NOPCH-LABEL: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv( + +// NOPCH: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// NOPCH-NEXT: store i32 1, i32* [[DID_IT]], +// NOPCH-NEXT: br label + +// NOPCH: call void @__kmpc_end_single([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}) +// NOPCH-NEXT: br label + +// NOPCH: getelementptr inbounds [1 x i8*], [1 x i8*]* [[LIST:%.+]], i64 0, i64 0 +// NOPCH: load double*, double** % +// NOPCH-NEXT: bitcast double* % +// NOPCH-NEXT: store i8* % +// NOPCH-NEXT: bitcast [1 x i8*]* [[LIST]] to i8* +// NOPCH-NEXT: load i32, i32* [[DID_IT]], +// NOPCH-NEXT: call void @__kmpc_copyprivate([[IDENT_T_TY]]* @{{.+}}, i32 %{{.+}}, i64 8, i8* %{{.+}}, void (i8*, i8*)* [[COPY_FUNC:@[^,]+]], i32 %{{.+}}) +// NOPCH-NEXT: ret void + +// NOPCH-LABEL: @_ZZN3SSTIdEC1EvENKUlvE_clEv( +// NOPCH: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 +// NOPCH-NEXT: getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 1 +// NOPCH-NEXT: load double*, double** % +// NOPCH-NEXT: store double* % +// NOPCH-LABEL: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv( +// NOPCH-NEXT: ret void + +// NOPCH: define internal void [[COPY_FUNC]](i8* %0, i8* %1) +// NOPCH: ret void + +// NOPCH-LABEL: @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv( Index: clang/test/OpenMP/target_codegen.cpp =================================================================== --- 
clang/test/OpenMP/target_codegen.cpp +++ clang/test/OpenMP/target_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH,CHECK-NOPCH-64 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-PCH,CHECK-PCH-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-NOPCH,CHECK-NOPCH-32 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 
-include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-PCH,CHECK-PCH-32 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -744,6 +744,24 @@ // CHECK-DAG: getelementptr inbounds i16, i16* [[REF_C]], i[[SZ]] %{{.+}} +// CHECK-PCH: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK-PCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-PCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-PCH-64-DAG:[[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-PCH-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-PCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], +// Use captures. +// CHECK-PCH-64-DAG: load i32, i32* [[REF_A]] +// CHECK-PCH-32-DAG: load i32, i32* [[LOCAL_A]] +// CHECK-PCH-DAG: load i16, i16* [[REF_AA]] +// CHECK-PCH-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 + // CHECK: define internal void [[HVT6]] // Create local storage for each capture. // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] @@ -766,23 +784,23 @@ // CHECK-32-DAG: load i32, i32* [[LOCAL_A]] // CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 -// CHECK: define internal void [[HVT5]] +// CHECK-NOPCH: define internal void [[HVT5]] // Create local storage for each capture. 
-// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] -// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] -// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// CHECK-NOPCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-NOPCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. -// CHECK-64-DAG:[[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* -// CHECK-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], +// CHECK-NOPCH-64-DAG:[[REF_A:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-NOPCH-DAG: [[REF_AA:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-NOPCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], // Use captures. 
-// CHECK-64-DAG: load i32, i32* [[REF_A]] -// CHECK-32-DAG: load i32, i32* [[LOCAL_A]] -// CHECK-DAG: load i16, i16* [[REF_AA]] -// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 +// CHECK-NOPCH-64-DAG: load i32, i32* [[REF_A]] +// CHECK-NOPCH-32-DAG: load i32, i32* [[LOCAL_A]] +// CHECK-NOPCH-DAG: load i16, i16* [[REF_AA]] +// CHECK-NOPCH-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 void bar () { #define pragma_target _Pragma("omp target") Index: clang/test/OpenMP/target_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/target_firstprivate_codegen.cpp +++ clang/test/OpenMP/target_firstprivate_codegen.cpp @@ -16,13 +16,13 @@ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-64,TCHECK-NOPCH,TCHECK-NOPCH-64 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s 
--check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-64,TCHECK-PCH,TCHECK-PCH-64 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-32,TCHECK-NOPCH // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-32,TCHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown 
-fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s @@ -395,6 +395,26 @@ return a; } +// TCHECK-PCH: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK-PCH: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, +// TCHECK-PCH: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, +// TCHECK-PCH-NOT: alloca i{{[0-9]+}}, +// TCHECK-PCH: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], +// TCHECK-PCH: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], +// TCHECK-PCH: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK-PCH-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* +// TCHECK-PCH: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], + +// firstprivate(a) +// TCHECK-PCH-NOT: store i{{[0-9]+}} % + +// firstprivate(b) +// TCHECK-PCH: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* +// TCHECK-PCH: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* +// TCHECK-PCH: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) + +// TCHECK-PCH: ret void + // TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, @@ -610,24 +630,24 @@ // CHECK: call i32 @__tgt_target(i64 -1, {{.+}}, i32 2, i8** {{.+}}, i8** {{.+}}, i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT6]], i32 0, i32 0)) -// TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) -// TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[B_ADDR:%.+]] = 
alloca [10 x i{{[0-9]+}}]*, -// TCHECK-NOT: alloca i{{[0-9]+}}, -// TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], -// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], -// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* -// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK-NOPCH: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK-NOPCH: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, +// TCHECK-NOPCH: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, +// TCHECK-NOPCH-NOT: alloca i{{[0-9]+}}, +// TCHECK-NOPCH: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], +// TCHECK-NOPCH: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], +// TCHECK-NOPCH: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK-NOPCH-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* +// TCHECK-NOPCH: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], // firstprivate(a) -// TCHECK-NOT: store i{{[0-9]+}} % +// TCHECK-NOPCH-NOT: store i{{[0-9]+}} % // firstprivate(b) -// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* -// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) +// TCHECK-NOPCH: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* +// TCHECK-NOPCH: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* +// TCHECK-NOPCH: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) -// TCHECK: ret void +// TCHECK-NOPCH: ret void #endif Index: clang/test/OpenMP/target_parallel_codegen.cpp 
=================================================================== --- clang/test/OpenMP/target_parallel_codegen.cpp +++ clang/test/OpenMP/target_parallel_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH,CHECK-NOPCH-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-PCH,CHECK-PCH-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-NOPCH,CHECK-NOPCH-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-PCH,CHECK-PCH-32 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -724,6 +724,39 @@ // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. +// CHECK-PCH: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK-PCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-PCH: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-PCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. 
+// CHECK-PCH-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-PCH-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-PCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-PCH-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-PCH-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-PCH-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-PCH-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-PCH-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-PCH-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-PCH-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-PCH-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-PCH-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-PCH-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK-PCH: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + // CHECK: define internal void [[HVT6]] // Create local storage for each capture. 
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] @@ -766,37 +799,37 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. -// CHECK: define internal void [[HVT5]] +// CHECK-NOPCH: define internal void [[HVT5]] // Create local storage for each capture. -// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] -// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] -// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] -// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// CHECK-NOPCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-NOPCH: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-NOPCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. 
-// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* -// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], - -// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] -// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* -// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align -// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] -// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align -// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], - -// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] -// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* -// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align -// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], - -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// CHECK-NOPCH-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-NOPCH-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-NOPCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-NOPCH-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-NOPCH-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-NOPCH-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-NOPCH-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-NOPCH-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-NOPCH-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-NOPCH-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// 
CHECK-NOPCH-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-NOPCH-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-NOPCH-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// CHECK-NOPCH: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. // CHECK: define internal void @.omp_offloading.requires_reg() Index: clang/test/OpenMP/target_parallel_for_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_for_codegen.cpp +++ clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH,CHECK-NOPCH-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,CHECK-PCH,CHECK-PCH-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,CHECK-NOPCH,CHECK-NOPCH-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,CHECK-PCH,CHECK-PCH-32 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -759,6 +759,39 @@ // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. +// CHECK-PCH: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK-PCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-PCH: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-PCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. 
+// CHECK-PCH-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-PCH-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-PCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-PCH-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-PCH-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-PCH-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-PCH-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-PCH-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-PCH-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-PCH-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-PCH-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-PCH-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-PCH-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK-PCH: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + // CHECK: define internal void [[HVT6]] // Create local storage for each capture. 
// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] @@ -801,37 +834,37 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. -// CHECK: define internal void [[HVT5]] +// CHECK-NOPCH: define internal void [[HVT5]] // Create local storage for each capture. -// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] -// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] -// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] -// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// CHECK-NOPCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-NOPCH: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-NOPCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. 
-// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* -// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], - -// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] -// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* -// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align -// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] -// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align -// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], - -// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] -// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* -// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align -// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], - -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// CHECK-NOPCH-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-NOPCH-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-NOPCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-NOPCH-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-NOPCH-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-NOPCH-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-NOPCH-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-NOPCH-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-NOPCH-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-NOPCH-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// 
CHECK-NOPCH-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-NOPCH-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-NOPCH-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// CHECK-NOPCH: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. // CHECK: define internal void @.omp_offloading.requires_reg() Index: clang/test/OpenMP/target_parallel_for_simd_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -1,16 +1,16 @@ // Test host codegen. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,OMP45,CHECK-A,CHECK-A-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,OMP45,CHECK-B,CHECK-B-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,OMP45,CHECK-A,CHECK-A-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x 
c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,OMP45,CHECK-B,CHECK-B-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,OMP50,CHECK-A,CHECK-A-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP50 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - 
| FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,OMP50,CHECK-A,CHECK-A-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,OMP50,CHECK-A,CHECK-A-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,OMP50,CHECK-A,CHECK-A-32 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -842,6 +842,39 @@ // OMP50: load double{{.*}}!nontemporal +// CHECK-B: define internal void [[HVT5]] +// Create local storage for each capture. 
+// CHECK-B: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-B: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-B-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-B-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-B-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-B-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-B-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-B-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-B-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-B-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-B-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-B-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-B-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-B-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-B-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-B-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-B-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-B-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-B: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK-B: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + // CHECK: define internal void [[HVT6]] // Create local storage for each capture. // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] @@ -884,37 +917,37 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. -// CHECK: define internal void [[HVT5]] +// CHECK-A: define internal void [[HVT5]] // Create local storage for each capture. 
-// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] -// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] -// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] -// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// CHECK-A: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-A: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-A-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-A-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-A-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. -// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* -// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], - -// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] -// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* -// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align -// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] -// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align -// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], - -// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] -// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* -// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align -// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], - -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// CHECK-A-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-A-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-A-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-A-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-A-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-A-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-A-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-A-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-A-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-A-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-A-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-A-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-A-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-A: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// CHECK-A: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. 
// OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} Index: clang/test/OpenMP/target_parallel_if_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_if_codegen.cpp +++ clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH,CHECK-NOPCH-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-PCH,CHECK-PCH-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-NOPCH,CHECK-NOPCH-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-PCH,CHECK-PCH-32 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -338,6 +338,24 @@ +// CHECK-PCH: define internal void [[HVT5]]( +// CHECK-PCH-NOT: @__kmpc_fork_call +// CHECK-PCH: call void @__kmpc_serialized_parallel( +// CHECK-PCH: call void [[OMP_OUTLINED5:@.+]](i32* {{%.+}}, i32* {{%.+}}, i[[SZ]] {{.+}}) +// CHECK-PCH: call void @__kmpc_end_serialized_parallel( +// CHECK-PCH: ret +// +// + + +// CHECK-PCH: define internal void [[HVT6]]( +// CHECK-PCH-NOT: call void @__kmpc_serialized_parallel( +// CHECK-PCH-NOT: call void [[OMP_OUTLINED5:@.+]](i32* {{%.+}}, i32* {{%.+}}, i[[SZ]] {{.+}}) +// CHECK-PCH-NOT: call void @__kmpc_end_serialized_parallel( +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), +// CHECK-PCH: ret +// +// @@ -372,22 +390,22 @@ -// CHECK: define internal void [[HVT5]]( -// CHECK-NOT: @__kmpc_fork_call -// CHECK: call void @__kmpc_serialized_parallel( -// CHECK: call void [[OMP_OUTLINED5:@.+]](i32* {{%.+}}, i32* {{%.+}}, i[[SZ]] {{.+}}) -// CHECK: call void @__kmpc_end_serialized_parallel( -// CHECK: ret +// CHECK-NOPCH: define internal void [[HVT5]]( +// CHECK-NOPCH-NOT: @__kmpc_fork_call +// CHECK-NOPCH: call void @__kmpc_serialized_parallel( +// CHECK-NOPCH: call void [[OMP_OUTLINED5:@.+]](i32* {{%.+}}, i32* {{%.+}}, i[[SZ]] {{.+}}) +// CHECK-NOPCH: call void @__kmpc_end_serialized_parallel( +// CHECK-NOPCH: ret // // -// CHECK: define internal void [[HVT6]]( -// CHECK-NOT: call void @__kmpc_serialized_parallel( -// CHECK-NOT: call void [[OMP_OUTLINED5:@.+]](i32* {{%.+}}, i32* {{%.+}}, i[[SZ]] {{.+}}) -// CHECK-NOT: call void @__kmpc_end_serialized_parallel( -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), -// CHECK: ret +// CHECK-NOPCH: define internal void [[HVT6]]( +// CHECK-NOPCH-NOT: call void @__kmpc_serialized_parallel( +// CHECK-NOPCH-NOT: call void [[OMP_OUTLINED5:@.+]](i32* {{%.+}}, i32* {{%.+}}, i[[SZ]] {{.+}}) +// CHECK-NOPCH-NOT: call void @__kmpc_end_serialized_parallel( +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]])* [[OMP_OUTLINED5:@.+]] to void (i32*, i32*, ...)*), +// CHECK-NOPCH: ret // // Index: clang/test/OpenMP/target_parallel_num_threads_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH,CHECK-NOPCH-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-PCH,CHECK-PCH-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s 
--check-prefixes CHECK,CHECK-32,CHECK-NOPCH,CHECK-NOPCH-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-PCH,CHECK-PCH-32 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -291,6 +291,22 @@ +// CHECK-PCH: define internal void [[HVT5]]( +// CHECK-PCH: call void @__kmpc_push_num_threads(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20) +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 0, +// +// + + +// CHECK-PCH: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) +// CHECK-PCH-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-PCH: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* +// CHECK-PCH: [[T:%.+]] = load i16, i16* [[CONV]], align +// CHECK-PCH: [[NT:%.+]] = sext i16 [[T]] to i32 +// CHECK-PCH: call void @__kmpc_push_num_threads(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 2, +// +// @@ -318,20 +334,20 @@ -// CHECK: define internal void [[HVT5]]( -// CHECK: call void @__kmpc_push_num_threads(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20) -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 0, +// CHECK-NOPCH: define internal void [[HVT5]]( +// CHECK-NOPCH: call void @__kmpc_push_num_threads(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20) +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 0, // // -// CHECK: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) -// CHECK-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align -// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* -// CHECK: [[T:%.+]] = load i16, i16* [[CONV]], align -// CHECK: [[NT:%.+]] = sext i16 [[T]] to i32 -// CHECK: call void @__kmpc_push_num_threads(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 2, +// CHECK-NOPCH: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) +// CHECK-NOPCH-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-NOPCH: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* +// CHECK-NOPCH: [[T:%.+]] = load i16, i16* [[CONV]], align +// CHECK-NOPCH: [[NT:%.+]] = sext i16 [[T]] to i32 +// CHECK-NOPCH: call void @__kmpc_push_num_threads(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 2, // // Index: clang/test/OpenMP/target_private_codegen.cpp =================================================================== --- clang/test/OpenMP/target_private_codegen.cpp +++ clang/test/OpenMP/target_private_codegen.cpp @@ -1,13 +1,13 @@ // Only test codegen on target side, as private clause does not require any action on the host side // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-64,TCHECK-NOPCH // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-64,TCHECK-PCH // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple 
i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-32,TCHECK-NOPCH // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-32,TCHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s @@ -179,6 +179,17 @@ return a; } +// template +// TCHECK-PCH: define weak void @__omp_offloading_{{.+}}() +// TCHECK-PCH: [[A:%.+]] = alloca i{{[0-9]+}}, +// TCHECK-PCH: [[A2:%.+]] = alloca i{{[0-9]+}}, +// TCHECK-PCH: [[B:%.+]] = alloca [10 x 
i{{[0-9]+}}], +// TCHECK-PCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A]], +// TCHECK-PCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A2]], +// TCHECK-PCH: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// TCHECK-PCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]], +// TCHECK-PCH: ret void + // TCHECK: define weak void @__omp_offloading_{{.+}}() // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, @@ -261,14 +272,14 @@ } // template -// TCHECK: define weak void @__omp_offloading_{{.+}}() -// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}], -// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A]], -// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A2]], -// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]], -// TCHECK: ret void +// TCHECK-NOPCH: define weak void @__omp_offloading_{{.+}}() +// TCHECK-NOPCH: [[A:%.+]] = alloca i{{[0-9]+}}, +// TCHECK-NOPCH: [[A2:%.+]] = alloca i{{[0-9]+}}, +// TCHECK-NOPCH: [[B:%.+]] = alloca [10 x i{{[0-9]+}}], +// TCHECK-NOPCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A]], +// TCHECK-NOPCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A2]], +// TCHECK-NOPCH: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// TCHECK-NOPCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]], +// TCHECK-NOPCH: ret void #endif Index: clang/test/OpenMP/target_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/target_reduction_codegen.cpp +++ clang/test/OpenMP/target_reduction_codegen.cpp @@ -1,13 +1,13 @@ // Only test codegen on target side, as private clause does not require any action on the host side // Test target codegen - host bc file has to be 
created first. // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-64,TCHECK-NOPCH // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-64,TCHECK-PCH // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm 
%s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-32,TCHECK-NOPCH // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s -// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes TCHECK,TCHECK-32,TCHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY0 %s @@ -118,6 +118,23 @@ return a; } +// template +// TCHECK-PCH: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}}) +// TCHECK-PCH: [[A:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK-PCH: [[A2:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK-PCH: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*, +// TCHECK-PCH: store {{.+}}, {{.+}} [[A]], +// TCHECK-PCH: store {{.+}}, {{.+}} [[A2]], +// TCHECK-PCH: store {{.+}}, {{.+}} [[B]], +// TCHECK-PCH: [[A_REF:%.+]] = load i32*, i32** [[A]], +// TCHECK-PCH: [[AA_REF:%.+]] = load i16*, i16** [[AA]], +// TCHECK-PCH: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]], +// TCHECK-PCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]], +// TCHECK-PCH: store 
i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]], +// TCHECK-PCH: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// TCHECK-PCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]], +// TCHECK-PCH: ret void + // TCHECK: define weak void @__omp_offloading_{{.+}}(i32*{{.+}}, i16*{{.+}}, i8*{{.+}}, [10 x i32]*{{.+}}) // TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, // TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*, @@ -206,20 +223,20 @@ } // template -// TCHECK: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}}) -// TCHECK: [[A:%.+]] = alloca i{{[0-9]+}}*, -// TCHECK: [[A2:%.+]] = alloca i{{[0-9]+}}*, -// TCHECK: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*, -// TCHECK: store {{.+}}, {{.+}} [[A]], -// TCHECK: store {{.+}}, {{.+}} [[A2]], -// TCHECK: store {{.+}}, {{.+}} [[B]], -// TCHECK: [[A_REF:%.+]] = load i32*, i32** [[A]], -// TCHECK: [[AA_REF:%.+]] = load i16*, i16** [[AA]], -// TCHECK: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]], -// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[A_REF]], -// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]], -// TCHECK: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// TCHECK: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]], -// TCHECK: ret void +// TCHECK-NOPCH: define weak void @__omp_offloading_{{.+}}(i{{[0-9]+}}*{{.+}}, i{{[0-9]+}}*{{.+}}, [10 x i32]*{{.+}}) +// TCHECK-NOPCH: [[A:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK-NOPCH: [[A2:%.+]] = alloca i{{[0-9]+}}*, +// TCHECK-NOPCH: [[B:%.+]] = alloca [10 x i{{[0-9]+}}]*, +// TCHECK-NOPCH: store {{.+}}, {{.+}} [[A]], +// TCHECK-NOPCH: store {{.+}}, {{.+}} [[A2]], +// TCHECK-NOPCH: store {{.+}}, {{.+}} [[B]], +// TCHECK-NOPCH: [[A_REF:%.+]] = load i32*, i32** [[A]], +// TCHECK-NOPCH: [[AA_REF:%.+]] = load i16*, i16** [[AA]], +// TCHECK-NOPCH: [[B_REF:%.+]] = load {{.+}}*, {{.+}}** [[B]], +// TCHECK-NOPCH: store i{{[0-9]+}} 1, 
i{{[0-9]+}}* [[A_REF]], +// TCHECK-NOPCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[AA_REF]], +// TCHECK-NOPCH: [[B_GEP:%.+]] = getelementptr inbounds [10 x i{{[0-9]+}}], [10 x i{{[0-9]+}}]* [[B_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// TCHECK-NOPCH: store i{{[0-9]+}} 1, i{{[0-9]+}}* [[B_GEP]], +// TCHECK-NOPCH: ret void #endif Index: clang/test/OpenMP/target_simd_codegen.cpp =================================================================== --- clang/test/OpenMP/target_simd_codegen.cpp +++ clang/test/OpenMP/target_simd_codegen.cpp @@ -1,16 +1,16 @@ // Test host codegen. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,OMP45,CHECK-A,CHECK-A-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,OMP45,CHECK-B,CHECK-B-64 +// RUN: %clang_cc1 -verify 
-fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,OMP45,CHECK-A // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,OMP45,CHECK-B +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,OMP50,CHECK-A,CHECK-A-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP50 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50 +// 
RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,OMP50,CHECK-A,CHECK-A-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,OMP50,CHECK-A // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,OMP50,CHECK-A // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -735,21 +735,21 @@ // OMP50: load double, {{.*}}!nontemporal // OMP50: store double {{.*}}!nontemporal -// CHECK: define internal void [[HVT6]] +// CHECK-A: define internal void [[HVT6]] // Create local storage for each capture. 
-// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] -// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] -// CHECK-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] -// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// CHECK-A: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-A-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-A-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-A-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] +// CHECK-A-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. -// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* -// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* -// CHECK-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], +// CHECK-A-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-A-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-A-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* +// CHECK-A-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], // CHECK: define internal void [[HVT5]] // Create local storage for each capture. @@ -764,6 +764,23 @@ // CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* // CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], +// CHECK-B: define internal void [[HVT6]] +// Create local storage for each capture. 
+// CHECK-B: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_AAA:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-B-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-B-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-B-DAG: store i[[SZ]] [[ARG_AAA:%.+]], i[[SZ]]* [[LOCAL_AAA]] +// CHECK-B-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. +// CHECK-B-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-B-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-B-DAG: [[CONV_AAAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AAA]] to i8* +// CHECK-B-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + + // OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} // TOMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} // OMP50: !{!"llvm.loop.vectorize.enable", i1 false} Index: clang/test/OpenMP/target_teams_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_codegen.cpp +++ clang/test/OpenMP/target_teams_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH,CHECK-NOPCH-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,CHECK-PCH,CHECK-PCH-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,CHECK-NOPCH,CHECK-NOPCH-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,CHECK-PCH,CHECK-PCH-32 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -779,6 +779,39 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. +// CHECK-PCH: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK-PCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-PCH: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-PCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. 
+// CHECK-PCH-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-PCH-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-PCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-PCH-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-PCH-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-PCH-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-PCH-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-PCH-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-PCH-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-PCH-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-PCH-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-PCH-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-PCH-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK-PCH: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + // CHECK: define internal void [[HVT6]] // Create local storage for each capture. @@ -822,37 +855,38 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. 
-// CHECK: define internal void [[HVT5]] + +// CHECK-NOPCH: define internal void [[HVT5]] // Create local storage for each capture. -// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] -// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] -// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] -// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// CHECK-NOPCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-NOPCH: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-NOPCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. 
-// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* -// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], - -// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] -// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* -// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align -// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] -// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align -// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], - -// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] -// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* -// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align -// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], - -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// CHECK-NOPCH-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-NOPCH-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-NOPCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-NOPCH-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-NOPCH-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-NOPCH-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-NOPCH-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-NOPCH-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-NOPCH-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-NOPCH-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// 
CHECK-NOPCH-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-NOPCH-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-NOPCH-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// CHECK-NOPCH: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. #endif Index: clang/test/OpenMP/target_teams_distribute_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH,CHECK-NOPCH-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-64,CHECK-PCH,CHECK-PCH-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,CHECK-NOPCH,CHECK-NOPCH-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes CHECK,CHECK-32,CHECK-PCH,CHECK-PCH-32 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -776,6 +776,39 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED5]](i32* noalias %.global_tid., i32* noalias %.bound_tid., [[S1]]* %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i16* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. +// CHECK-PCH: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK-PCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-PCH: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-PCH: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-PCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-PCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. 
+// CHECK-PCH-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-PCH-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-PCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-PCH-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-PCH-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-PCH-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-PCH-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-PCH-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-PCH-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-PCH-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-PCH-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-PCH-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-PCH-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK-PCH: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + // CHECK: define internal void [[HVT6]] // Create local storage for each capture. @@ -821,37 +854,37 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. 
-// CHECK: define internal void [[HVT5]] +// CHECK-NOPCH: define internal void [[HVT5]] // Create local storage for each capture. -// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] -// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] -// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] -// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// CHECK-NOPCH: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-NOPCH: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-NOPCH-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-NOPCH-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. 
-// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* -// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], - -// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] -// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* -// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align -// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] -// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align -// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], - -// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] -// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* -// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align -// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], - -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// CHECK-NOPCH-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-NOPCH-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-NOPCH-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-NOPCH-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-NOPCH-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-NOPCH-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-NOPCH-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-NOPCH-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-NOPCH-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-NOPCH-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// 
CHECK-NOPCH-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-NOPCH-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-NOPCH-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// CHECK-NOPCH: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. #endif Index: clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -1,16 +1,16 @@ // Test host codegen. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP45 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,OMP45,CHECK-A,CHECK-A-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP45 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,OMP45,CHECK-B,CHECK-B-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,OMP45,CHECK-A,CHECK-A-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix 
CHECK-32 --check-prefix OMP45 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,OMP45,CHECK-B,CHECK-B-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,OMP50,CHECK-A,CHECK-A-64 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 --check-prefix OMP50 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,OMP50,CHECK-A,CHECK-A-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,OMP50,CHECK-A,CHECK-A-32 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 
-x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 --check-prefix OMP50 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -DOMP5 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,OMP50,CHECK-A,CHECK-A-32 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -840,6 +840,39 @@ // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. +// CHECK-B: define internal void [[HVT5]] +// Create local storage for each capture. +// CHECK-B: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-B: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-B: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-B-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-B-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-B-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// Store captures in the context. 
+// CHECK-B-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-B-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-B-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-B-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-B-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-B-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-B-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-B-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-B-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-B-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-B-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* +// CHECK-B-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-B-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-B: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// +// +// CHECK-B: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. + // CHECK: define internal void [[HVT6]] // Create local storage for each capture. // CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] @@ -884,37 +917,37 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED6]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. 
-// CHECK: define internal void [[HVT5]] +// CHECK-A: define internal void [[HVT5]] // Create local storage for each capture. -// CHECK: [[LOCAL_A:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_B:%.+]] = alloca [10 x i32]* -// CHECK: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] -// CHECK: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] -// CHECK-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] -// CHECK-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] -// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] +// CHECK-A: [[LOCAL_A:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_AA:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_B:%.+]] = alloca [10 x i32]* +// CHECK-A: [[LOCAL_A_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-A: [[LOCAL_AA_CASTED:%.+]] = alloca i[[SZ]] +// CHECK-A-DAG: store i[[SZ]] [[ARG_A:%.+]], i[[SZ]]* [[LOCAL_A]] +// CHECK-A-DAG: store i[[SZ]] [[ARG_AA:%.+]], i[[SZ]]* [[LOCAL_AA]] +// CHECK-A-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCAL_B]] // Store captures in the context. 
-// CHECK-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* -// CHECK-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* -// CHECK-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], - -// CHECK-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] -// CHECK-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* -// CHECK-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align -// CHECK-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] -// CHECK-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align -// CHECK-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], - -// CHECK-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] -// CHECK-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA_CASTED]] to i16* -// CHECK-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align -// CHECK-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], - -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) +// CHECK-A-64-DAG:[[CONV_AP:%.+]] = bitcast i[[SZ]]* [[LOCAL_A]] to i32* +// CHECK-A-DAG: [[CONV_AAP:%.+]] = bitcast i[[SZ]]* [[LOCAL_AA]] to i16* +// CHECK-A-DAG: [[REF_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCAL_B]], + +// CHECK-A-64-DAG:[[CONV_A:%.+]] = load i32, i32* [[CONV_AP]] +// CHECK-A-64-DAG:[[CONV:%.+]] = bitcast i[[SZ]]* [[LOCAL_A_CASTED]] to i32* +// CHECK-A-64-DAG:store i32 [[CONV_A]], i32* [[CONV]], align +// CHECK-A-32-DAG:[[LOCAL_AV:%.+]] = load i32, i32* [[LOCAL_A]] +// CHECK-A-32-DAG:store i32 [[LOCAL_AV]], i32* [[LOCAL_A_CASTED]], align +// CHECK-A-DAG: [[REF_A:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_A_CASTED]], + +// CHECK-A-DAG: [[CONV_AA:%.+]] = load i16, i16* [[CONV_AAP]] +// CHECK-A-DAG: [[CONV:%.+]] = bitcast i[[SZ]]* 
[[LOCAL_AA_CASTED]] to i16* +// CHECK-A-DAG: store i16 [[CONV_AA]], i16* [[CONV]], align +// CHECK-A-DAG: [[REF_AA:%.+]] = load i[[SZ]], i[[SZ]]* [[LOCAL_AA_CASTED]], + +// CHECK-A: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]], i[[SZ]], [10 x i32]*)* [[OMP_OUTLINED7:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] [[REF_A]], i[[SZ]] [[REF_AA]], [10 x i32]* [[REF_B]]) // // -// CHECK: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) +// CHECK-A: define internal {{.*}}void [[OMP_OUTLINED7]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, [10 x i32]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. // OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} Index: clang/test/OpenMP/target_teams_num_teams_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-PCH +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-NOPCH // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -292,6 +292,22 @@ +// CHECK-PCH: define internal void [[HVT5]]( +// CHECK-PCH: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20, i32 0) +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 0, +// +// + + +// CHECK-PCH: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) +// CHECK-PCH-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-PCH: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* +// CHECK-PCH: [[T:%.+]] = load i16, i16* [[CONV]], align +// CHECK-PCH: [[NT:%.+]] = sext i16 [[T]] to i32 +// CHECK-PCH: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]], i32 0) +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 2, +// +// @@ -318,20 +334,20 @@ -// CHECK: define internal void [[HVT5]]( -// CHECK: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20, i32 0) -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 0, +// CHECK-NOPCH: define internal void [[HVT5]]( +// CHECK-NOPCH: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20, i32 0) +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 0, // // -// CHECK: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) -// CHECK-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align -// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* -// CHECK: [[T:%.+]] = load i16, i16* [[CONV]], align -// CHECK: [[NT:%.+]] = sext i16 [[T]] to i32 -// CHECK: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]], i32 0) -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 2, +// CHECK-NOPCH: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) +// CHECK-NOPCH-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-NOPCH: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* +// CHECK-NOPCH: [[T:%.+]] = load i16, i16* [[CONV]], align +// CHECK-NOPCH: [[NT:%.+]] = sext i16 [[T]] to i32 +// CHECK-NOPCH: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]], i32 0) +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 2, // // Index: clang/test/OpenMP/target_teams_thread_limit_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -1,10 +1,10 @@ // Test host codegen. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-NOPCH // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-64,CHECK-PCH +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-NOPCH // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown 
-fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-32,CHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s @@ -299,6 +299,22 @@ +// CHECK-PCH: define internal void [[HVT5]]( +// CHECK-PCH: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 0, i32 20) +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 0, +// +// + + +// CHECK-PCH: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) +// CHECK-PCH-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-PCH: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* +// CHECK-PCH: [[T:%.+]] = load i16, i16* [[CONV]], align +// CHECK-PCH: [[NT:%.+]] = sext i16 [[T]] to i32 +// CHECK-PCH: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]], i32 1024) +// CHECK-PCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 2, +// +// @@ -331,20 +347,20 @@ -// CHECK: define internal void [[HVT5]]( -// CHECK: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 0, i32 20) -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 0, +// CHECK-NOPCH: define internal void [[HVT5]]( +// CHECK-NOPCH: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 0, i32 20) +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 0, // // -// CHECK: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) -// CHECK-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align -// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* -// CHECK: [[T:%.+]] = load i16, i16* [[CONV]], align -// CHECK: [[NT:%.+]] = sext i16 [[T]] to i32 -// CHECK: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]], i32 1024) -// CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 2, +// CHECK-NOPCH: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) +// CHECK-NOPCH-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-NOPCH: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* +// CHECK-NOPCH: [[T:%.+]] = load i16, i16* [[CONV]], align +// CHECK-NOPCH: [[NT:%.+]] = sext i16 [[T]] to i32 +// CHECK-NOPCH: call i32 @__kmpc_push_num_teams(%struct.ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]], i32 1024) +// CHECK-NOPCH: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 2, // // Index: clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp =================================================================== --- clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes CHECK,CHECK-NOPCH // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s @@ -90,6 +90,25 @@ // CHECK: call i{{[0-9]+}} 
@__tgt_target_teams( // CHECK: call void [[T_OFFLOADING_FUN_3:@.+]]( +// CHECK-PCH: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK-PCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + +// CHECK-PCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK-PCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK-PCH: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK-PCH: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK-PCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK-PCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) +// CHECK-PCH: [[NUM_TH_CPT:%.+]] = alloca i64, +// CHECK-PCH: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], +// CHECK-PCH: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8* +// CHECK-PCH-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], +// CHECK-PCH-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} +// CHECK-PCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) +// CHECK-PCH: call {{.*}}void {{.*}} @__kmpc_fork_call( + // CHECK: define internal void [[T_OFFLOADING_FUN_0]]( // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) @@ -104,22 +123,22 @@ // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( -// CHECK: define internal void [[T_OFFLOADING_FUN_2]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) +// CHECK-NOPCH: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK-NOPCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) 
-// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// CHECK-NOPCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK-NOPCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK-NOPCH: call {{.*}}void {{.*}} @__kmpc_fork_call( -// CHECK: define internal void [[T_OFFLOADING_FUN_3]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) +// CHECK-NOPCH: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK-NOPCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) -// CHECK: [[NUM_TH_CPT:%.+]] = alloca i64, -// CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], -// CHECK: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8* -// CHECK-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], -// CHECK-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// CHECK-NOPCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) +// CHECK-NOPCH: [[NUM_TH_CPT:%.+]] = alloca i64, +// CHECK-NOPCH: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], +// CHECK-NOPCH: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8* +// CHECK-NOPCH-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], +// CHECK-NOPCH-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} +// CHECK-NOPCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) +// CHECK-NOPCH: 
call {{.*}}void {{.*}} @__kmpc_fork_call( #endif Index: clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp =================================================================== --- clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes CHECK,CHECK-NOPCH // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes CHECK,CHECK-PCH // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-ibm-linux-gnu -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-ibm-linux-gnu -fexceptions -fcxx-exceptions -emit-pch -o %t %s @@ -90,6 +90,25 @@ // CHECK: call i{{[0-9]+}} 
@__tgt_target_teams( // CHECK: call void [[T_OFFLOADING_FUN_3:@.+]]( +// CHECK-PCH: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK-PCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) + +// CHECK-PCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK-PCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK-PCH: call {{.*}}void {{.*}} @__kmpc_fork_call( + +// CHECK-PCH: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK-PCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) + +// CHECK-PCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) +// CHECK-PCH: [[NUM_TH_CPT:%.+]] = alloca i64, +// CHECK-PCH: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], +// CHECK-PCH: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8* +// CHECK-PCH-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], +// CHECK-PCH-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} +// CHECK-PCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) +// CHECK-PCH: call {{.*}}void {{.*}} @__kmpc_fork_call( + // CHECK: define internal void [[T_OFFLOADING_FUN_0]]( // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_0:@.+]] to {{.+}}) @@ -104,24 +123,24 @@ // CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 23) // CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( -// CHECK: define internal void [[T_OFFLOADING_FUN_2]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) +// CHECK-NOPCH: define internal void [[T_OFFLOADING_FUN_2]]( +// CHECK-NOPCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[T_OMP_TEAMS_OUTLINED_2:@.+]] to {{.+}}) 
-// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// CHECK-NOPCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_2]]( +// CHECK-NOPCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 1) +// CHECK-NOPCH: call {{.*}}void {{.*}} @__kmpc_fork_call( -// CHECK: define internal void [[T_OFFLOADING_FUN_3]]( -// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) +// CHECK-NOPCH: define internal void [[T_OFFLOADING_FUN_3]]( +// CHECK-NOPCH: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 {{.+}}, {{.+}}* [[T_OMP_TEAMS_OUTLINED_3:@.+]] to {{.+}}) -// CHECK: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) -// CHECK: [[NUM_TH_CPT:%.+]] = alloca i64, -// CHECK: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], -// CHECK: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8* -// CHECK-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], -// CHECK-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} -// CHECK: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) -// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call( +// CHECK-NOPCH: define{{.+}} void [[T_OMP_TEAMS_OUTLINED_3]]({{.+}}, {{.+}}, {{.+}} [[NUM_TH_CPT_IN:%.+]]) +// CHECK-NOPCH: [[NUM_TH_CPT:%.+]] = alloca i64, +// CHECK-NOPCH: store {{.+}} [[NUM_TH_CPT_IN]], {{.+}} [[NUM_TH_CPT]], +// CHECK-NOPCH: [[NUM_TH_REF:%.+]] = bitcast i64* [[NUM_TH_CPT]] to i8* +// CHECK-NOPCH-DAG: [[NUM_TH_VAL:%.+]] = load {{.+}}, {{.+}} [[NUM_TH_REF]], +// CHECK-NOPCH-DAG: [[NUM_TH_SEXT:%.+]] = sext i8 [[NUM_TH_VAL]] to {{.+}} +// CHECK-NOPCH: call {{.*}}void @__kmpc_push_num_threads([[IDENT_T_TY]]* [[DEF_LOC_2]], i32 {{.+}}, i32 [[NUM_TH_SEXT]]) +// CHECK-NOPCH: 
call {{.*}}void {{.*}} @__kmpc_fork_call( // CHECK: !{!"llvm.loop.vectorize.enable", i1 true} Index: clang/test/OpenMP/threadprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/threadprivate_codegen.cpp +++ clang/test/OpenMP/threadprivate_codegen.cpp @@ -166,14 +166,14 @@ // CHECK-DEBUG-DAG: [[LOC10:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;527;10;;\00" // CHECK-DEBUG-DAG: [[LOC11:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;550;27;;\00" // CHECK-DEBUG-DAG: [[LOC12:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;573;10;;\00" -// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;693;9;;\00" -// CHECK-DEBUG-DAG: [[LOC14:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;716;10;;\00" -// CHECK-DEBUG-DAG: [[LOC15:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;752;10;;\00" -// CHECK-DEBUG-DAG: [[LOC16:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;781;10;;\00" -// CHECK-DEBUG-DAG: [[LOC17:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;801;10;;\00" -// CHECK-DEBUG-DAG: [[LOC18:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;824;27;;\00" -// CHECK-DEBUG-DAG: [[LOC19:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;847;10;;\00" // CHECK-DEBUG-DAG: [[LOC20:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;315;1;;\00" +// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;705;9;;\00" +// 
CHECK-DEBUG-DAG: [[LOC14:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;728;10;;\00" +// CHECK-DEBUG-DAG: [[LOC15:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;764;10;;\00" +// CHECK-DEBUG-DAG: [[LOC16:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;793;10;;\00" +// CHECK-DEBUG-DAG: [[LOC17:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;813;10;;\00" +// CHECK-DEBUG-DAG: [[LOC18:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;836;27;;\00" +// CHECK-DEBUG-DAG: [[LOC19:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;859;10;;\00" // CHECK-TLS-DAG: [[GS1:@.+]] = internal thread_local global [[S1]] zeroinitializer // CHECK-TLS-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer // CHECK-TLS-DAG: [[ARR_X:@.+]] = thread_local global [2 x [3 x [[S1]]]] zeroinitializer @@ -664,6 +664,18 @@ #endif #ifdef BODY +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC20]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: @__kmpc_global_thread_num +// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK-DEBUG: define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8* %0) +// CHECK-DEBUG: } +// CHECK-DEBUG: define {{.*}} [[S4_CTOR:@.*]]([[S4]]* {{.*}}, +// CHECK-DEBUG: define internal {{.*}}void [[ST_S4_ST_DTOR]](i8* 
%0) +// CHECK-DEBUG: } +// CHECK-DEBUG: define {{.*}} [[S4_DTOR:@.*]]([[S4]]* {{.*}}) + // CHECK-LABEL: @{{.*}}foobar{{.*}}() // CHECK-DEBUG-LABEL: @{{.*}}foobar{{.*}}() // CHECK-TLS: @{{.*}}foobar{{.*}}() @@ -873,18 +885,6 @@ // CHECK-NEXT: ret void // CHECK-NEXT: } // CHECK: define {{.*}} [[S4_DTOR]]([[S4]]* {{.*}}) -// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] -// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 -// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC20]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] -// CHECK-DEBUG: @__kmpc_global_thread_num -// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) -// CHECK-DEBUG: define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8* %0) -// CHECK-DEBUG: } -// CHECK-DEBUG: define {{.*}} [[S4_CTOR:@.*]]([[S4]]* {{.*}}, -// CHECK-DEBUG: define internal {{.*}}void [[ST_S4_ST_DTOR]](i8* %0) -// CHECK-DEBUG: } -// CHECK-DEBUG: define {{.*}} [[S4_DTOR:@.*]]([[S4]]* {{.*}}) - // CHECK: define internal {{.*}}void {{@.*}}() // CHECK-DAG: call {{.*}}void [[GS1_INIT]]() // CHECK-DAG: call {{.*}}void [[ARR_X_INIT]]() Index: clang/test/PCH/delayed-pch-instantiate.cpp =================================================================== --- /dev/null +++ clang/test/PCH/delayed-pch-instantiate.cpp @@ -0,0 +1,25 @@ +// Test this without pch. +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -DBODY %s -o - | FileCheck %s + +// Test with pch. 
+// RUN: %clang_cc1 -emit-pch -o %t %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -include-pch %t -DBODY %s -o - | FileCheck %s + +// expected-no-diagnostics + +#ifndef HEADER_H +#define HEADER_H +struct A { + void foo() { bar<0>(); } // This will trigger implicit instantiation of bar<0>() in the PCH. + template <int N> void bar(); +}; +#endif + +#ifdef BODY +// But the definition is only in the source, so the instantiation must be delayed until the TU. +template <int N> void A::bar() {} + +void test(A *a) { a->foo(); } +#endif + +// CHECK: define linkonce_odr void @_ZN1A3barILi0EEEvv Index: clang/test/PCH/specialization-after-instantiation.cpp =================================================================== --- /dev/null +++ clang/test/PCH/specialization-after-instantiation.cpp @@ -0,0 +1,29 @@ +// Test this without pch. +// RUN: %clang_cc1 -fsyntax-only -verify -DBODY %s + +// Test with pch. +// RUN: %clang_cc1 -emit-pch -o %t %s +// RUN: %clang_cc1 -include-pch %t -fsyntax-only -verify -DBODY %s + +#ifndef HEADER_H +#define HEADER_H + +template <typename T> +struct A { + int foo() const; +}; + +int bar(A<double> *a) { + return a->foo(); +} + +#endif // HEADER_H + +#ifdef BODY + +template <> +int A<double>::foo() const { // expected-error {{explicit specialization of 'foo' after instantiation}} // expected-note@17 {{implicit instantiation first required here}} + return 10; +} + +#endif // BODY