diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9426,7 +9426,8 @@ assert(OutlinedFn && "Invalid outlined function!"); - const bool RequiresOuterTask = D.hasClausesOfKind(); + const bool RequiresOuterTask = D.hasClausesOfKind() || + D.hasClausesOfKind(); llvm::SmallVector CapturedVars; const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp --- a/clang/test/OpenMP/declare_mapper_codegen.cpp +++ b/clang/test/OpenMP/declare_mapper_codegen.cpp @@ -22,6 +22,18 @@ #ifdef CK0 // Mapper function code generation and runtime interface. +// CK0: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CK0: [[ENTRY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CK0: [[ANON_T:%.+]] = type { %class.C* } +// CK0: [[ANON_T_0:%.+]] = type { %class.C* } +// CK0: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%[^,]+]], [[KMP_PRIVATES_T:%.+]] } +// CK0: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, %{{[^,]+}}, %{{[^,]+}} } +// CK0-32: [[KMP_PRIVATES_T]] = type { [1 x i64], [1 x i8*], [1 x i8*], [1 x i8*] } +// CK0-64: [[KMP_PRIVATES_T]] = type { [1 x i8*], [1 x i8*], [1 x i64], [1 x i8*] } +// CK0: [[KMP_TASK_T_WITH_PRIVATES_1:%.+]] = type { [[KMP_TASK_T]], [[KMP_PRIVATES_T_2:%.+]] } +// CK0-32: [[KMP_PRIVATES_T_2]] = type { [1 x i64], [1 x i8*], [1 x i8*], [1 x i8*] } +// CK0-64: [[KMP_PRIVATES_T_2]] = type { [1 x i8*], [1 x i8*], [1 x i64], [1 x i8*] } + // CK0-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}.region_id = weak constant i8 0 // CK0-64: [[SIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16] // CK0-32: [[SIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8] @@ -248,25 +260,28 @@ // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]] // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]] // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]] - // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]]) + // CK0: call void [[KERNEL_1:@.+]](%class.C* [[VAL]]) #pragma omp target map(mapper(id),tofrom: c) { ++c.a; } - // CK0-DAG: call i32 @__tgt_target_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[NWSIZES]]{{.+}}, {{.+}}[[NWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]]) - // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 - // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 - // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8** - // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 - // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 - // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0 - // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C** - // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C** - // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]] - // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]] - // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]] - // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]]) + // CK0: [[BP2GEP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[OFFLOAD_BP2:%[^,]+]], i32 0, i32 0 + // CK0: [[BP2CAST:%.+]] = bitcast i8** [[BP2GEP]] to %class.C** + // CK0: store %class.C* [[CADDR:%[^,]+]], %class.C** [[BP2CAST]], align + // CK0: [[P2GEP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[OFFLOAD_P2:%[^,]+]], i32 0, i32 0 + // CK0: [[P2CAST:%.+]] = bitcast i8** [[P2GEP]] to %class.C** + // CK0: store %class.C* [[CADDR]], %class.C** [[P2CAST]], align + // CK0: [[MAPPER2GEP:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[OFFLOAD_MAPPER2:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 + // CK0: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MAPPER2GEP]], align + // CK0: [[BP2:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[OFFLOAD_BP2]], i32 0, i32 0 + // CK0: [[P2:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[OFFLOAD_P2]], i32 0, i32 0 + // CK0: [[MAPPER2:%.+]] = bitcast [1 x i8*]* [[OFFLOAD_MAPPER2]] to i8** + // CK0-32: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{@.+}}, i32 {{%.+}}, i32 1, i32 40, i32 4, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CK0-64: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{@.+}}, i32 {{%.+}}, i32 1, i64 72, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CK0: [[TASK_CAST:%.+]] = bitcast i8* [[TASK]] to [[KMP_TASK_T_WITH_PRIVATES]]* + // CK0: [[TASK_WITH_PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_CAST]], i32 0, i32 1 + // CK0: {{.+}} = call i32 @__kmpc_omp_task([[IDENT_T]]* @1, i32 {{.+}}, i8* [[TASK]]) #pragma omp target map(mapper(id),tofrom: c) nowait { ++c.a; @@ -284,25 +299,18 @@ // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]] // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]] // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]] - // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]]) + // CK0: call void [[KERNEL_3:@.+]](%class.C* [[VAL]]) #pragma omp target teams map(mapper(id),to: c) { ++c.a; } - // CK0-DAG: call i32 @__tgt_target_teams_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[TEAMNWSIZES]]{{.+}}, {{.+}}[[TEAMNWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]], i32 0, i32 0) - // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 - // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 - // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8** - // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 - // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 - // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0 - // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C** - // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C** - // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]] - // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]] - // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]] - // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]]) + // CK0-32: [[TASK_1:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{@.+}}, i32 {{%.+}}, i32 1, i32 40, i32 4, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES_1]]*)* [[TASK_ENTRY_1:@.+]] to i32 (i32, i8*)*), i64 -1) + // CK0-64: [[TASK_1:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{@.+}}, i32 {{%.+}}, i32 1, i64 72, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES_1]]*)* [[TASK_ENTRY_1:@.+]] to i32 (i32, i8*)*), i64 -1) + // CK0: [[TASK_CAST_1:%.+]] = bitcast i8* [[TASK_1]] to [[KMP_TASK_T_WITH_PRIVATES_1]]* + // CK0: [[TASK_CAST_GET_1:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES_1]], [[KMP_TASK_T_WITH_PRIVATES_1]]* [[TASK_CAST_1]], i32 0, i32 0 + // CK0: {{.+}} = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_CAST_GET_1]], i32 0, i32 0 + // CK0: {{.+}} = call i32 @__kmpc_omp_task([[IDENT_T]]* @1, i32 {{.+}}, i8* [[TASK_1]]) #pragma omp target teams map(mapper(id),to: c) nowait { ++c.a; @@ -408,7 +416,7 @@ } -// CK0: define internal void [[KERNEL]](%class.C* {{.+}}[[ARG:%.+]]) +// CK0: define internal void [[KERNEL_1]](%class.C* {{.+}}[[ARG:%.+]]) // CK0: [[ADDR:%.+]] = alloca %class.C*, // CK0: store %class.C* [[ARG]], %class.C** [[ADDR]] // CK0: [[CADDR:%.+]] = load %class.C*, %class.C** [[ADDR]] @@ -417,6 +425,77 @@ // CK0: {{.+}} = add nsw i32 [[VAL]], 1 // CK0: } +// CK0: define internal void [[KERNEL_2:@.+]](%class.C* {{.+}}[[ARG:%.+]]) +// CK0: [[ADDR:%.+]] = alloca %class.C*, +// CK0: store %class.C* [[ARG]], %class.C** [[ADDR]] +// CK0: [[CADDR:%.+]] = load %class.C*, %class.C** [[ADDR]] +// CK0: [[CAADDR:%.+]] = getelementptr inbounds %class.C, %class.C* [[CADDR]], i32 0, i32 0 +// CK0: [[VAL:%[^,]+]] = load i32, i32* [[CAADDR]] +// CK0: {{.+}} = add nsw i32 [[VAL]], 1 +// CK0: } + +// CK0: define internal void [[OUTLINED:@.+]](i32 {{.*}}{{[^,]+}}, [[ANON_T]]* noalias [[CTXARG:%.+]]) +// CK0-DAG: call i32 @__tgt_target_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZEGEP:%[0-9]+]], {{.+}}[[NWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]]) +// CK0-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CK0-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CK0-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CK0-DAG: [[MPRGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[MPRFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CK0-DAG: [[BPFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_BP:%.+]], align +// CK0-DAG: [[PFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_P:%.+]], align +// CK0-DAG: [[SIZEFPADDR]] = load [1 x i64]*, [1 x i64]** [[FPPTRADDR_SIZE:%.+]], align +// CK0-DAG: [[MPRFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_MPR:%.+]], align +// CK0-DAG: call void (i8*, ...) %1(i8* %2, {{.+}}[[FPPTRADDR_BP]], {{.+}}[[FPPTRADDR_P]], {{.+}}[[FPPTRADDR_SIZE]], {{.+}}[[FPPTRADDR_MPR]]) +// CK0-DAG: call void [[KERNEL_2:@.+]](%class.C* [[KERNELARG:%.+]]) +// CK0-DAG: [[KERNELARG]] = load %class.C*, %class.C** [[KERNELARGGEP:%.+]], align +// CK0-DAG: [[KERNELARGGEP]] = getelementptr inbounds [[ANON_T]], [[ANON_T]]* [[CTX:%.+]], i32 0, i32 0 +// CK0-DAG: [[CTX]] = load [[ANON_T]]*, [[ANON_T]]** [[CTXADDR:%.+]], align +// CK0-DAG: store [[ANON_T]]* [[CTXARG]], [[ANON_T]]** [[CTXADDR]], align +// CK0: } + +// CK0: define internal {{.*}}i32 [[TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CK0: store [[KMP_TASK_T_WITH_PRIVATES]]* %1, [[KMP_TASK_T_WITH_PRIVATES]]** [[ADDR:%.+]], align +// CK0: [[TASK_T_WITH_PRIVATES:%.+]] = load [[KMP_TASK_T_WITH_PRIVATES]]*, [[KMP_TASK_T_WITH_PRIVATES]]** [[ADDR]], align +// CK0: [[TASKGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_T_WITH_PRIVATES]], i32 0, i32 0 +// CK0: [[SHAREDSGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASKGEP]], i32 0, i32 0 +// CK0: [[SHAREDS:%.+]] = load i8*, i8** [[SHAREDSGEP]], align +// CK0: [[ANON:%.+]] = bitcast i8* [[SHAREDS]] to [[ANON_T]]* +// CK0: [[PRIVATESGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_T_WITH_PRIVATES]], i32 0, i32 1 +// CK0: [[PRIVATES:%.+]] = bitcast [[KMP_PRIVATES_T]]* [[PRIVATESGEP]] to i8* +// CK0: [[TASK_WITH_PRIVATES:%.+]] = bitcast [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_T_WITH_PRIVATES]] to i8* +// CK0: call void [[OUTLINED]](i32 {{%.+}}, i32* {{%.+}}, i8* [[PRIVATES]], {{.+}}, i8* [[TASK_WITH_PRIVATES]], [[ANON_T]]* [[ANON]]) +// CK0: } + +// CK0: define internal void [[OUTLINE_1:@.+]](i32 {{.*}}%.global_tid.{{.+}}, [[ANON_T_0]]* noalias [[CTXARG:%.+]]) +// CK0-DAG: call i32 @__tgt_target_teams_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZEGEP:%[0-9]+]], {{.+}}[[TEAMNWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]], i32 0, i32 0) +// CK0-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CK0-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CK0-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CK0-DAG: [[MPRGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[MPRFPADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CK0-DAG: [[BPFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_BP:%.+]], align +// CK0-DAG: [[PFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_P:%.+]], align +// CK0-DAG: [[SIZEFPADDR]] = load [1 x i64]*, [1 x i64]** [[FPPTRADDR_SIZE:%.+]], align +// CK0-DAG: [[MPRFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_MPR:%.+]], align +// CK0-DAG: call void (i8*, ...) %1(i8* %2, {{.+}}[[FPPTRADDR_BP]], {{.+}}[[FPPTRADDR_P]], {{.+}}[[FPPTRADDR_SIZE]], {{.+}}[[FPPTRADDR_MPR]]) +// CK0-DAG: call void [[KERNEL_2:@.+]](%class.C* [[KERNELARG:%.+]]) +// CK0-DAG: [[KERNELARG]] = load %class.C*, %class.C** [[KERNELARGGEP:%.+]], align +// CK0-DAG: [[KERNELARGGEP]] = getelementptr inbounds [[ANON_T_0]], [[ANON_T_0]]* [[CTX:%.+]], i32 0, i32 0 +// CK0-DAG: [[CTX]] = load [[ANON_T_0]]*, [[ANON_T_0]]** [[CTXADDR:%.+]], align +// CK0-DAG: store [[ANON_T_0]]* [[CTXARG]], [[ANON_T_0]]** [[CTXADDR]], align +// CK0: } + +// CK0: define internal {{.*}}i32 [[TASK_ENTRY_1]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES_1]]* noalias %1) +// CK0: store [[KMP_TASK_T_WITH_PRIVATES_1]]* %1, [[KMP_TASK_T_WITH_PRIVATES_1]]** [[ADDR:%.+]], align +// CK0: [[TASK_T_WITH_PRIVATES:%.+]] = load [[KMP_TASK_T_WITH_PRIVATES_1]]*, [[KMP_TASK_T_WITH_PRIVATES_1]]** [[ADDR]], align +// CK0: [[TASKGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES_1]], [[KMP_TASK_T_WITH_PRIVATES_1]]* [[TASK_T_WITH_PRIVATES]], i32 0, i32 0 +// CK0: [[SHAREDSGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASKGEP]], i32 0, i32 0 +// CK0: [[SHAREDS:%.+]] = load i8*, i8** [[SHAREDSGEP]], align +// CK0: [[ANON:%.+]] = bitcast i8* [[SHAREDS]] to [[ANON_T_0]]* +// CK0: [[PRIVATESGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES_1]], [[KMP_TASK_T_WITH_PRIVATES_1]]* [[TASK_T_WITH_PRIVATES]], i32 0, i32 1 +// CK0: [[PRIVATES:%.+]] = bitcast [[KMP_PRIVATES_T_2]]* [[PRIVATESGEP]] to i8* +// CK0: [[TASK_WITH_PRIVATES:%.+]] = bitcast [[KMP_TASK_T_WITH_PRIVATES_1]]* [[TASK_T_WITH_PRIVATES]] to i8* +// CK0: call void [[OUTLINE_1]](i32 {{%.+}}, i32* {{%.+}}, i8* [[PRIVATES]], {{.+}}, i8* [[TASK_WITH_PRIVATES]], [[ANON_T_0]]* [[ANON]]) +// CK0: } + #endif // CK0 diff --git a/clang/test/OpenMP/target_codegen.cpp b/clang/test/OpenMP/target_codegen.cpp --- a/clang/test/OpenMP/target_codegen.cpp +++ b/clang/test/OpenMP/target_codegen.cpp @@ -54,10 +54,16 @@ #ifndef HEADER #define HEADER +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]], [[KMP_PRIVATES_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, {{%.+}}, {{%.+}} } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } // CHECK-DAG: [[S2:%.+]] = type { i32, i32, i32 } // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[ANON_T:%.+]] = type { i[[SZ]]*, i32, i32 } +// CHECK-32-DAG: [[KMP_PRIVATES_T]] = type { [2 x i64], i32*, i32, [2 x i8*], [2 x i8*], [2 x i8*] } +// CHECK-64-DAG: [[KMP_PRIVATES_T]] = type { i64*, [2 x i8*], [2 x i8*], [2 x i64], [2 x i8*], i32 } // TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } @@ -114,6 +120,9 @@ // CHECK: define {{.*}}[[FOO:@.+]]( int foo(int n) { + // CHECK: [[OFFLOADBPTR:%.+]] = alloca [2 x i8*], align + // CHECK: [[OFFLOADPTR:%.+]] = alloca [2 x i8*], align + // CHECK: [[OFFLOADMAPPER:%.+]] = alloca [2 x i8*], align int a = 0; short aa = 0; float b[10]; @@ -138,33 +147,38 @@ { } - // CHECK-DAG: [[ADD:%.+]] = add nsw i32 - // CHECK-DAG: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], - // CHECK-DAG: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], - // CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** null) - // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 - - // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 - // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 - // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]** - // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]** - // CHECK-DAG: store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]] - // CHECK-DAG: store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]] - - // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 - // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 - // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]] - // CHECK-DAG: store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]] - // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0_:@.+]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]]) - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK: [[BPRGEP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOADBPTR]], i32 0, i32 0 + // CHECK: [[PRGEP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOADPTR]], i32 0, i32 0 + // CHECK: [[BPRGEP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOADBPTR]], i32 0, i32 0 + // CHECK: [[PRGEP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOADPTR]], i32 0, i32 0 + // CHECK: [[DEVICE:%.+]] = sext i32 {{%.+}} to i64 + // CHECK-32: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{.+}}, i32 %0, i32 1, i32 68, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 [[DEVICE]]) + // CHECK-64: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{.+}}, i32 %0, i32 1, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 [[DEVICE]]) + // CHECK: [[TASK_WITH_PRIVATES:%.+]] = bitcast i8* [[TASK]] to [[KMP_TASK_T_WITH_PRIVATES]]* + // CHECK: [[TASK_WITH_PRIVATES_GEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_WITH_PRIVATES]], i32 0, i32 1 + // CHECK-32: [[SIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 0 + // CHECK-32: [[SIZEADDR:%.+]] = bitcast [2 x i64]* [[SIZEGEP]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[SIZEADDR]], i8* align 4 bitcast ([2 x i64]* [[SIZET]] to i8*), i32 16, i1 false) + // CHECK-32: [[FPBPRGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 3 + // CHECK-32: [[FPBPRCAST:%.+]] = bitcast [2 x i8*]* [[FPBPRGEP]] to i8* + // CHECK-32: [[BPRCAST:%.+]] = bitcast i8** [[BPRGEP]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPBPRCAST]], i8* align 4 [[BPRCAST]], i32 8, i1 false) + // CHECK-32: [[FPPRGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 4 + // CHECK-32: [[FPPRCAST:%.+]] = bitcast [2 x i8*]* [[FPPRGEP]] to i8* + // CHECK-32: [[PRCAST:%.+]] = bitcast i8** [[PRGEP]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPPRCAST]], i8* align 4 [[PRCAST]], i32 8, i1 false) + // CHECK-64: [[FPBPRGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 1 + // CHECK-64: [[FPBPRCAST:%.+]] = bitcast [2 x i8*]* [[FPBPRGEP]] to i8* + // CHECK-64: [[BPR_CAST:%.+]] = bitcast i8** [[BPRGEP]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPBPRCAST]], i8* align 8 [[BPR_CAST]], i64 16, i1 false) + // CHECK-64: [[FPPRGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 2 + // CHECK-64: [[FPPRCAST:%.+]] = bitcast [2 x i8*]* [[FPPRGEP]] to i8* + // CHECK-64: [[PR_CAST:%.+]] = bitcast i8** [[PRGEP]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPPRCAST]], i8* align 8 [[PR_CAST]], i64 16, i1 false) + // CHECK-64: [[SIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 3 + // CHECK-64: [[SIZE_CAST:%.+]] = bitcast [2 x i64]* [[SIZEGEP]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SIZE_CAST]], i8* align 8 bitcast ([2 x i64]* [[SIZET]] to i8*), i64 16, i1 false) + // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* {{.+}}, i32 {{.+}}, i8* [[TASK]]) #pragma omp target device(global + a) nowait { static int local1; @@ -378,6 +392,36 @@ // CHECK: define internal void [[HVT0]]() +// CHECK: define internal void [[HVT0_:@.+]](i[[SZ]]* {{%[^,]+}}, i[[SZ]] {{%[^,]+}}) +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE:%.+]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SIZE:%.+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[MAPPER:%.+]]) +// CHECK-DAG: [[DEVICE]] = sext i32 [[DEV:%.+]] to i64 +// CHECK-DAG: [[DEV]] = load i32, i32* [[DEVADDR:%.+]], align +// CHECK-DAG: [[DEVADDR]] = getelementptr inbounds [[ANON_T]], [[ANON_T]]* %12, i32 0, i32 2 +// CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BPRADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PRADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[SIZE]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZEADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[MAPPER]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[MAPPERADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[BPRADDR]] = load [2 x i8*]*, [2 x i8*]** [[FPPTR_BPR:%.+]], align +// CHECK-DAG: [[PRADDR]] = load [2 x i8*]*, [2 x i8*]** [[FPPTR_PR:%.+]], align +// CHECK-DAG: [[SIZEADDR]] = load [2 x i64]*, [2 x i64]** [[FPPTR_SIZE:%.+]], align +// CHECK-DAG: [[MAPPERADDR]] = load [2 x i8*]*, [2 x i8*]** [[FPPTR_MAPPER:%.+]], align +// CHECK-DAG: call void (i8*, ...) {{%[0-9]+}}(i8* {{%[^,]+}}, i[[SZ]]*** [[FPPTR_PLOCAL:%.+]], i32** [[FPPTR_GLOBAL:%.+]], [2 x i8*]** [[FPPTR_BPR]], [2 x i8*]** [[FPPTR_PR]], [2 x i64]** [[FPPTR_SIZE]], [2 x i8*]** [[FPPTR_MAPPER]]) +// CHECK-DAG: [[PLOCALADDR:%.+]] = load i[[SZ]]**, i[[SZ]]*** [[FPPTR_PLOCAL]], align +// CHECK-DAG: {{%.+}} = load i32*, i32** [[FPPTR_GLOBAL:%.+]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: [[PLOCAL:%.+]] = load i[[SZ]]*, i[[SZ]]** [[PLOCALADDR]], align +// CHECK: [[GLOBAL:%.+]] = load i32, i32* {{@.+}}, align +// CHECK-64: [[CONVI:%.+]] = bitcast i64* [[GLOBALCAST:%.+]] to i32* +// CHECK-32: store i32 [[GLOBAL]], i32* [[GLOBALCAST:%.+]], align +// CHECK-64: store i32 [[GLOBAL]], i32* [[CONVI]], align +// CHECK: [[GLOBAL:%.+]] = load i[[SZ]], i[[SZ]]* [[GLOBALCAST]], align +// CHECK: call void [[HVT0_]](i[[SZ]]* [[PLOCAL]], i[[SZ]] [[GLOBAL]]) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there. // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align @@ -706,7 +750,7 @@ // CHECK: [[IFEND]] -// OMP45: define internal void @__omp_offloading_{{.+}}_{{.+}}bar{{.+}}_l838(i[[SZ]] %{{.+}}) +// OMP45: define internal void @__omp_offloading_{{.+}}_{{.+}}bar{{.+}}_l{{[0-9]+}}(i[[SZ]] %{{.+}}) // OMP45: define {{.*}}@{{.*}}zee{{.*}} @@ -805,7 +849,7 @@ // CHECK-DAG: load i16, i16* [[REF_AA]] // CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 -// OMP50: define internal void @__omp_offloading_{{.+}}_{{.+}}bar{{.+}}_l838(i[[SZ]] %{{.+}}) +// OMP50: define internal void @__omp_offloading_{{.+}}_{{.+}}bar{{.+}}_l{{[0-9]+}}(i[[SZ]] %{{.+}}) // OMP50: define {{.*}}@{{.*}}zee{{.*}} diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -74,8 +74,11 @@ #ifndef HEADER #define HEADER -// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[ANON_T:%.+]] = type { i8 } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, {{%[^,]+}}, {{%[^,]+}} } // CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } @@ -134,13 +137,9 @@ double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0) - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0:@.+]]() - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK-32: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{[^,]+}}, i32 %0, i32 1, i32 20, i32 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES:%.+]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-64: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{[^,]+}}, i32 %0, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES:%.+]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* {{[^,]+}}, i32 {{%[^,]+}}, i8* [[TASK]]) #pragma omp target parallel nowait { } @@ -346,7 +345,7 @@ // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions in foo(). -// CHECK: define internal void [[HVT0]]() +// CHECK: define internal void [[HVT0:@.+]]() // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*)) // // @@ -354,6 +353,14 @@ // CHECK: ret void // CHECK-NEXT: } +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* {{@[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT0]]() +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there. diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -74,7 +74,11 @@ #ifndef HEADER #define HEADER -// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]], [[KMP_PRIVATES_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, {{%[^,]+}}, {{%[^,]+}} } +// CHECK-32-DAG: [[KMP_PRIVATES_T]] = type { [3 x i64], [3 x i8*], [3 x i8*], [3 x i8*], i16 } +// CHECL-64-DAG: [[KMP_PRIVATES_T]] = type { [3 x i8*], [3 x i8*], [3 x i64], [3 x i8*], i16 } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } @@ -158,34 +162,53 @@ a += 1; } - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i8** null, i32 1, i32 0) - // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0 - // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 0 - // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] [[VAL0:%.+]], i[[SZ]]* [[CBPADDR0]], - // CHECK-DAG: store i[[SZ]] [[VAL0]], i[[SZ]]* [[CPADDR0]], - // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1 - // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1 - // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] [[VAL1:%.+]], i[[SZ]]* [[CBPADDR1]], - // CHECK-DAG: store i[[SZ]] [[VAL1]], i[[SZ]]* [[CPADDR1]], - // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 1 - // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 1 - // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] [[VAL2:%.+]], i[[SZ]]* [[CBPADDR2]], - // CHECK-DAG: store i[[SZ]] [[VAL2]], i[[SZ]]* [[CPADDR2]], - - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT2:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK: [[BP0GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BP:%[^,]+]], i32 0, i32 0 + // CHECK: [[BP0ADDR:%.+]] = bitcast i8** [[BP0GEP]] to i[[SZ]]* + // CHECK: store i[[SZ]] [[VAL:%[^,]+]], i[[SZ]]* [[BP0ADDR]], align + // CHECK: [[P0GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_P:%[^,]+]], i32 0, i32 0 + // CHECK: [[P0ADDR:%.+]] = bitcast i8** [[P0GEP]] to i[[SZ]]* + // CHECK: store i[[SZ]] [[VAL]], i[[SZ]]* [[P0ADDR]], align + // CHECK: [[BP1GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BP]], i32 0, i32 1 + // CHECK: [[BP1ADDR:%.+]] = bitcast i8** [[BP1GEP]] to i[[SZ]]* + // CHECK: store i[[SZ]] [[VAL1:%[^,]+]], i[[SZ]]* [[BP1ADDR]], align + // CHECK: [[P1GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_P]], i32 0, i32 1 + // CHECK: [[P1ADDR:%.+]] = bitcast i8** [[P1GEP]] to i[[SZ]]* + // CHECK: store i[[SZ]] [[VAL1]], i[[SZ]]* [[P1ADDR]], align + // CHECK: [[BP2GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BP]], i32 0, i32 2 + // CHECK: [[BP2ADDR:%.+]] = bitcast i8** [[BP2GEP]] to i[[SZ]]* + // CHECK: store i[[SZ]] [[VAL2:%[^,]+]], i[[SZ]]* [[BP2ADDR]], align + // CHECK: [[P2GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_P]], i32 0, i32 2 + // CHECK: [[P2ADDR:%.+]] = bitcast i8** [[P2GEP]] to i[[SZ]]* + // CHECK: store i[[SZ]] [[VAL2]], i[[SZ]]* [[P2ADDR]], align + // CHECK: [[BPGEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_BP]], i32 0, i32 0 + // CHECK: [[PGEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[OFFLOAD_P]], i32 0, i32 0 + // CHECK-32: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i32 84, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@[^,]+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-64: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 144, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@[^,]+]] to i32 (i32, i8*)*), i64 -1) + // CHECK: [[TASK_WITH_PRIVATES_CAST:%.+]] = bitcast i8* [[TASK]] to [[KMP_TASK_T_WITH_PRIVATES]]* + // CHECK: [[KMP_PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_WITH_PRIVATES_CAST]], i32 0, i32 1 + // CEHCK-32: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 0 + // CEHCK-32: [[FPSIZEADDR:%.+]] = bitcast [3 x i64]* [[FPSIZEGEP]] to i8* + // CEHCK-32: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPSIZEADDR]], i8* align 8 bitcast ([3 x i64]* [[SIZET2]] to i8*), i64 24, i1 false) + // CEHCK-32: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 1 + // CEHCK-32: [[FPBPADDR:%.+]] = bitcast [3 x i8*]* [[FPBPGEP]] to i8* + // CEHCK-32: [[BPCAST:%.+]] = bitcast i8** [[BPGEP]] to i8* + // CEHCK-32: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPBPADDR]], i8* align 8 [[BPCAST]], i64 24, i1 false) + // CEHCK-32: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 2 + // CEHCK-32: [[FPPADDR:%.+]] = bitcast [3 x i8*]* [[FPPGEP]] to i8* + // CEHCK-32: [[PCAST:%.+]] = bitcast i8** [[PGEP]] to i8* + // CEHCK-32: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPPADDR]], i8* align 8 [[BCAST]], i64 24, i1 false) + // CEHCK-64: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 0 + // CEHCK-64: [[FPBPADDR:%.+]] = bitcast [3 x i8*]* [[FPBPGEP]] to i8* + // CEHCK-64: [[BPCAST:%.+]] = bitcast i8** [[BPGEP]] to i8* + // CEHCK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPBPADDR]], i8* align 8 [[BPCAST]], i64 24, i1 false) + // CEHCK-64: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 1 + // CEHCK-64: [[FPPADDR:%.+]] = bitcast [3 x i8*]* [[FPPGEP]] to i8* + // CEHCK-64: [[PCAST:%.+]] = bitcast i8** [[PGEP]] to i8* + // CEHCK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPPADDR]], i8* align 8 [[BCAST]], i64 24, i1 false) + // CEHCK-64: [[FPSIZEGEP]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 2 + // CEHCK-64: [[FPSIZEADDR:%.+]] = bitcast [3 x i64]* [[FPSIZEGEP]] to i8* + // CEHCK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPSIZEADDR]], i8* align 8 bitcast ([3 x i64]* [[SIZET2]] to i8*), i64 24, i1 false) + // CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{[^,]+}}, i32 %{{[^,]+}}, i8* [[TASK]]) int lin = 12; #pragma omp target parallel for if(target: 1) linear(lin, a : get_val()) nowait for (unsigned long long it = 2000; it >= 600; it-=400) { @@ -376,7 +399,6 @@ // CHECK: ret void // CHECK: } - // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}}) // Create stack storage and store argument in there. // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align @@ -402,7 +424,7 @@ // CHECK: ret void // CHECK-NEXT: } -// CHECK: define internal void [[HVT2]](i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) +// CHECK: define internal void [[HVT2:@.+]](i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}) // Create stack storage and store argument in there. // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align // CHECK: alloca i[[SZ]], align @@ -425,6 +447,24 @@ // CHECK: ret void // CHECK-NEXT: } +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SIZE:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i8** [[MAPPER:%[^,]+]], i32 1, i32 0) +// CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[FPBPR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[FPPR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[SIZE]] = getelementptr inbounds [3 x i64], [3 x i64]* [[FPSIZE:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[MAPPER]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[FPMAPPER:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[FPBPR]] = load [3 x i8*]*, [3 x i8*]** [[FPBPRADDR:%[^,]+]], align +// CHECK-DAG: [[FPPR]] = load [3 x i8*]*, [3 x i8*]** [[FPPRADDR:%[^,]+]], align +// CHECK-DAG: [[FPSIZE]] = load [3 x i64]*, [3 x i64]** [[FPSIZEADDR:%[^,]+]], align +// CHECK-DAG: [[FPMAPPER]] = load [3 x i8*]*, [3 x i8*]** [[FPMAPPERADDR:%[^,]+]], align +// CHECK-DAG: call void (i8*, ...) %{{[0-9]+}}(i8* %{{[^,]+}}, i16** %{{[^,]+}}, [3 x i8*]** [[FPBPRADDR]], [3 x i8*]** [[FPPRADDR]], [3 x i64]** [[FPSIZEADDR]], [3 x i8*]** [[FPMAPPERADDR]]) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT2]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + // CHECK: define internal void [[HVT3]] // Create stack storage and store argument in there. // CHECK: [[A_ADDR:%.+]] = alloca i[[SZ]], align diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -65,11 +65,14 @@ // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[ANON_T:%.+]] = type { i8 } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, %{{[^,]+}}, %{{[^,]+}} } // CHECK-DAG: [[S1:%.+]] = type { double } // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } @@ -130,13 +133,9 @@ double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0) - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0:@.+]]() - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK-32: [[TASK:%[^,]+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i32 20, i32 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@[^,]+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-64: [[TASK:%[^,]+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@[^,]+]] to i32 (i32, i8*)*), i64 -1) + // CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{[^,]+}}, i32 %{{[^,]+}}, i8* [[TASK]]) #pragma omp target parallel for simd nowait for (int i = 3; i < 32; i += 5) { } @@ -356,7 +355,7 @@ // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions in foo(). -// CHECK: define internal void [[HVT0]]() +// CHECK: define internal void [[HVT0:@.+]]() // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*)) // // @@ -365,6 +364,15 @@ // CHECK: ret void // CHECK-NEXT: } +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT0]]() +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}}) // Create stack storage and store argument in there. diff --git a/clang/test/OpenMP/target_simd_codegen.cpp b/clang/test/OpenMP/target_simd_codegen.cpp --- a/clang/test/OpenMP/target_simd_codegen.cpp +++ b/clang/test/OpenMP/target_simd_codegen.cpp @@ -66,6 +66,9 @@ #ifndef HEADER #define HEADER +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, %{{[^,]+}}, %{{[^,]+}} } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } @@ -127,13 +130,9 @@ double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 1) - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0:@.+]]() - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK-32: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i32 20, i32 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@[^,]+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-64: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@[^,]+]] to i32 (i32, i8*)*), i64 -1) + // CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{[^,]+}}, i32 %{{[^,]+}}, i8* [[TASK]]) #pragma omp target simd nowait for (int i = 3; i < 32; i += 5) { } @@ -351,12 +350,22 @@ // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions in foo(). -// CHECK: define internal void [[HVT0]]() +// CHECK: define internal void [[HVT0:@.+]]() // CHECK: !llvm.loop // CHECK: ret void // CHECK-NEXT: } +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 1) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT0]]() +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + + // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}, i{{32|64}}{{[*]*.*}} %{{.+}}) // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align // CHECK: store i[[SZ]] %{{.+}}, i[[SZ]]* [[AA_ADDR]], align diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -73,9 +73,13 @@ #ifndef HEADER #define HEADER -// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]], [[KMP_PRIVATES_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, %{{[^,]+}}, %{{[^,]+}} } +// CHECK-32-DAG: [[KMP_PRIVATES_T]] = type { [3 x i64], [3 x i8*], [3 x i8*], [3 x i8*], i16 } +// CHECK-64-DAG: [[KMP_PRIVATES_T]] = type { [3 x i8*], [3 x i8*], [3 x i64], [3 x i8*], i16 } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant [[IDENT_T]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } @@ -142,33 +146,26 @@ double cn[5][n]; TT d; - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i32 {{[^,]+}}, i32 {{[^)]+}}) - // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] - // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]] - // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] - // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] - // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] - // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] - // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] - // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] - // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]] - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK-DAG: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i8* [[TASK:%.+]]) + // CHECK-32-DAG: [[TASK]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i32 84, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-64-DAG: [[TASK]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 144, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-DAG: [[TASK_CAST:%.+]] = bitcast i8* [[TASK]] to [[KMP_TASK_T_WITH_PRIVATES]]* + // CHECK-DAG: [[KMP_PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_CAST]], i32 0, i32 1 + // CHECK-32-DAG: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-64-DAG: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-DAG: [[FPBPCAST:%.+]] = bitcast [3 x i8*]* [[FPBPGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPBPCAST]], i8* align 4 %{{[^,]+}}, i32 12, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPBPCAST]], i8* align 8 %{{[^,]+}}, i64 24, i1 false) + // CHECK-32-DAG: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-64-DAG: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-DAG: [[FPPCAST:%.+]] = bitcast [3 x i8*]* [[FPPGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPPCAST]], i8* align 4 %{{[^,]+}}, i32 12, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPPCAST]], i8* align 8 %{{[^,]+}}, i64 24, i1 false) + // CHECK-32-DAG: [[FPSIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-64-DAG: [[FPSIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-DAG: [[FPSIZECAST:%.+]] = bitcast [3 x i64]* [[FPSIZEGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPSIZECAST]], i8* align 4 bitcast ([3 x i64]* [[SIZET]] to i8*), i32 24, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPSIZECAST]], i8* align 8 bitcast ([3 x i64]* [[SIZET]] to i8*), i64 24, i1 false) #pragma omp target teams num_teams(a) thread_limit(a) firstprivate(aa) nowait { } @@ -378,7 +375,7 @@ // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions in foo(). -// CHECK: define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK: define internal void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}}) // // @@ -388,6 +385,23 @@ // CHECK: ret void // CHECK-NEXT: } +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[SIZE:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** [[MAPPER:%[^,]+]], i32 {{[^,]+}}, i32 {{[^)]+}}) +// CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[SIZE]] = getelementptr inbounds [3 x i64], [3 x i64]* [[SIZEADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[MAPPER]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[MAPPERADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[BPADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPBPADDR:%[^,]+]], align +// CHECK-DAG: [[PADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPPADDR:%[^,]+]], align +// CHECK-DAG: [[SIZEADDR]] = load [3 x i64]*, [3 x i64]** [[FPSIZEADDR:%[^,]+]], align +// CHECK-DAG: [[MAPPERADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPMAPPERADDR:%[^,]+]], align +// CHECK-DAG: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, i16** %{{.+}}, [3 x i8*]** [[FPBPADDR]], [3 x i8*]** [[FPPADDR]], [3 x i64]** [[FPSIZEADDR]], [3 x i8*]** [[FPMAPPERADDR]]) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there. @@ -512,13 +526,13 @@ // CHECK: define internal {{.*}}void [[OMP_OUTLINED4]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i[[SZ]] %{{.+}}, [10 x float]* {{.+}}, i[[SZ]] %{{.+}}, float* {{.+}}, [5 x [10 x double]]* {{.+}}, i[[SZ]] %{{.+}}, i[[SZ]] %{{.+}}, double* {{.+}}, [[TT]]* {{.+}}) // To reduce complexity, we're only going as far as validating the signature of the outlined parallel function. -// CHECK: define {{.*}}void @__omp_offloading_{{.*}}foo{{.*}}_l369(i[[SZ]] %{{.+}}) +// CHECK: define {{.*}}void @__omp_offloading_{{.*}}foo{{.*}}_l{{[0-9]+}}(i[[SZ]] %{{.+}}) // CHECK: define internal void {{@.+}}(i32* {{.+}}, i32* {{.+}}, i[[SZ]] %{{.+}}) -// CHECK: define {{.*}}void @__omp_offloading_{{.*}}foo{{.*}}_l372(i[[SZ]] %{{.+}}) +// CHECK: define {{.*}}void @__omp_offloading_{{.*}}foo{{.*}}_l{{[0-9]+}}(i[[SZ]] %{{.+}}) // CHECK: define internal void {{@.+}}(i32* {{.+}}, i32* {{.+}}, i32* nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) %{{.+}}) void bazzzz(int n, int f[n]) { -// CHECK: define internal void @__omp_offloading_{{.+}}bazzzz{{.+}}_l524(i[[SZ]] %{{[^,]+}}) +// CHECK: define internal void @__omp_offloading_{{.+}}bazzzz{{.+}}_l{{[0-9]+}}(i[[SZ]] %{{[^,]+}}) // CHECK: [[VLA:%.+]] = load i[[SZ]], i[[SZ]]* % // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @{{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* @{{.+}} to void (i32*, i32*, ...)*), i[[SZ]] [[VLA]]) #pragma omp target teams private(f) diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -74,7 +74,11 @@ #ifndef HEADER #define HEADER -// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]], [[KMP_PRIVATES_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, %{{[^,]+}}, %{{[^,]+}} } +// CHECK-32-DAG: [[KMP_PRIVATES_T]] = type { [3 x i64], [3 x i8*], [3 x i8*], [3 x i8*], i16 } +// CHECK-64-DAG: [[KMP_PRIVATES_T]] = type { [3 x i8*], [3 x i8*], [3 x i64], [3 x i8*], i16 } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } @@ -139,33 +143,26 @@ double cn[5][n]; TT d; - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i32 {{[^,]+}}, i32 {{[^)]+}}) - // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] - // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]] - // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] - // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] - // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] - // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] - // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] - // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] - // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]] - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK-DAG: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i8* [[TASK:%.+]]) + // CHECK-32-DAG: [[TASK]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i32 84, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-64-DAG: [[TASK]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 144, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-DAG: [[TASK_CAST:%.+]] = bitcast i8* [[TASK]] to [[KMP_TASK_T_WITH_PRIVATES]]* + // CHECK-DAG: [[KMP_PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_CAST]], i32 0, i32 1 + // CHECK-32-DAG: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-64-DAG: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-DAG: [[FPBPCAST:%.+]] = bitcast [3 x i8*]* [[FPBPGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPBPCAST]], i8* align 4 %{{[^,]+}}, i32 12, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPBPCAST]], i8* align 8 %{{[^,]+}}, i64 24, i1 false) + // CHECK-32-DAG: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-64-DAG: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-DAG: [[FPPCAST:%.+]] = bitcast [3 x i8*]* [[FPPGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPPCAST]], i8* align 4 %{{[^,]+}}, i32 12, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPPCAST]], i8* align 8 %{{[^,]+}}, i64 24, i1 false) + // CHECK-32-DAG: [[FPSIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-64-DAG: [[FPSIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-DAG: [[FPSIZECAST:%.+]] = bitcast [3 x i64]* [[FPSIZEGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPSIZECAST]], i8* align 4 bitcast ([3 x i64]* [[SIZET]] to i8*), i32 24, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPSIZECAST]], i8* align 8 bitcast ([3 x i64]* [[SIZET]] to i8*), i64 24, i1 false) #pragma omp target teams distribute num_teams(a) thread_limit(a) firstprivate(aa) nowait for (int i = 0; i < 10; ++i) { } @@ -378,7 +375,7 @@ // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions in foo(). -// CHECK: define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK: define internal void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}}) // // @@ -388,6 +385,24 @@ // CHECK: ret void // CHECK-NEXT: } +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[SIZE:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** [[MAPPER:%[^,]+]], i32 {{[^,]+}}, i32 {{[^)]+}}) +// CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[SIZE]] = getelementptr inbounds [3 x i64], [3 x i64]* [[SIZEADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[MAPPER]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[MAPPERADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[BPADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPBPADDR:%[^,]+]], align +// CHECK-DAG: [[PADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPPADDR:%[^,]+]], align +// CHECK-DAG: [[SIZEADDR]] = load [3 x i64]*, [3 x i64]** [[FPSIZEADDR:%[^,]+]], align +// CHECK-DAG: [[MAPPERADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPMAPPERADDR:%[^,]+]], align +// CHECK-DAG: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, i16** %{{.+}}, [3 x i8*]** [[FPBPADDR]], [3 x i8*]** [[FPPADDR]], [3 x i64]** [[FPSIZEADDR]], [3 x i8*]** [[FPMAPPERADDR]]) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there. diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -66,7 +66,11 @@ #ifndef HEADER #define HEADER -// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]], [[KMP_PRIVATES_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, %{{[^,]+}}, %{{[^,]+}} } +// CHECK-32-DAG: [[KMP_PRIVATES_T]] = type { [3 x i64], [3 x i8*], [3 x i8*], [3 x i8*], i16 } +// CHECK-64-DAG: [[KMP_PRIVATES_T]] = type { [3 x i8*], [3 x i8*], [3 x i64], [3 x i8*], i16 } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } @@ -132,33 +136,26 @@ double cn[5][n]; TT d; - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i32 {{[^,]+}}, i32 {{[^)]+}}) - // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] - // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX0]] - // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR0]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR0]] - // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] - // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] - // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR1]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR1]] - // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX1:[0-9]+]] - // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR]], i32 0, i32 [[IDX1]] - // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CBPADDR2]] - // CHECK-DAG: store i[[SZ]] %{{.+}}, i[[SZ]]* [[CPADDR2]] - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK-DAG: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i8* [[TASK:%.+]]) + // CHECK-32-DAG: [[TASK]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i32 84, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-64-DAG: [[TASK]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 144, i64 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-DAG: [[TASK_CAST:%.+]] = bitcast i8* [[TASK]] to [[KMP_TASK_T_WITH_PRIVATES]]* + // CHECK-DAG: [[KMP_PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_CAST]], i32 0, i32 1 + // CHECK-32-DAG: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-64-DAG: [[FPBPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-DAG: [[FPBPCAST:%.+]] = bitcast [3 x i8*]* [[FPBPGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPBPCAST]], i8* align 4 %{{[^,]+}}, i32 12, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPBPCAST]], i8* align 8 %{{[^,]+}}, i64 24, i1 false) + // CHECK-32-DAG: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-64-DAG: [[FPPGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 1 + // CHECK-DAG: [[FPPCAST:%.+]] = bitcast [3 x i8*]* [[FPPGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPPCAST]], i8* align 4 %{{[^,]+}}, i32 12, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPPCAST]], i8* align 8 %{{[^,]+}}, i64 24, i1 false) + // CHECK-32-DAG: [[FPSIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 0 + // CHECK-64-DAG: [[FPSIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[KMP_PRIVATES]], i32 0, i32 2 + // CHECK-DAG: [[FPSIZECAST:%.+]] = bitcast [3 x i64]* [[FPSIZEGEP]] to i8* + // CHECK-32-DAG: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPSIZECAST]], i8* align 4 bitcast ([3 x i64]* [[SIZET]] to i8*), i32 24, i1 false) + // CHECK-64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPSIZECAST]], i8* align 8 bitcast ([3 x i64]* [[SIZET]] to i8*), i64 24, i1 false) #pragma omp target teams distribute simd num_teams(a) thread_limit(a) firstprivate(aa) simdlen(16) nowait for (int i = 0; i < 10; ++i) { } @@ -365,7 +362,7 @@ // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions in foo(). -// CHECK: define internal void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK: define internal void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* [[DEF_LOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i[[SZ]])* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i[[SZ]] {{[^)]+}}) // // @@ -375,6 +372,24 @@ // CHECK: ret void // CHECK-NEXT: } +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[SIZE:%[^,]+]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** [[MAPPER:%[^,]+]], i32 {{[^,]+}}, i32 {{[^)]+}}) +// CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[SIZE]] = getelementptr inbounds [3 x i64], [3 x i64]* [[SIZEADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[MAPPER]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[MAPPERADDR:%[^,]+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[BPADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPBPADDR:%[^,]+]], align +// CHECK-DAG: [[PADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPPADDR:%[^,]+]], align +// CHECK-DAG: [[SIZEADDR]] = load [3 x i64]*, [3 x i64]** [[FPSIZEADDR:%[^,]+]], align +// CHECK-DAG: [[MAPPERADDR]] = load [3 x i8*]*, [3 x i8*]** [[FPMAPPERADDR:%[^,]+]], align +// CHECK-DAG: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, i16** %{{.+}}, [3 x i8*]** [[FPBPADDR]], [3 x i8*]** [[FPPADDR]], [3 x i64]** [[FPSIZEADDR]], [3 x i8*]** [[FPMAPPERADDR]]) +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT0]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there.