diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9426,7 +9426,8 @@ assert(OutlinedFn && "Invalid outlined function!"); - const bool RequiresOuterTask = D.hasClausesOfKind(); + const bool RequiresOuterTask = D.hasClausesOfKind() || + D.hasClausesOfKind(); llvm::SmallVector CapturedVars; const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp --- a/clang/test/OpenMP/declare_mapper_codegen.cpp +++ b/clang/test/OpenMP/declare_mapper_codegen.cpp @@ -22,6 +22,13 @@ #ifdef CK0 // Mapper function code generation and runtime interface. +// CK0: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CK0: [[ANON_T:%.+]] = type { %class.C* } +// CK0: [[ANON_T_0:%.+]] = type { %class.C* } +// CK0: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]], [[KMP_PRIVATES_T:%.+]] } +// CK0: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}} } +// CK0: [[KMP_TASK_T_WITH_PRIVATES_1:%.+]] = type { [[KMP_TASK_T]], [[KMP_PRIVATES_T_1:%.+]] } + // CK0-LABEL: @.__omp_offloading_{{.*}}foo{{.*}}.region_id = weak constant i8 0 // CK0-64: [[SIZES:@.+]] = {{.+}}constant [1 x i64] [i64 16] // CK0-32: [[SIZES:@.+]] = {{.+}}constant [1 x i64] [i64 8] @@ -248,25 +255,18 @@ // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]] // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]] // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]] - // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]]) + // CK0: call void [[KERNEL_1:@.+]](%class.C* [[VAL]]) #pragma omp target map(mapper(id),tofrom: c) { ++c.a; } - // CK0-DAG: call i32 @__tgt_target_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[NWSIZES]]{{.+}}, {{.+}}[[NWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]]) - // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 - // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 - // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8** - // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 - // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 - // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0 - // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C** - // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C** - // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]] - // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]] - // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]] - // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]]) + // CK0-32: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{@.+}}, i32 {{%.+}}, i32 1, i32 40, i32 4, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CK0-64: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{@.+}}, i32 {{%.+}}, i32 1, i64 72, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CK0: [[TASK_CAST:%.+]] = bitcast i8* [[TASK]] to [[KMP_TASK_T_WITH_PRIVATES]]* + // CK0: [[TASK_CAST_GET:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_CAST]], i32 0, i32 0 + // CK0: {{.+}} = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_CAST_GET]], i32 0, i32 0 + // CK0: {{.+}} = call i32 @__kmpc_omp_task([[IDENT_T]]* @1, i32 {{.+}}, i8* [[TASK]]) #pragma omp target map(mapper(id),tofrom: c) nowait { ++c.a; @@ -284,25 +284,18 @@ // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]] // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]] // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]] - // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]]) + // CK0: call void [[KERNEL_3:@.+]](%class.C* [[VAL]]) #pragma omp target teams map(mapper(id),to: c) { ++c.a; } - // CK0-DAG: call i32 @__tgt_target_teams_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[TEAMNWSIZES]]{{.+}}, {{.+}}[[TEAMNWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]], i32 0, i32 0) - // CK0-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 - // CK0-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 - // CK0-DAG: [[MPRGEP]] = bitcast [1 x i8*]* [[MPR:%[^,]+]] to i8** - // CK0-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 - // CK0-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 - // CK0-DAG: [[MPR1:%.+]] = getelementptr inbounds {{.+}}[[MPR]], i[[sz]] 0, i[[sz]] 0 - // CK0-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to %class.C** - // CK0-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to %class.C** - // CK0-DAG: store %class.C* [[VAL:%[^,]+]], %class.C** [[CBP1]] - // CK0-DAG: store %class.C* [[VAL]], %class.C** [[CP1]] - // CK0-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MPRFUNC]] to i8*), i8** [[MPR1]] - // CK0: call void [[KERNEL:@.+]](%class.C* [[VAL]]) + // CK0-32: [[TASK_1:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{@.+}}, i32 {{%.+}}, i32 1, i32 40, i32 4, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES_1]]*)* [[TASK_ENTRY_1:@.+]] to i32 (i32, i8*)*), i64 -1) + // CK0-64: [[TASK_1:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{@.+}}, i32 {{%.+}}, i32 1, i64 72, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES_1]]*)* [[TASK_ENTRY_1:@.+]] to i32 (i32, i8*)*), i64 -1) + // CK0: [[TASK_CAST_1:%.+]] = bitcast i8* [[TASK_1]] to [[KMP_TASK_T_WITH_PRIVATES_1]]* + // CK0: [[TASK_CAST_GET_1:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES_1]], [[KMP_TASK_T_WITH_PRIVATES_1]]* [[TASK_CAST_1]], i32 0, i32 0 + // CK0: {{.+}} = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASK_CAST_GET_1]], i32 0, i32 0 + // CK0: {{.+}} = call i32 @__kmpc_omp_task([[IDENT_T]]* @1, i32 {{.+}}, i8* [[TASK_1]]) #pragma omp target teams map(mapper(id),to: c) nowait { ++c.a; @@ -408,7 +401,7 @@ } -// CK0: define internal void [[KERNEL]](%class.C* {{.+}}[[ARG:%.+]]) +// CK0: define internal void [[KERNEL_1]](%class.C* {{.+}}[[ARG:%.+]]) // CK0: [[ADDR:%.+]] = alloca %class.C*, // CK0: store %class.C* [[ARG]], %class.C** [[ADDR]] // CK0: [[CADDR:%.+]] = load %class.C*, %class.C** [[ADDR]] @@ -417,6 +410,105 @@ // CK0: {{.+}} = add nsw i32 [[VAL]], 1 // CK0: } +// CK0: define internal void [[KERNEL_2:@.+]](%class.C* {{.+}}[[ARG:%.+]]) +// CK0: [[ADDR:%.+]] = alloca %class.C*, +// CK0: store %class.C* [[ARG]], %class.C** [[ADDR]] +// CK0: [[CADDR:%.+]] = load %class.C*, %class.C** [[ADDR]] +// CK0: [[CAADDR:%.+]] = getelementptr inbounds %class.C, %class.C* [[CADDR]], i32 0, i32 0 +// CK0: [[VAL:%[^,]+]] = load i32, i32* [[CAADDR]] +// CK0: {{.+}} = add nsw i32 [[VAL]], 1 +// CK0: } + +// CK0: define internal void [[OUTLINED:@.+]](i32 {{.*}}%.global_tid.{{.+}}, [[ANON_T]]* noalias [[CTXARG:%.+]]) +// CK0-DAG: call i32 @__tgt_target_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZEGEP:%[0-9]+]], {{.+}}[[NWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]]) +// CK0-32-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i32 0, i32 0 +// CK0-64-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i64 0, i64 0 +// CK0-32-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i32 0, i32 0 +// CK0-64-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i64 0, i64 0 +// CK0-32-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i32 0, i32 0 +// CK0-64-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i64 0, i64 0 +// CK0-32-DAG: [[MPRGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[MPRFPADDR:%.+]], i32 0, i32 0 +// CK0-64-DAG: [[MPRGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[MPRFPADDR:%.+]], i64 0, i64 0 +// CK0-32-DAG: [[BPFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_BP:%.+]], align 4 +// CK0-64-DAG: [[BPFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_BP:%.+]], align 8 +// CK0-32-DAG: [[PFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_P:%.+]], align 4 +// CK0-64-DAG: [[PFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_P:%.+]], align 8 +// CK0-32-DAG: [[SIZEFPADDR]] = load [1 x i64]*, [1 x i64]** [[FPPTRADDR_SIZE:%.+]], align 4 +// CK0-64-DAG: [[SIZEFPADDR]] = load [1 x i64]*, [1 x i64]** [[FPPTRADDR_SIZE:%.+]], align 8 +// CK0-32-DAG: [[MPRFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_MPR:%.+]], align 4 +// CK0-64-DAG: [[MPRFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_MPR:%.+]], align 8 +// CK0-DAG: call void (i8*, ...) %1(i8* %2, {{.+}}[[FPPTRADDR_BP]], {{.+}}[[FPPTRADDR_P]], {{.+}}[[FPPTRADDR_SIZE]], {{.+}}[[FPPTRADDR_MPR]]) +// CK0-DAG: call void [[KERNEL_2:@.+]](%class.C* [[KERNELARG:%.+]]) +// CK0-32-DAG: [[KERNELARG]] = load %class.C*, %class.C** [[KERNELARGGEP:%.+]], align 4 +// CK0-64-DAG: [[KERNELARG]] = load %class.C*, %class.C** [[KERNELARGGEP:%.+]], align 8 +// CK0-DAG: [[KERNELARGGEP]] = getelementptr inbounds [[ANON_T]], [[ANON_T]]* [[CTX:%.+]], i32 0, i32 0 +// CK0-32-DAG: [[CTX]] = load [[ANON_T]]*, [[ANON_T]]** [[CTXADDR:%.+]], align 4 +// CK0-64-DAG: [[CTX]] = load [[ANON_T]]*, [[ANON_T]]** [[CTXADDR:%.+]], align 8 +// CK0-32-DAG: store [[ANON_T]]* [[CTXARG]], [[ANON_T]]** [[CTXADDR]], align 4 +// CK0-64-DAG: store [[ANON_T]]* [[CTXARG]], [[ANON_T]]** [[CTXADDR]], align 8 +// CK0: } + +// CK0: define internal {{.*}}i32 [[TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CK0-32: store [[KMP_TASK_T_WITH_PRIVATES]]* %1, [[KMP_TASK_T_WITH_PRIVATES]]** [[ADDR:%.+]], align 4 +// CK0-64: store [[KMP_TASK_T_WITH_PRIVATES]]* %1, [[KMP_TASK_T_WITH_PRIVATES]]** [[ADDR:%.+]], align 8 +// CK0-32: [[TASK_T_WITH_PRIVATES:%.+]] = load [[KMP_TASK_T_WITH_PRIVATES]]*, [[KMP_TASK_T_WITH_PRIVATES]]** [[ADDR]], align 4 +// CK0-64: [[TASK_T_WITH_PRIVATES:%.+]] = load [[KMP_TASK_T_WITH_PRIVATES]]*, [[KMP_TASK_T_WITH_PRIVATES]]** [[ADDR]], align 8 +// CK0: [[TASKGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_T_WITH_PRIVATES]], i32 0, i32 0 +// CK0: [[SHAREDSGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASKGEP]], i32 0, i32 0 +// CK0-32: [[SHAREDS:%.+]] = load i8*, i8** [[SHAREDSGEP]], align 4 +// CK0-64: [[SHAREDS:%.+]] = load i8*, i8** [[SHAREDSGEP]], align 8 +// CK0: [[ANON:%.+]] = bitcast i8* [[SHAREDS]] to [[ANON_T]]* +// CK0: [[PRIVATESGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_T_WITH_PRIVATES]], i32 0, i32 1 +// CK0: [[PRIVATES:%.+]] = bitcast [[KMP_PRIVATES_T]]* [[PRIVATESGEP]] to i8* +// CK0: [[TASK_WITH_PRIVATES:%.+]] = bitcast [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_T_WITH_PRIVATES]] to i8* +// CK0: call void [[OUTLINED]](i32 {{%.+}}, i32* {{%.+}}, i8* [[PRIVATES]], {{.+}}, i8* [[TASK_WITH_PRIVATES]], [[ANON_T]]* [[ANON]]) +// CK0: } + +// CK0: define internal void [[OUTLINE_1:@.+]](i32 {{.*}}%.global_tid.{{.+}}, [[ANON_T_0]]* noalias [[CTXARG:%.+]]) +// CK0-DAG: call i32 @__tgt_target_teams_nowait_mapper(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], i64* [[SIZEGEP:%[0-9]+]], {{.+}}[[TEAMNWTYPES]]{{.+}}, i8** [[MPRGEP:%.+]], i32 0, i32 0) +// CK0-32-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i32 0, i32 0 +// CK0-64-DAG: [[BPGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPFPADDR:%.+]], i64 0, i64 0 +// CK0-32-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i32 0, i32 0 +// CK0-64-DAG: [[PGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PFPADDR:%.+]], i64 0, i64 0 +// CK0-32-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i32 0, i32 0 +// CK0-64-DAG: [[SIZEGEP]] = getelementptr inbounds [1 x i64], [1 x i64]* [[SIZEFPADDR:%.+]], i64 0, i64 0 +// CK0-32-DAG: [[MPRGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[MPRFPADDR:%.+]], i32 0, i32 0 +// CK0-64-DAG: [[MPRGEP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[MPRFPADDR:%.+]], i64 0, i64 0 +// CK0-32-DAG: [[BPFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_BP:%.+]], align 4 +// CK0-64-DAG: [[BPFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_BP:%.+]], align 8 +// CK0-32-DAG: [[PFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_P:%.+]], align 4 +// CK0-64-DAG: [[PFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_P:%.+]], align 8 +// CK0-32-DAG: [[SIZEFPADDR]] = load [1 x i64]*, [1 x i64]** [[FPPTRADDR_SIZE:%.+]], align 4 +// CK0-64-DAG: [[SIZEFPADDR]] = load [1 x i64]*, [1 x i64]** [[FPPTRADDR_SIZE:%.+]], align 8 +// CK0-32-DAG: [[MPRFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_MPR:%.+]], align 4 +// CK0-64-DAG: [[MPRFPADDR]] = load [1 x i8*]*, [1 x i8*]** [[FPPTRADDR_MPR:%.+]], align 8 +// CK0-DAG: call void (i8*, ...) %1(i8* %2, {{.+}}[[FPPTRADDR_BP]], {{.+}}[[FPPTRADDR_P]], {{.+}}[[FPPTRADDR_SIZE]], {{.+}}[[FPPTRADDR_MPR]]) +// CK0-DAG: call void [[KERNEL_2:@.+]](%class.C* [[KERNELARG:%.+]]) +// CK0-32-DAG: [[KERNELARG]] = load %class.C*, %class.C** [[KERNELARGGEP:%.+]], align 4 +// CK0-64-DAG: [[KERNELARG]] = load %class.C*, %class.C** [[KERNELARGGEP:%.+]], align 8 +// CK0-DAG: [[KERNELARGGEP]] = getelementptr inbounds [[ANON_T_0]], [[ANON_T_0]]* [[CTX:%.+]], i32 0, i32 0 +// CK0-32-DAG: [[CTX]] = load [[ANON_T_0]]*, [[ANON_T_0]]** [[CTXADDR:%.+]], align 4 +// CK0-64-DAG: [[CTX]] = load [[ANON_T_0]]*, [[ANON_T_0]]** [[CTXADDR:%.+]], align 8 +// CK0-32-DAG: store [[ANON_T_0]]* [[CTXARG]], [[ANON_T_0]]** [[CTXADDR]], align 4 +// CK0-64-DAG: store [[ANON_T_0]]* [[CTXARG]], [[ANON_T_0]]** [[CTXADDR]], align 8 +// CK0: } + +// CK0: define internal {{.*}}i32 [[TASK_ENTRY_1]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES_1]]* noalias %1) +// CK0-32: store [[KMP_TASK_T_WITH_PRIVATES_1]]* %1, [[KMP_TASK_T_WITH_PRIVATES_1]]** [[ADDR:%.+]], align 4 +// CK0-64: store [[KMP_TASK_T_WITH_PRIVATES_1]]* %1, [[KMP_TASK_T_WITH_PRIVATES_1]]** [[ADDR:%.+]], align 8 +// CK0-32: [[TASK_T_WITH_PRIVATES:%.+]] = load [[KMP_TASK_T_WITH_PRIVATES_1]]*, [[KMP_TASK_T_WITH_PRIVATES_1]]** [[ADDR]], align 4 +// CK0-64: [[TASK_T_WITH_PRIVATES:%.+]] = load [[KMP_TASK_T_WITH_PRIVATES_1]]*, [[KMP_TASK_T_WITH_PRIVATES_1]]** [[ADDR]], align 8 +// CK0: [[TASKGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES_1]], [[KMP_TASK_T_WITH_PRIVATES_1]]* [[TASK_T_WITH_PRIVATES]], i32 0, i32 0 +// CK0: [[SHAREDSGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T]], [[KMP_TASK_T]]* [[TASKGEP]], i32 0, i32 0 +// CK0-32: [[SHAREDS:%.+]] = load i8*, i8** [[SHAREDSGEP]], align 4 +// CK0-64: [[SHAREDS:%.+]] = load i8*, i8** [[SHAREDSGEP]], align 8 +// CK0: [[ANON:%.+]] = bitcast i8* [[SHAREDS]] to [[ANON_T_0]]* +// CK0: [[PRIVATESGEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES_1]], [[KMP_TASK_T_WITH_PRIVATES_1]]* [[TASK_T_WITH_PRIVATES]], i32 0, i32 1 +// CK0: [[PRIVATES:%.+]] = bitcast [[KMP_PRIVATES_T_1]]* [[PRIVATESGEP]] to i8* +// CK0: [[TASK_WITH_PRIVATES:%.+]] = bitcast [[KMP_TASK_T_WITH_PRIVATES_1]]* [[TASK_T_WITH_PRIVATES]] to i8* +// CK0: call void [[OUTLINE_1]](i32 {{%.+}}, i32* {{%.+}}, i8* [[PRIVATES]], {{.+}}, i8* [[TASK_WITH_PRIVATES]], [[ANON_T_0]]* [[ANON]]) +// CK0: } + #endif // CK0 diff --git a/clang/test/OpenMP/target_codegen.cpp b/clang/test/OpenMP/target_codegen.cpp --- a/clang/test/OpenMP/target_codegen.cpp +++ b/clang/test/OpenMP/target_codegen.cpp @@ -54,10 +54,16 @@ #ifndef HEADER #define HEADER +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]], [[KMP_PRIVATES_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, {{%.+}}, {{%.+}} } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } // CHECK-DAG: [[S1:%.+]] = type { double } // CHECK-DAG: [[S2:%.+]] = type { i32, i32, i32 } // CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[ANON_T:%.+]] = type { i[[SZ]]*, i32, i32 } +// CHECK-32-DAG: [[KMP_PRIVATES_T]] = type { [2 x i64], i32*, i32, [2 x i8*], [2 x i8*], [2 x i8*] } +// CHECK-64-DAG: [[KMP_PRIVATES_T]] = type { i64*, [2 x i8*], [2 x i8*], [2 x i64], [2 x i8*], i32 } // TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } @@ -114,6 +120,9 @@ // CHECK: define {{.*}}[[FOO:@.+]]( int foo(int n) { + // CHECK: [[OFFLOADBPTR:%.+]] = alloca [2 x i8*], align + // CHECK: [[OFFLOADPTR:%.+]] = alloca [2 x i8*], align + // CHECK: [[OFFLOADMAPPER:%.+]] = alloca [2 x i8*], align int a = 0; short aa = 0; float b[10]; @@ -138,33 +147,38 @@ { } - // CHECK-DAG: [[ADD:%.+]] = add nsw i32 - // CHECK-DAG: store i32 [[ADD]], i32* [[DEVICE_CAP:%.+]], - // CHECK-DAG: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], - // CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** null) - // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 - // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 - - // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 - // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 - // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]** - // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]** - // CHECK-DAG: store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]] - // CHECK-DAG: store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]] - - // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 - // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 - // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* - // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* - // CHECK-DAG: store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]] - // CHECK-DAG: store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]] - // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0_:@.+]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]]) - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK: [[BPRGEP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOADBPTR]], i32 0, i32 0 + // CHECK: [[PRGEP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOADPTR]], i32 0, i32 0 + // CHECK: [[BPRGEP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOADBPTR]], i32 0, i32 0 + // CHECK: [[PRGEP:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[OFFLOADPTR]], i32 0, i32 0 + // CHECK: [[DEVICE:%.+]] = sext i32 {{%.+}} to i64 + // CHECK-32: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{.+}}, i32 %0, i32 1, i32 68, i32 12, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 [[DEVICE]]) + // CHECK-64: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{.+}}, i32 %0, i32 1, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 [[DEVICE]]) + // CHECK: [[TASK_WITH_PRIVATES:%.+]] = bitcast i8* [[TASK]] to [[KMP_TASK_T_WITH_PRIVATES]]* + // CHECK: [[TASK_WITH_PRIVATES_GEP:%.+]] = getelementptr inbounds [[KMP_TASK_T_WITH_PRIVATES]], [[KMP_TASK_T_WITH_PRIVATES]]* [[TASK_WITH_PRIVATES]], i32 0, i32 1 + // CHECK-32: [[SIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 0 + // CHECK-32: [[SIZEADDR:%.+]] = bitcast [2 x i64]* [[SIZEGEP]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[SIZEADDR]], i8* align 4 bitcast ([2 x i64]* [[SIZET]] to i8*), i32 16, i1 false) + // CHECK-32: [[FPBPRGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 3 + // CHECK-32: [[FPBPRCAST:%.+]] = bitcast [2 x i8*]* [[FPBPRGEP]] to i8* + // CHECK-32: [[BPRCAST:%.+]] = bitcast i8** [[BPRGEP]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPBPRCAST]], i8* align 4 [[BPRCAST]], i32 8, i1 false) + // CHECK-32: [[FPPRGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 4 + // CHECK-32: [[FPPRCAST:%.+]] = bitcast [2 x i8*]* [[FPPRGEP]] to i8* + // CHECK-32: [[PRCAST:%.+]] = bitcast i8** [[PRGEP]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[FPPRCAST]], i8* align 4 [[PRCAST]], i32 8, i1 false) + // CHECK-64: [[FPBPRGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 1 + // CHECK-64: [[FPBPRCAST:%.+]] = bitcast [2 x i8*]* [[FPBPRGEP]] to i8* + // CHECK-64: [[BPR_CAST:%.+]] = bitcast i8** [[BPRGEP]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPBPRCAST]], i8* align 8 [[BPR_CAST]], i64 16, i1 false) + // CHECK-64: [[FPPRGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 2 + // CHECK-64: [[FPPRCAST:%.+]] = bitcast [2 x i8*]* [[FPPRGEP]] to i8* + // CHECK-64: [[PR_CAST:%.+]] = bitcast i8** [[PRGEP]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[FPPRCAST]], i8* align 8 [[PR_CAST]], i64 16, i1 false) + // CHECK-64: [[SIZEGEP:%.+]] = getelementptr inbounds [[KMP_PRIVATES_T]], [[KMP_PRIVATES_T]]* [[TASK_WITH_PRIVATES_GEP]], i32 0, i32 3 + // CHECK-64: [[SIZE_CAST:%.+]] = bitcast [2 x i64]* [[SIZEGEP]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SIZE_CAST]], i8* align 8 bitcast ([2 x i64]* [[SIZET]] to i8*), i64 16, i1 false) + // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* {{.+}}, i32 {{.+}}, i8* [[TASK]]) #pragma omp target device(global + a) nowait { static int local1; @@ -378,6 +392,36 @@ // CHECK: define internal void [[HVT0]]() +// CHECK: define internal void [[HVT0_:@.+]](i[[SZ]]* {{%[^,]+}}, i[[SZ]] {{%[^,]+}}) +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE:%.+]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SIZE:%.+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[MAPPER:%.+]]) +// CHECK-DAG: [[DEVICE]] = sext i32 [[DEV:%.+]] to i64 +// CHECK-DAG: [[DEV]] = load i32, i32* [[DEVADDR:%.+]], align +// CHECK-DAG: [[DEVADDR]] = getelementptr inbounds [[ANON_T]], [[ANON_T]]* %12, i32 0, i32 2 +// CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BPRADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PRADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[SIZE]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZEADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[MAPPER]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[MAPPERADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK-DAG: [[BPRADDR]] = load [2 x i8*]*, [2 x i8*]** [[FPPTR_BPR:%.+]], align +// CHECK-DAG: [[PRADDR]] = load [2 x i8*]*, [2 x i8*]** [[FPPTR_PR:%.+]], align +// CHECK-DAG: [[SIZEADDR]] = load [2 x i64]*, [2 x i64]** [[FPPTR_SIZE:%.+]], align +// CHECK-DAG: [[MAPPERADDR]] = load [2 x i8*]*, [2 x i8*]** [[FPPTR_MAPPER:%.+]], align +// CHECK-DAG: call void (i8*, ...) {{%[0-9]+}}(i8* {{%[^,]+}}, i[[SZ]]*** [[FPPTR_PLOCAL:%.+]], i32** [[FPPTR_GLOBAL:%.+]], [2 x i8*]** [[FPPTR_BPR]], [2 x i8*]** [[FPPTR_PR]], [2 x i64]** [[FPPTR_SIZE]], [2 x i8*]** [[FPPTR_MAPPER]]) +// CHECK-DAG: [[PLOCALADDR:%.+]] = load i[[SZ]]**, i[[SZ]]*** [[FPPTR_PLOCAL]], align +// CHECK-DAG: {{%.+}} = load i32*, i32** [[FPPTR_GLOBAL:%.+]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: [[PLOCAL:%.+]] = load i[[SZ]]*, i[[SZ]]** [[PLOCALADDR]], align +// CHECK: [[GLOBAL:%.+]] = load i32, i32* {{@.+}}, align +// CHECK-64: [[CONVI:%.+]] = bitcast i64* [[GLOBALCAST:%.+]] to i32* +// CHECK-32: store i32 [[GLOBAL]], i32* [[GLOBALCAST:%.+]], align +// CHECK-64: store i32 [[GLOBAL]], i32* [[CONVI]], align +// CHECK: [[GLOBAL:%.+]] = load i[[SZ]], i[[SZ]]* [[GLOBALCAST]], align +// CHECK: call void [[HVT0_]](i[[SZ]]* [[PLOCAL]], i[[SZ]] [[GLOBAL]]) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there. // CHECK: [[AA_ADDR:%.+]] = alloca i[[SZ]], align @@ -706,7 +750,7 @@ // CHECK: [[IFEND]] -// OMP45: define internal void @__omp_offloading_{{.+}}_{{.+}}bar{{.+}}_l838(i[[SZ]] %{{.+}}) +// OMP45: define internal void @__omp_offloading_{{.+}}_{{.+}}bar{{.+}}_l{{[0-9]+}}(i[[SZ]] %{{.+}}) // OMP45: define {{.*}}@{{.*}}zee{{.*}} @@ -805,7 +849,7 @@ // CHECK-DAG: load i16, i16* [[REF_AA]] // CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[REF_B]], i[[SZ]] 0, i[[SZ]] 2 -// OMP50: define internal void @__omp_offloading_{{.+}}_{{.+}}bar{{.+}}_l838(i[[SZ]] %{{.+}}) +// OMP50: define internal void @__omp_offloading_{{.+}}_{{.+}}bar{{.+}}_l{{[0-9]+}}(i[[SZ]] %{{.+}}) // OMP50: define {{.*}}@{{.*}}zee{{.*}} diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -74,8 +74,11 @@ #ifndef HEADER #define HEADER -// CHECK-DAG: %struct.ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[ANON_T:%.+]] = type { i8 } +// CHECK-DAG: [[KMP_TASK_T_WITH_PRIVATES:%.+]] = type { [[KMP_TASK_T:%.+]] } +// CHECK-DAG: [[KMP_TASK_T]] = type { i8*, i32 (i32, i8*)*, i32, {{%[^,]+}}, {{%[^,]+}} } // CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DAG: [[TT:%.+]] = type { i64, i8 } @@ -134,13 +137,9 @@ double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0) - // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 - // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] - // CHECK: [[FAIL]] - // CHECK: call void [[HVT0:@.+]]() - // CHECK-NEXT: br label %[[END]] - // CHECK: [[END]] + // CHECK-32: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{[^,]+}}, i32 %0, i32 1, i32 20, i32 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES:%.+]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK-64: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc([[IDENT_T]]* {{[^,]+}}, i32 %0, i32 1, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T_WITH_PRIVATES:%.+]]*)* [[OMP_TASK_ENTRY:@.+]] to i32 (i32, i8*)*), i64 -1) + // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* {{[^,]+}}, i32 {{%[^,]+}}, i8* [[TASK]]) #pragma omp target parallel nowait { } @@ -346,7 +345,7 @@ // Check that the offloading functions are emitted and that the arguments are // correct and loaded correctly for the target regions in foo(). -// CHECK: define internal void [[HVT0]]() +// CHECK: define internal void [[HVT0:@.+]]() // CHECK: call {{.*}}void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEF_LOC]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*)) // // @@ -354,6 +353,14 @@ // CHECK: ret void // CHECK-NEXT: } +// CHECK: define internal {{.*}}i32 [[OMP_TASK_ENTRY]](i32 {{.*}}%0, [[KMP_TASK_T_WITH_PRIVATES]]* noalias %1) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait_mapper(i64 -1, i8* {{@[^,]+}}, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i32 1, i32 0) +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT0]]() +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] // CHECK: define internal void [[HVT1]](i[[SZ]] %{{.+}}) // Create stack storage and store argument in there.