diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3784,9 +3784,9 @@ bool IsTargetTask = isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || isOpenMPTargetExecutionDirective(D.getDirectiveKind()); - // For target-based directives skip 3 firstprivate arrays BasePointersArray, - // PointersArray and SizesArray. The original variables for these arrays are - // not captured and we get their addresses explicitly. + // For target-based directives skip 4 firstprivate arrays BasePointersArray, + // PointersArray, SizesArray, and MappersArray. The original variables for + // these arrays are not captured and we get their addresses explicitly. if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || (IsTargetTask && KmpTaskSharedsPtr.isValid())) { SrcBase = CGF.MakeAddrLValue( @@ -3809,7 +3809,7 @@ if (const VarDecl *Elem = Pair.second.PrivateElemInit) { const VarDecl *OriginalVD = Pair.second.Original; // Check if the variable is the target-based BasePointersArray, - // PointersArray or SizesArray. + // PointersArray, SizesArray, or MappersArray. LValue SharedRefLValue; QualType Type = PrivateLValue.getType(); const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); @@ -8866,6 +8866,17 @@ } } +namespace { +/// Additional arguments for emitOffloadingArraysArgument function. +struct ArgumentsOptions { + bool ForEndCall = false; + bool IsTask = false; + ArgumentsOptions() = default; + ArgumentsOptions(bool ForEndCall, bool IsTask) + : ForEndCall(ForEndCall), IsTask(IsTask) {} +}; +} // namespace + /// Emit the arguments to be passed to the runtime library based on the /// arrays of base pointers, pointers, sizes, map types, and mappers. If /// ForEndCall, emit map types to be passed for the end of the region instead of @@ -8874,8 +8885,9 @@ CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg, - CGOpenMPRuntime::TargetDataInfo &Info, bool ForEndCall = false) { - assert((!ForEndCall || Info.separateBeginEndCalls()) && + CGOpenMPRuntime::TargetDataInfo &Info, + const ArgumentsOptions &Options = ArgumentsOptions()) { + assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && "expected region end call to runtime only when end call is separate"); CodeGenModule &CGM = CGF.CGM; if (Info.NumberOfPtrs) { @@ -8893,14 +8905,17 @@ /*Idx0=*/0, /*Idx1=*/0); MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), - ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd - : Info.MapTypesArray, + Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd + : Info.MapTypesArray, /*Idx0=*/0, /*Idx1=*/0); - MappersArrayArg = - Info.HasMapper - ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy) - : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + // Always emit the mapper array address in case of a target task for + // privatization. + if (!Options.IsTask && !Info.HasMapper) + MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); + else + MappersArrayArg = + CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); } else { BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); @@ -9648,9 +9663,11 @@ TargetDataInfo Info; // Fill up the arrays and create the arguments. emitOffloadingArrays(CGF, CombinedInfo, Info); + bool HasDependClauses = D.hasClausesOfKind(); emitOffloadingArraysArgument(CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info.MappersArray, Info); + Info.MapTypesArray, Info.MappersArray, Info, + {/*ForEndTask=*/false, HasDependClauses}); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.BasePointersArray, CGM.getPointerAlign()); @@ -10261,7 +10278,7 @@ llvm::Value *MappersArrayArg = nullptr; emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg, - MappersArrayArg, Info, /*ForEndCall=*/false); + MappersArrayArg, Info); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -10301,7 +10318,8 @@ llvm::Value *MappersArrayArg = nullptr; emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, SizesArrayArg, MapTypesArrayArg, - MappersArrayArg, Info, /*ForEndCall=*/true); + MappersArrayArg, Info, + {/*ForEndCall=*/true, /*IsTask=*/false}); // Emit device ID if any. llvm::Value *DeviceID = nullptr; @@ -10499,9 +10517,11 @@ TargetDataInfo Info; // Fill up the arrays and create the arguments. emitOffloadingArrays(CGF, CombinedInfo, Info); + bool HasDependClauses = D.hasClausesOfKind(); emitOffloadingArraysArgument(CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info.MappersArray, Info); + Info.MapTypesArray, Info.MappersArray, Info, + {/*ForEndTask=*/false, HasDependClauses}); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.BasePointersArray, CGM.getPointerAlign()); @@ -10511,7 +10531,7 @@ Address(Info.SizesArray, CGM.getPointerAlign()); InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); MapTypesArray = Info.MapTypesArray; - if (D.hasClausesOfKind()) + if (HasDependClauses) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); diff --git a/clang/test/OpenMP/target_depend_codegen.cpp b/clang/test/OpenMP/target_depend_codegen.cpp --- a/clang/test/OpenMP/target_depend_codegen.cpp +++ b/clang/test/OpenMP/target_depend_codegen.cpp @@ -43,8 +43,8 @@ // TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } -// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [2 x i64] [i64 0, i64 4] -// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [2 x i64] [i64 544, i64 800] +// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 {{16|12}}] +// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 544, i64 800, i64 3] // CHECK-DAG: @{{.*}} = weak constant i8 0 // TCHECK: @{{.+}} = weak constant [[ENTTY]] @@ -61,6 +61,9 @@ ty Y; }; +#pragma omp declare mapper(id \ + : TT \ + s) map(s.X, s.Y) int global; extern int global; @@ -102,29 +105,75 @@ // CHECK: [[BOOL:%.+]] = icmp ne i32 %{{.+}}, 0 // CHECK: br i1 [[BOOL]], label %[[THEN:.+]], label %[[ELSE:.+]] // CHECK: [[THEN]]: - // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%.+]], i32 0, i32 0 - // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%.+]], i32 0, i32 0 + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 + // CHECK-DAG: [[MADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[M:%.+]], i[[SZ]] 0, i[[SZ]] 0 // CHECK-DAG: [[CBPADDR0:%.+]] = bitcast i8** [[BPADDR0]] to i[[SZ]]** // CHECK-DAG: [[CPADDR0:%.+]] = bitcast i8** [[PADDR0]] to i[[SZ]]** // CHECK-DAG: store i[[SZ]]* [[BP0:%[^,]+]], i[[SZ]]** [[CBPADDR0]] // CHECK-DAG: store i[[SZ]]* [[BP0]], i[[SZ]]** [[CPADDR0]] + // CHECK-DAG: store i8* null, i8** [[MADDR0]], - // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 1 - // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 1 + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 1 + // CHECK-DAG: [[MADDR1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[M]], i[[SZ]] 0, i[[SZ]] 1 // CHECK-DAG: [[CBPADDR1:%.+]] = bitcast i8** [[BPADDR1]] to i[[SZ]]* // CHECK-DAG: [[CPADDR1:%.+]] = bitcast i8** [[PADDR1]] to i[[SZ]]* // CHECK-DAG: store i[[SZ]] [[BP1:%[^,]+]], i[[SZ]]* [[CBPADDR1]] // CHECK-DAG: store i[[SZ]] [[BP1]], i[[SZ]]* [[CPADDR1]] - // CHECK-DAG: getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP]], i32 0, i32 0 - // CHECK-DAG: getelementptr inbounds [2 x i8*], [2 x i8*]* [[P]], i32 0, i32 0 + // CHECK-DAG: store i8* null, i8** [[MADDR1]], + + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 2 + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 2 + // CHECK-DAG: [[MADDR2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[M]], i[[SZ]] 0, i[[SZ]] 2 + // CHECK-DAG: [[CBPADDR2:%.+]] = bitcast i8** [[BPADDR2]] to [[STRUCT_TT:%.+]]** + // CHECK-DAG: [[CPADDR2:%.+]] = bitcast i8** [[PADDR2]] to [[STRUCT_TT]]** + // CHECK-DAG: store [[STRUCT_TT]]* [[D_ADDR:%.+]], [[STRUCT_TT]]** [[CBPADDR2]] + // CHECK-DAG: store [[STRUCT_TT]]* [[D_ADDR]], [[STRUCT_TT]]** [[CPADDR2]] + // CHECK-DAG: store i8* bitcast (void (i8*, i8*, i8*, i64, i64)* [[MAPPER_ID:@.+]] to i8*), i8** [[MADDR2]], + + // CHECK-DAG: [[BP_START:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP]], i32 0, i32 0 + // CHECK-DAG: [[P_START:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P]], i32 0, i32 0 + // CHECK-DAG: [[M_START:%.+]] = bitcast [3 x i8*]* [[M]] to i8** // CHECK: [[GEP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2 // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: store i32 [[DEV]], i32* [[GEP]], // CHECK: [[DEV1:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEV2:%.+]] = sext i32 [[DEV1]] to i64 - // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{.*}}, i32 [[GTID]], i32 1, i[[SZ]] {{120|68}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) + // CHECK: [[TASK:%.+]] = call i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* @{{.*}}, i32 [[GTID]], i32 1, i[[SZ]] {{152|88}}, i[[SZ]] {{16|12}}, i32 (i32, i8*)* bitcast (i32 (i32, %{{.+}}*)* [[TASK_ENTRY1_:@.+]] to i32 (i32, i8*)*), i64 [[DEV2]]) // CHECK: [[BC_TASK:%.+]] = bitcast i8* [[TASK]] to [[TASK_TY1_:%.+]]* + // CHECK: [[BASE:%.+]] = getelementptr inbounds [[TASK_TY1_]], [[TASK_TY1_]]* [[BC_TASK]], i32 0, i32 1 + // CHECK-64: [[BP_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY:%.+]], [[PRIVS_TY:%.+]]* [[BASE]], i32 0, i32 1 + // CHECK-64: [[BP_CAST:%.+]] = bitcast [3 x i8*]* [[BP_BASE]] to i8* + // CHECK-64: [[BP_SRC:%.+]] = bitcast i8** [[BP_START]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[BP_CAST]], i8* align 8 [[BP_SRC]], i64 24, i1 false) + // CHECK-64: [[P_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 2 + // CHECK-64: [[P_CAST:%.+]] = bitcast [3 x i8*]* [[P_BASE]] to i8* + // CHECK-64: [[P_SRC:%.+]] = bitcast i8** [[P_START]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[P_CAST]], i8* align 8 [[P_SRC]], i64 24, i1 false) + // CHECK-64: [[SZ_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 3 + // CHECK-64: [[SZ_CAST:%.+]] = bitcast [3 x i64]* [[SZ_BASE]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SZ_CAST]], i8* align 8 bitcast ([3 x i64]* [[SIZET]] to i8*), i64 24, i1 false) + // CHECK-64: [[M_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 4 + // CHECK-64: [[M_CAST:%.+]] = bitcast [3 x i8*]* [[M_BASE]] to i8* + // CHECK-64: [[M_SRC:%.+]] = bitcast i8** [[M_START]] to i8* + // CHECK-64: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[M_CAST]], i8* align 8 [[M_SRC]], i64 24, i1 false) + // CHECK-32: [[SZ_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY:%.+]], [[PRIVS_TY:%.+]]* [[BASE]], i32 0, i32 0 + // CHECK-32: [[SZ_CAST:%.+]] = bitcast [3 x i64]* [[SZ_BASE]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[SZ_CAST]], i8* align 4 bitcast ([3 x i64]* [[SIZET]] to i8*), i32 24, i1 false) + // CHECK-32: [[BP_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 3 + // CHECK-32: [[BP_CAST:%.+]] = bitcast [3 x i8*]* [[BP_BASE]] to i8* + // CHECK-32: [[BP_SRC:%.+]] = bitcast i8** [[BP_START]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[BP_CAST]], i8* align 4 [[BP_SRC]], i32 12, i1 false) + // CHECK-32: [[P_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 4 + // CHECK-32: [[P_CAST:%.+]] = bitcast [3 x i8*]* [[P_BASE]] to i8* + // CHECK-32: [[P_SRC:%.+]] = bitcast i8** [[P_START]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[P_CAST]], i8* align 4 [[P_SRC]], i32 12, i1 false) + // CHECK-32: [[M_BASE:%.+]] = getelementptr inbounds [[PRIVS_TY]], [[PRIVS_TY]]* [[BASE]], i32 0, i32 5 + // CHECK-32: [[M_CAST:%.+]] = bitcast [3 x i8*]* [[M_BASE]] to i8* + // CHECK-32: [[M_SRC:%.+]] = bitcast i8** [[M_START]] to i8* + // CHECK-32: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[M_CAST]], i8* align 4 [[M_SRC]], i32 12, i1 false) // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START:%.+]], i[[SZ]] 1 // CHECK: getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[DEP_START]], i[[SZ]] 2 // CHECK: [[DEP:%.+]] = bitcast %struct.kmp_depend_info* [[DEP_START]] to i8* @@ -148,8 +197,9 @@ // CHECK: br label %[[EXIT:.+]] // CHECK: [[EXIT]]: -#pragma omp target device(global + a) nowait depend(inout \ - : global, a, bn) if (a) +#pragma omp target device(global + a) nowait depend(inout \ + : global, a, bn) if (a) map(mapper(id), tofrom \ + : d) { static int local1; *plocal = global; @@ -193,13 +243,22 @@ // CHECK: define internal void [[HVT1:@.+]](i[[SZ]]* %{{.+}}, i[[SZ]] %{{.+}}) -// CHECK: define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias %1) -// CHECK: call void (i8*, ...) % -// CHECK: [[SZT:%.+]] = getelementptr inbounds [2 x i64], [2 x i64]* %{{.+}}, i[[SZ]] 0, i[[SZ]] 0 +// CHECK: define internal void [[MAPPER_ID]](i8* %{{.+}}, i8* %{{.+}}, i8* %{{.+}}, i64 %{{.+}}, i64 %{{.+}}) + +// CHECK: define internal{{.*}} i32 [[TASK_ENTRY1_]](i32{{.*}}, [[TASK_TY1_]]* noalias %{{.+}}) +// CHECK: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, i[[SZ]]*** %{{.+}}, i32** %{{.+}}, [3 x i8*]** [[BPTR_ADDR:%.+]], [3 x i8*]** [[PTR_ADDR:%.+]], [3 x i64]** [[SZ_ADDR:%.+]], [3 x i8*]** [[M_ADDR:%.+]]) +// CHECK: [[BPTR_REF:%.+]] = load [3 x i8*]*, [3 x i8*]** [[BPTR_ADDR]], +// CHECK: [[PTR_REF:%.+]] = load [3 x i8*]*, [3 x i8*]** [[PTR_ADDR]], +// CHECK: [[SZ_REF:%.+]] = load [3 x i64]*, [3 x i64]** [[SZ_ADDR]], +// CHECK: [[M_REF:%.+]] = load [3 x i8*]*, [3 x i8*]** [[M_ADDR]], +// CHECK: [[BPR:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPTR_REF]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK: [[PR:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_REF]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK: [[SZT:%.+]] = getelementptr inbounds [3 x i64], [3 x i64]* [[SZ_REF]], i[[SZ]] 0, i[[SZ]] 0 +// CHECK: [[M:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[M_REF]], i[[SZ]] 0, i[[SZ]] 0 // CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2 // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 -// CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M:%[^,]+]]) +// CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 3, i8** [[BPR]], i8** [[PR]], i64* [[SZT]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i8** [[M]]) // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]