diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -423,7 +423,8 @@ llvm::Optional SecondaryLocalVarData = llvm::None; EscapedParamsTy EscapedParameters; llvm::SmallVector EscapedVariableLengthDecls; - llvm::SmallVector EscapedVariableLengthDeclsAddrs; + llvm::SmallVector, 4> + EscapedVariableLengthDeclsAddrs; llvm::Value *IsInSPMDModeFlag = nullptr; std::unique_ptr MappedParams; }; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1437,7 +1437,9 @@ CGM.getModule(), OMPRTL___kmpc_alloc_shared), AllocArgs, VD->getName()); - I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(VoidPtr); + I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back( + std::pair( + {VoidPtr, CGF.getTypeSize(VD->getType())})); LValue Base = CGF.MakeAddrLValue(VoidPtr, VD->getType(), CGM.getContext().getDeclAlign(VD), AlignmentSource::Decl); @@ -1456,19 +1458,22 @@ const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); if (I != FunctionGlobalizedDecls.end()) { // Deallocate the memory for each globalized VLA object - for (llvm::Value *Addr : + for (auto AddrSizePair : llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_free_shared), - Addr); + {AddrSizePair.first, AddrSizePair.second}); } // Deallocate the memory for each globalized value for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) { + const auto *VD = cast(Rec.first); I->getSecond().MappedParams->restore(CGF); + llvm::Value *FreeArgs[] = {Rec.second.GlobalizedVal, + CGF.getTypeSize(VD->getType())}; CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_free_shared), - {Rec.second.GlobalizedVal}); + FreeArgs); } } } diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -81,6 +81,6 @@ // CHECK1-NEXT: [[A:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A]] to i32* // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[A_ON_STACK]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A]], i64 4) // CHECK1-NEXT: ret i32 [[CALL]] // diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -418,8 +418,8 @@ // CHECK-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 // CHECK-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** // CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP9]], i64 2) -// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[B]]) -// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[A]]) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[B]], i64 4) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[A]], i64 4) // CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK-NEXT: ret void // CHECK: worker.exit: diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -3174,7 +3174,7 @@ // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i64 40) // CHECK4-NEXT: ret void // // @@ -3503,7 +3503,7 @@ // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: -// CHECK5-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK5-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 40) // CHECK5-NEXT: ret void // // @@ -3826,7 +3826,7 @@ // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: -// CHECK6-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK6-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 40) // CHECK6-NEXT: ret void // // diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -1645,7 +1645,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[A_ON_STACK]], align 4 -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A1]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1917,7 +1917,7 @@ // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK2-NEXT: store i32 [[INC]], i32* [[A_ON_STACK]], align 4 -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[A1]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[A1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -486,7 +486,7 @@ // CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 -// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[D]]) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[D]], i64 4) // CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK-NEXT: ret void // CHECK: worker.exit: diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -432,7 +432,7 @@ // CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP6]], i64 2) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[F]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[F]], i64 4) // CHECK1-NEXT: ret i32 [[TMP7]] // // @@ -816,7 +816,7 @@ // CHECK2-NEXT: [[TMP6:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP6]], i32 2) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[F]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[F]], i32 4) // CHECK2-NEXT: ret i32 [[TMP7]] // // @@ -1199,7 +1199,7 @@ // CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP6]], i32 2) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[F]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[F]], i32 4) // CHECK3-NEXT: ret i32 [[TMP7]] // // diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -173,9 +173,9 @@ // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) // CHECK1-NEXT: ret void // // @@ -740,9 +740,9 @@ // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) // CHECK1-NEXT: ret void // // @@ -1407,9 +1407,9 @@ // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] // CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) // CHECK2-NEXT: ret void // // @@ -1974,9 +1974,9 @@ // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] // CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) // CHECK2-NEXT: ret void // // @@ -2641,9 +2641,9 @@ // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] // CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) // CHECK3-NEXT: ret void // // @@ -3208,9 +3208,9 @@ // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR1]] // CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -657,7 +657,7 @@ // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[I]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[I]], i64 4) // CHECK1-NEXT: ret void // // @@ -779,7 +779,7 @@ // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[I]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[I]], i32 4) // CHECK2-NEXT: ret void // // @@ -901,7 +901,7 @@ // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[I]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[I]], i32 4) // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18656,7 +18656,7 @@ // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]], i64 4) // CHECK1-NEXT: ret void // // @@ -20250,7 +20250,7 @@ // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L2]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L2]], i64 4) // CHECK2-NEXT: ret void // // @@ -21829,7 +21829,7 @@ // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[L1]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[L1]], i32 4) // CHECK3-NEXT: ret void // // @@ -23363,7 +23363,7 @@ // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[L1]]) +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[L1]], i32 4) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9581,7 +9581,7 @@ // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]], i64 4) // CHECK1-NEXT: ret void // // @@ -10655,7 +10655,7 @@ // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L1]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L1]], i32 4) // CHECK2-NEXT: ret void // // @@ -11693,7 +11693,7 @@ // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[L1]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[L1]], i32 4) // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -910,7 +910,7 @@ // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -950,7 +950,7 @@ // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -990,7 +990,7 @@ // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1030,7 +1030,7 @@ // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1077,7 +1077,7 @@ // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1123,7 +1123,7 @@ // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1167,7 +1167,7 @@ // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR1:[0-9]+]] -// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]]) +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -1211,7 +1211,7 @@ // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR1]] -// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]]) +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -4226,7 +4226,7 @@ // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR1:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -4268,7 +4268,7 @@ // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i64 8) // CHECK1-NEXT: ret void // // @@ -4531,8 +4531,8 @@ // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D3]]) -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C2]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -4593,8 +4593,8 @@ // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D2]]) -// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D2]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i64 1) // CHECK1-NEXT: ret void // // @@ -5587,7 +5587,7 @@ // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[E1]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i32 8) // CHECK2-NEXT: ret void // // @@ -5850,8 +5850,8 @@ // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D3]]) -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C2]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -5912,8 +5912,8 @@ // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D2]]) -// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D2]], i32 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 1) // CHECK2-NEXT: ret void // // @@ -6905,7 +6905,7 @@ // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[E1]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i32 8) // CHECK3-NEXT: ret void // // @@ -7168,8 +7168,8 @@ // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR1]] -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D3]]) -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C2]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C2]], i32 1) // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -7230,8 +7230,8 @@ // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D2]]) -// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D2]], i32 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C1]], i32 1) // CHECK3-NEXT: ret void // // diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -332,7 +332,7 @@ /// void __kmpc_alloc_shared(size_t nbyte); TLI_DEFINE_ENUM_INTERNAL(__kmpc_alloc_shared) TLI_DEFINE_STRING_INTERNAL("__kmpc_alloc_shared") -/// void __kmpc_free_shared(void *ptr); +/// void __kmpc_free_shared(void *ptr, size_t nbyte); TLI_DEFINE_ENUM_INTERNAL(__kmpc_free_shared) TLI_DEFINE_STRING_INTERNAL("__kmpc_free_shared") /// double __log10_finite(double x); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -430,7 +430,7 @@ __OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16) __OMP_RTL(__kmpc_alloc_shared, false, VoidPtr, SizeTy) -__OMP_RTL(__kmpc_free_shared, false, Void, VoidPtr) +__OMP_RTL(__kmpc_free_shared, false, Void, VoidPtr, SizeTy) __OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) __OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) __OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -434,7 +434,6 @@ bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) { unsigned ExpectedNumParams; if (TLIFn == LibFunc_free || - TLIFn == LibFunc___kmpc_free_shared || // OpenMP Offloading RTL free TLIFn == LibFunc_ZdlPv || // operator delete(void*) TLIFn == LibFunc_ZdaPv || // operator delete[](void*) TLIFn == LibFunc_msvc_delete_ptr32 || // operator delete(void*) @@ -457,7 +456,8 @@ TLIFn == LibFunc_msvc_delete_array_ptr32_int || // delete[](void*, uint) TLIFn == LibFunc_msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) - TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) + TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow || // delete[](void*, nothrow) + TLIFn == LibFunc___kmpc_free_shared) // OpenMP Offloading RTL free ExpectedNumParams = 2; else if (TLIFn == LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t || // delete(void*, align_val_t, nothrow) TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t || // delete[](void*, align_val_t, nothrow) diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1048,8 +1048,10 @@ case LibFunc_mktime: case LibFunc_times: case LibFunc_vec_free: - case LibFunc___kmpc_free_shared: return (NumParams != 0 && FTy.getParamType(0)->isPointerTy()); + case LibFunc___kmpc_free_shared: + return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() && + IsSizeTTy(FTy.getParamType(1))); case LibFunc_fopen: return (NumParams == 2 && FTy.getReturnType()->isPointerTy() && diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -24,6 +24,7 @@ ; CHECK-NEXT: call void @bar() #[[ATTR0]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true) ; CHECK-NEXT: ret void +; entry: %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 true, i1 true) call void @foo() @@ -42,7 +43,7 @@ entry: %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12 call void @use(i8* %0) - call void @__kmpc_free_shared(i8* %0) + call void @__kmpc_free_shared(i8* %0, i64 4) ret void } @@ -52,13 +53,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]] ; CHECK-NEXT: call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR3:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]]) #[[ATTR0]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[TMP0]], i64 noundef 4) #[[ATTR0]] ; CHECK-NEXT: ret void ; entry: %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !13 call void @share(i8* %0) - call void @__kmpc_free_shared(i8* %0) + call void @__kmpc_free_shared(i8* %0, i64 4) ret void } @@ -94,15 +95,15 @@ entry: %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !14 call void @use(i8* %0) - call void @__kmpc_free_shared(i8* %0) + call void @__kmpc_free_shared(i8* %0, i64 4) ret void } ; CHECK: declare i8* @__kmpc_alloc_shared(i64) declare i8* @__kmpc_alloc_shared(i64) -; CHECK: declare void @__kmpc_free_shared(i8* nocapture) -declare void @__kmpc_free_shared(i8*) +; CHECK: declare void @__kmpc_free_shared(i8* nocapture, i64) +declare void @__kmpc_free_shared(i8*, i64) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4, !6, !7} diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -25,7 +25,7 @@ %x_on_stack = bitcast i8* %x to i32* %0 = bitcast i32* %x_on_stack to i8* call void @use(i8* %0) - call void @__kmpc_free_shared(i8* %x) + call void @__kmpc_free_shared(i8* %x, i64 4) call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true) ret void } @@ -50,7 +50,7 @@ %x_on_stack = bitcast i8* %x to [4 x i32]* %0 = bitcast [4 x i32]* %x_on_stack to i8* call void @use(i8* %0) - call void @__kmpc_free_shared(i8* %x) + call void @__kmpc_free_shared(i8* %x, i64 16) br label %exit exit: ret void @@ -67,7 +67,7 @@ %y_on_stack = bitcast i8* %y to [4 x i32]* %1 = bitcast [4 x i32]* %y_on_stack to i8* call void @use(i8* %1) - call void @__kmpc_free_shared(i8* %y) + call void @__kmpc_free_shared(i8* %y, i64 4) br label %exit exit: ret void @@ -79,11 +79,11 @@ %0 = icmp eq i32 %call, -1 br i1 %0, label %master, label %exit master: - %y = call i8* @__kmpc_alloc_shared(i64 6), !dbg !12 + %y = call i8* @__kmpc_alloc_shared(i64 24), !dbg !12 %y_on_stack = bitcast i8* %y to [6 x i32]* %1 = bitcast [6 x i32]* %y_on_stack to i8* call void @use(i8* %1) - call void @__kmpc_free_shared(i8* %y) + call void @__kmpc_free_shared(i8* %y, i64 24) br label %exit exit: ret void @@ -98,7 +98,7 @@ declare i8* @__kmpc_alloc_shared(i64) -declare void @__kmpc_free_shared(i8*) +declare void @__kmpc_free_shared(i8*, i64) declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -589,7 +589,7 @@ // These functions are OpenMP Offloading allocation / free routines "declare i8* @__kmpc_alloc_shared(i64)\n" - "declare void @__kmpc_free_shared(i8*)\n" + "declare void @__kmpc_free_shared(i8*, i64)\n" ); for (unsigned FI = 0; FI != LibFunc::NumLibFuncs; ++FI) { diff --git a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu --- a/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/data_sharing.cu @@ -95,7 +95,7 @@ return __kmpc_alloc_for_warp(AllocGlobal, Bytes, WarpBytes); } -EXTERN void __kmpc_free_shared(void *Ptr) { +EXTERN void __kmpc_free_shared(void *Ptr, size_t /* Bytes */) { __kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask(); unsigned LeaderID = __kmpc_impl_ffs(CurActive) - 1; bool IsWarpLeader = diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -467,7 +467,8 @@ /// Deallocate \p Ptr. Needs to be called balanced with __kmpc_alloc_shared like /// a stack (push/pop). Can be called by any thread. \p Ptr must be allocated by -/// __kmpc_alloc_shared by the same thread. -EXTERN void __kmpc_free_shared(void *Ptr); +/// __kmpc_alloc_shared by the same thread. \p Bytes contains the size of the +/// paired allocation to make memory management easier. +EXTERN void __kmpc_free_shared(void *Ptr, size_t Bytes); #endif