diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -2042,13 +2042,11 @@ // All args are references. std::vector args(arg_num); - std::vector ptrs(arg_num); DP("Arg_num: %d\n", arg_num); for (int32_t i = 0; i < arg_num; ++i) { - ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]); - args[i] = &ptrs[i]; - DP("Offseted base: arg[%d]:" DPxMOD "\n", i, DPxPTR(ptrs[i])); + args[i] = &tgt_args[i]; + DP("Offseted base: arg[%d]:" DPxMOD "\n", i, DPxPTR(tgt_args[i])); } KernelTy *KernelInfo = (KernelTy *)tgt_entry_ptr; diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -1063,12 +1063,8 @@ // All args are references. std::vector Args(ArgNum); - std::vector Ptrs(ArgNum); - - for (int I = 0; I < ArgNum; ++I) { - Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]); - Args[I] = &Ptrs[I]; - } + for (int I = 0; I < ArgNum; ++I) + Args[I] = &TgtArgs[I]; KernelTy *KernelInfo = reinterpret_cast(TgtEntryPtr); diff --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp --- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp +++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp @@ -301,12 +301,9 @@ // All args are references. std::vector args_types(arg_num, &ffi_type_pointer); std::vector args(arg_num); - std::vector ptrs(arg_num); - for (int32_t i = 0; i < arg_num; ++i) { - ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]); - args[i] = &ptrs[i]; - } + for (int32_t i = 0; i < arg_num; ++i) + args[i] = &tgt_args[i]; ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num, &ffi_type_void, &args_types[0]); diff --git a/openmp/libomptarget/plugins/remote/src/Client.cpp b/openmp/libomptarget/plugins/remote/src/Client.cpp --- a/openmp/libomptarget/plugins/remote/src/Client.cpp +++ b/openmp/libomptarget/plugins/remote/src/Client.cpp @@ -546,10 +546,6 @@ Request->add_tgt_args((uint64_t)*ArgPtr); } - char *OffsetPtr = (char *)TgtOffsets; - for (auto I = 0; I < ArgNum; I++, OffsetPtr++) - Request->add_tgt_offsets((uint64_t)*OffsetPtr); - Request->set_arg_num(ArgNum); Request->set_team_num(TeamNum); Request->set_thread_limit(ThreadLimit); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -1425,12 +1425,11 @@ assert(TargetTable && "Global data has not been mapped\n"); std::vector TgtArgs; - std::vector TgtOffsets; - PrivateArgumentManagerTy PrivateArgumentManager(Device, AsyncInfo); int Ret; if (ArgNum) { + std::vector TgtOffsets; // Process data, such as data mapping, before launching the kernel Ret = processDataBefore(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs, @@ -1439,6 +1438,9 @@ REPORT("Failed to process data before launching the kernel.\n"); return OFFLOAD_FAIL; } + + for (int I = 0; I < TgtArgs.size(); ++I) + TgtArgs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]); } // Get loop trip count @@ -1453,11 +1455,11 @@ TIMESCOPE_WITH_NAME_AND_IDENT( IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", loc); if (IsTeamConstruct) - Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0], - TgtArgs.size(), TeamNum, ThreadLimit, - LoopTripCount, AsyncInfo); + Ret = Device.runTeamRegion( + TgtEntryPtr, &TgtArgs[0], nullptr /* TgtOffsets */, TgtArgs.size(), + TeamNum, ThreadLimit, LoopTripCount, AsyncInfo); else - Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0], + Ret = Device.runRegion(TgtEntryPtr, &TgtArgs[0], nullptr /* TgtOffsets */, TgtArgs.size(), AsyncInfo); }