Changeset View
Changeset View
Standalone View
Standalone View
openmp/libomptarget/src/rtl.cpp
Show First 20 Lines • Show All 243 Lines • ▼ Show 20 Lines | |||||
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | ||||
// Functionality for registering Ctors/Dtors | // Functionality for registering Ctors/Dtors | ||||
static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc, | static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc, | ||||
__tgt_device_image *img, | __tgt_device_image *img, | ||||
RTLInfoTy *RTL) { | RTLInfoTy *RTL) { | ||||
for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) { | for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) { | ||||
DeviceTy &Device = PM->Devices[RTL->Idx + i]; | DeviceTy &Device = *PM->Devices[RTL->Idx + i]; | ||||
Device.PendingGlobalsMtx.lock(); | Device.PendingGlobalsMtx.lock(); | ||||
Device.HasPendingGlobals = true; | Device.HasPendingGlobals = true; | ||||
for (__tgt_offload_entry *entry = img->EntriesBegin; | for (__tgt_offload_entry *entry = img->EntriesBegin; | ||||
entry != img->EntriesEnd; ++entry) { | entry != img->EntriesEnd; ++entry) { | ||||
if (entry->flags & OMP_DECLARE_TARGET_CTOR) { | if (entry->flags & OMP_DECLARE_TARGET_CTOR) { | ||||
DP("Adding ctor " DPxMOD " to the pending list.\n", | DP("Adding ctor " DPxMOD " to the pending list.\n", | ||||
DPxPTR(entry->addr)); | DPxPTR(entry->addr)); | ||||
Device.PendingCtorsDtors[desc].PendingCtors.push_back(entry->addr); | Device.PendingCtorsDtors[desc].PendingCtors.push_back(entry->addr); | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | void RTLsTy::RegisterRequires(int64_t flags) { | ||||
DP("New requires flags %" PRId64 " compatible with existing %" PRId64 "!\n", | DP("New requires flags %" PRId64 " compatible with existing %" PRId64 "!\n", | ||||
flags, RequiresFlags); | flags, RequiresFlags); | ||||
} | } | ||||
void RTLsTy::initRTLonce(RTLInfoTy &R) { | void RTLsTy::initRTLonce(RTLInfoTy &R) { | ||||
// If this RTL is not already in use, initialize it. | // If this RTL is not already in use, initialize it. | ||||
if (!R.isUsed && R.NumberOfDevices != 0) { | if (!R.isUsed && R.NumberOfDevices != 0) { | ||||
// Initialize the device information for the RTL we are about to use. | // Initialize the device information for the RTL we are about to use. | ||||
DeviceTy device(&R); | const size_t Start = PM->Devices.size(); | ||||
size_t Start = PM->Devices.size(); | PM->Devices.reserve(Start + R.NumberOfDevices); | ||||
PM->Devices.resize(Start + R.NumberOfDevices, device); | |||||
for (int32_t device_id = 0; device_id < R.NumberOfDevices; device_id++) { | for (int32_t device_id = 0; device_id < R.NumberOfDevices; device_id++) { | ||||
PM->Devices.push_back(std::make_unique<DeviceTy>(&R)); | |||||
tianshilei1992: what about `emplace_back`? | |||||
ye-luo (Author, Unsubmitted): Both push_back and emplace_back do the same thing here: they invoke the move constructor of unique_ptr. However, emplace_back is less explicit to read, so push_back is preferred here. ye-luo: Both push_back and emplace_back do the same thing of invoking the move constructor of… | |||||
// global device ID | // global device ID | ||||
PM->Devices[Start + device_id].DeviceID = Start + device_id; | PM->Devices[Start + device_id]->DeviceID = Start + device_id; | ||||
// RTL local device ID | // RTL local device ID | ||||
PM->Devices[Start + device_id].RTLDeviceID = device_id; | PM->Devices[Start + device_id]->RTLDeviceID = device_id; | ||||
} | } | ||||
// Initialize the index of this RTL and save it in the used RTLs. | // Initialize the index of this RTL and save it in the used RTLs. | ||||
R.Idx = (UsedRTLs.empty()) | R.Idx = (UsedRTLs.empty()) | ||||
? 0 | ? 0 | ||||
: UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices; | : UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices; | ||||
assert((size_t)R.Idx == Start && | assert((size_t)R.Idx == Start && | ||||
"RTL index should equal the number of devices used so far."); | "RTL index should equal the number of devices used so far."); | ||||
▲ Show 20 Lines • Show All 94 Lines • ▼ Show 20 Lines | for (auto *R : UsedRTLs) { | ||||
DP("Image " DPxMOD " is compatible with RTL " DPxMOD "!\n", | DP("Image " DPxMOD " is compatible with RTL " DPxMOD "!\n", | ||||
DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); | DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); | ||||
FoundRTL = R; | FoundRTL = R; | ||||
// Execute dtors for static objects if the device has been used, i.e. | // Execute dtors for static objects if the device has been used, i.e. | ||||
// if its PendingCtors list has been emptied. | // if its PendingCtors list has been emptied. | ||||
for (int32_t i = 0; i < FoundRTL->NumberOfDevices; ++i) { | for (int32_t i = 0; i < FoundRTL->NumberOfDevices; ++i) { | ||||
DeviceTy &Device = PM->Devices[FoundRTL->Idx + i]; | DeviceTy &Device = *PM->Devices[FoundRTL->Idx + i]; | ||||
Device.PendingGlobalsMtx.lock(); | Device.PendingGlobalsMtx.lock(); | ||||
if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { | if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { | ||||
AsyncInfoTy AsyncInfo(Device); | AsyncInfoTy AsyncInfo(Device); | ||||
for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { | for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { | ||||
int rc = target(nullptr, Device, dtor, 0, nullptr, nullptr, nullptr, | int rc = target(nullptr, Device, dtor, 0, nullptr, nullptr, nullptr, | ||||
nullptr, nullptr, nullptr, 1, 1, true /*team*/, | nullptr, nullptr, nullptr, 1, 1, true /*team*/, | ||||
AsyncInfo); | AsyncInfo); | ||||
if (rc != OFFLOAD_SUCCESS) { | if (rc != OFFLOAD_SUCCESS) { | ||||
▲ Show 20 Lines • Show All 54 Lines • Show Last 20 Lines |
what about emplace_back?