Index: llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll =================================================================== --- llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll +++ llvm/test/Transforms/OpenMP/spmdization_constant_prop.ll @@ -25,6 +25,7 @@ @__omp_rtl_debug_kind = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 @__omp_rtl_assume_no_thread_state = weak_odr hidden local_unnamed_addr addrspace(1) constant i32 0 @omptarget_device_environment = weak protected addrspace(4) global %struct.DeviceEnvironmentTy undef, align 4 +@omptarget_device_time_profile = weak protected addrspace(4) global %struct.DeviceTimeProfileTy undef, align 4 @IsSPMDMode = weak hidden addrspace(3) global i32 undef, align 4 @.str.12 = private unnamed_addr addrspace(4) constant [47 x i8] c"ValueRAII initialization with wrong old value!\00", align 1 @_ZN12_GLOBAL__N_122SharedMemorySmartStackE = internal addrspace(3) global %"struct.(anonymous namespace)::SharedMemorySmartStackTy" undef, align 16 @@ -34,6 +35,7 @@ @_ZL29SharedMemVariableSharingSpace = internal unnamed_addr addrspace(3) global [64 x ptr] undef, align 16 @G = global i32 undef @llvm.used = appending addrspace(1) global [2 x ptr] [ptr addrspacecast (ptr addrspace(3) @IsSPMDMode to ptr), ptr addrspacecast (ptr addrspace(4) @omptarget_device_environment to ptr)], section "llvm.metadata" +@llvm.used = appending addrspace(1) global [2 x ptr] [ptr addrspacecast (ptr addrspace(3) @IsSPMDMode to ptr), ptr addrspacecast (ptr addrspace(4) @omptarget_device_time_profile to ptr)], section "llvm.metadata" @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__omp_offloading_20_11e3950_main_l12_exec_mode to ptr)], section "llvm.metadata" ; Function Attrs: alwaysinline convergent norecurse nounwind Index: openmp/libomptarget/DeviceRTL/include/Configuration.h =================================================================== --- openmp/libomptarget/DeviceRTL/include/Configuration.h +++ openmp/libomptarget/DeviceRTL/include/Configuration.h @@ -13,9 +13,19 @@ #ifndef OMPTARGET_CONFIGURATION_H #define OMPTARGET_CONFIGURATION_H +#include "DeviceTimeProfile.h" #include "Types.h" +#include "targetprofile.h" namespace _OMP { + +namespace targetprof { + +int getNumRecords(); +_OMP::prof::Record *getRecords(); + +} // namespace targetprof + namespace config { enum DebugKind : uint32_t { Index: openmp/libomptarget/DeviceRTL/include/Interface.h =================================================================== --- openmp/libomptarget/DeviceRTL/include/Interface.h +++ openmp/libomptarget/DeviceRTL/include/Interface.h @@ -132,6 +132,8 @@ int omp_get_team_num(); +int omp_get_num_timeprofile_records(); + int omp_get_initial_device(void); void *llvm_omp_target_dynamic_shared_alloc(); Index: openmp/libomptarget/DeviceRTL/src/Configuration.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -13,6 +13,7 @@ #include "Configuration.h" #include "DeviceEnvironment.h" +#include "DeviceTimeProfile.h" #include "State.h" #include "Types.h" @@ -31,6 +32,15 @@ DeviceEnvironmentTy CONSTANT(omptarget_device_environment) __attribute__((used, retain, weak, visibility("protected"))); +DeviceTimeProfileTy omptarget_device_time_profile + __attribute__((used, retain, weak, visibility("protected"))); + +int targetprof::getNumRecords() { return omptarget_device_time_profile.StartC; } + +_OMP::prof::Record *targetprof::getRecords() { + return omptarget_device_time_profile.Records; +} + uint32_t config::getDebugKind() { return __omp_rtl_debug_kind & omptarget_device_environment.DebugKind; } Index: openmp/libomptarget/DeviceRTL/src/Kernel.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "Debug.h" +#include "DeviceTimeProfile.h" #include "Interface.h" #include "Mapping.h" #include "State.h" @@ -21,6 +22,44 @@ #pragma omp begin declare target device_type(nohost) +// CLOCKINIT(); +extern DeviceTimeProfileTy omptarget_device_time_profile; + +namespace _OMP { +namespace prof { + +__attribute__((used)) int clockIn(const char *FuncName, int Idx) { + if (Idx < OMP_ARRAY_SIZE) { +#pragma omp parallel + { + int thread_ = omp_get_thread_num(); + if (thread_ < OMP_THREADS) { + omptarget_device_time_profile.Records[Idx].Start[thread_] = + omp_get_wtime(); + omptarget_device_time_profile.Records[Idx].Thread[thread_] = thread_; + } + } + return Idx; + } else { + return OMP_ARRAY_SIZE; + } +} + +__attribute__((used)) void clockOut(const char *FuncName, int Idx) { + if (Idx < OMP_ARRAY_SIZE) { +#pragma omp parallel + { + int thread_ = omp_get_thread_num(); + if (thread_ < OMP_THREADS) { + omptarget_device_time_profile.Records[Idx].Stop[thread_] = + omp_get_wtime(); + } + } + } +} +} // namespace prof +} // namespace _OMP + static void inititializeRuntime(bool IsSPMD) { // Order is important here. synchronize::init(IsSPMD); @@ -78,6 +117,7 @@ // code and workers will run into a barrier right away. } + CLOCKIN(); if (IsSPMD) { state::assumeInitialState(IsSPMD); return -1; @@ -127,6 +167,7 @@ /// void __kmpc_target_deinit(IdentTy *Ident, int8_t Mode, bool) { FunctionTracingRAII(); + CLOCKOUT(); const bool IsSPMD = Mode & OMP_TGT_EXEC_MODE_SPMD; state::assumeInitialState(IsSPMD); if (IsSPMD) Index: openmp/libomptarget/DeviceRTL/src/State.cpp =================================================================== --- openmp/libomptarget/DeviceRTL/src/State.cpp +++ openmp/libomptarget/DeviceRTL/src/State.cpp @@ -403,6 +403,14 @@ int omp_get_team_num() { return mapping::getBlockId(); } +int omp_get_num_target_timeprofile_records() { + return targetprof::getNumRecords(); +} + +_OMP::prof::Record *omp_get_target_timeprofile_records() { + return targetprof::getRecords(); +} + int omp_get_initial_device(void) { return -1; } } Index: openmp/libomptarget/include/device.h =================================================================== --- openmp/libomptarget/include/device.h +++ openmp/libomptarget/include/device.h @@ -350,6 +350,8 @@ // Return true if data can be copied to DstDevice directly bool isDataExchangable(const DeviceTy &DstDevice); + int32_t getTimeProfilePtr(void **ptr); + /// Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures /// exclusive access to the HDTT map. LookupResult lookupMapping(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, Index: openmp/libomptarget/include/omptargetplugin.h =================================================================== --- openmp/libomptarget/include/omptargetplugin.h +++ openmp/libomptarget/include/omptargetplugin.h @@ -191,6 +191,7 @@ int32_t __tgt_rtl_init_async_info(int32_t ID, __tgt_async_info **AsyncInfoPtr); int32_t __tgt_rtl_init_device_info(int32_t ID, __tgt_device_info *DeviceInfoPtr, const char **ErrStr); +int32_t __tgt_rtl_get_timeprofile_data(int32_t DeviceId, void **DataPtr); #ifdef __cplusplus } Index: openmp/libomptarget/include/rtl.h =================================================================== --- openmp/libomptarget/include/rtl.h +++ openmp/libomptarget/include/rtl.h @@ -70,6 +70,7 @@ typedef int32_t(record_event_ty)(int32_t, void *, __tgt_async_info *); typedef int32_t(wait_event_ty)(int32_t, void *, __tgt_async_info *); typedef int32_t(sync_event_ty)(int32_t, void *); + typedef int32_t(time_profile_ptr_ty)(int32_t, void **); typedef int32_t(destroy_event_ty)(int32_t, void *); typedef int32_t(release_async_info_ty)(int32_t, __tgt_async_info *); typedef int32_t(init_async_info_ty)(int32_t, __tgt_async_info **); @@ -117,6 +118,7 @@ supports_empty_images_ty *supports_empty_images = nullptr; set_info_flag_ty *set_info_flag = nullptr; print_device_info_ty *print_device_info = nullptr; + time_profile_ptr_ty *time_profile_ptr = nullptr; create_event_ty *create_event = nullptr; record_event_ty *record_event = nullptr; wait_event_ty *wait_event = nullptr; Index: openmp/libomptarget/plugins/amdgpu/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -38,6 +38,7 @@ #include "rt.h" #include "DeviceEnvironment.h" +#include "DeviceTimeProfile.h" #include "get_elf_mach_gfx_name.h" #include "omptargetplugin.h" #include "print_tracing.h" @@ -1788,6 +1789,110 @@ } }; +struct DeviceTimeProfile { + // initialise an DeviceTimeProfileTy in the deviceRTL + // patches around differences in the deviceRTL between trunk, aomp, + // rocmcc. Over time these differences will tend to zero and this class + // simplified. + // Symbol may be in .data or .bss, and may be missing fields, todo: + // review aomp/trunk/rocm and simplify the following + + // The symbol may also have been deadstripped because the device side + // accessors were unused. + + // If the symbol is in .data (aomp, rocm) it can be written directly. + // If it is in .bss, we must wait for it to be allocated space on the + // gpu (trunk) and initialize after loading. + const char *sym() { return "omptarget_device_time_profile"; } + + DeviceTimeProfileTy HostDeviceTimeProfile; + SymbolInfo SI; + bool Valid = false; + + __tgt_device_image *Image; + const size_t ImgSize; + + DeviceTimeProfile(int DeviceId, _OMP::prof::Record records_[OMP_ARRAY_SIZE], + int startc_, int idx_, __tgt_device_image *Image, + const size_t ImgSize) + : Image(Image), ImgSize(ImgSize) { + + HostDeviceTimeProfile.StartC = startc_; + HostDeviceTimeProfile.Idx = idx_; + HostDeviceTimeProfile.DeviceId = DeviceId; + for (int i = 0; i < startc_; i++) { + for (int j = 0; j < OMP_THREADS; j++) { + HostDeviceTimeProfile.Records[i].Thread[j] = records_[i].Thread[j]; + // HostDeviceTimeProfile.Records[i].Event[j] = records_[i].Event[j]; + HostDeviceTimeProfile.Records[i].Stop[j] = records_[i].Stop[j]; + HostDeviceTimeProfile.Records[i].Start[j] = records_[i].Start[j]; + } + } + /* + if (char *EnvStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) + HostDeviceTimeProfile.DebugKind = std::stoi(EnvStr); + */ + int Rc = getSymbolInfoWithoutLoading((char *)Image->ImageStart, ImgSize, + sym(), &SI); + if (Rc != 0) { + DP("Finding global device time profile '%s' - symbol missing.\n", sym()); + return; + } + + if (SI.Size > sizeof(HostDeviceTimeProfile)) { + DP("Symbol '%s' has size %u, expected at most %zu.\n", sym(), SI.Size, + sizeof(HostDeviceTimeProfile)); + return; + } + + Valid = true; + } + + bool inImage() { return SI.ShType != SHT_NOBITS; } + + hsa_status_t beforeLoading(void *Data, size_t Size) { + if (Valid) { + if (inImage()) { + DP("Setting global device environment before load (%u bytes)\n", + SI.Size); + uint64_t Offset = (char *)SI.Addr - (char *)Image->ImageStart; + void *Pos = (char *)Data + Offset; + memcpy(Pos, &HostDeviceTimeProfile, SI.Size); + } + } + return HSA_STATUS_SUCCESS; + } + + hsa_status_t afterLoading() { + if (Valid) { + if (!inImage()) { + DP("Setting global device environment after load (%u bytes)\n", + SI.Size); + int DeviceId = HostDeviceTimeProfile.DeviceId; + auto &SymbolInfo = DeviceInfo().SymbolInfoTable[DeviceId]; + void *StatePtr; + uint32_t StatePtrSize; + hsa_status_t Err = interop_hsa_get_symbol_info( + SymbolInfo, DeviceId, sym(), &StatePtr, &StatePtrSize); + if (Err != HSA_STATUS_SUCCESS) { + DP("failed to find %s in loaded image\n", sym()); + return Err; + } + + if (StatePtrSize != SI.Size) { + DP("Symbol had size %u before loading, %u after\n", StatePtrSize, + SI.Size); + return HSA_STATUS_ERROR; + } + + return DeviceInfo().freesignalpoolMemcpyH2D( + StatePtr, &HostDeviceTimeProfile, StatePtrSize, DeviceId); + } + } + return HSA_STATUS_SUCCESS; + } +}; + hsa_status_t implCalloc(void **RetPtr, size_t Size, int DeviceId) { uint64_t Rounded = 4 * ((Size + 3) / 4); void *Ptr; Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp =================================================================== --- openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -24,6 +24,7 @@ #include "Debug.h" #include "DeviceEnvironment.h" +#include "DeviceTimeProfile.h" #include "omptarget.h" #include "omptargetplugin.h" @@ -34,6 +35,9 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" +DeviceTimeProfileTy DeviceTimeProf; +CUdeviceptr DeviceTimeProfPtr; + using namespace llvm; // Utility for retrieving and printing CUDA error string. @@ -119,6 +123,11 @@ return OFFLOAD_SUCCESS; } +int32_t getTimeProfilePtr(void **Ptr) { + *Ptr = (void *)DeviceTimeProfPtr; + return CUDA_SUCCESS; +} + int recordEvent(void *EventPtr, __tgt_async_info *AsyncInfo) { CUstream Stream = reinterpret_cast(AsyncInfo->Queue); CUevent Event = reinterpret_cast(EventPtr); @@ -964,6 +973,32 @@ } } + { + // TODO: The device ID used here is not the real device ID used by OpenMP. + + const char *DeviceTimeProfName = "omptarget_device_time_profile"; + size_t CUSize; + + Err = cuModuleGetGlobal(&DeviceTimeProfPtr, &CUSize, Module, + DeviceTimeProfName); + if (Err == CUDA_SUCCESS) { + if (CUSize != sizeof(DeviceTimeProfileTy)) { + REPORT( + "Global device_time_profile '%s' - size mismatch (%zu != %zu)\n", + DeviceTimeProfName, CUSize, sizeof(DeviceTimeProfileTy)); + CUDA_ERR_STRING(Err); + return nullptr; + } + + DP("Sending global device time profile data %zu bytes\n", CUSize); + } else { + DP("Finding global device time profile '%s' - symbol missing.\n", + DeviceTimeProfName); + DP("Continue, considering this is a device RTL which does not accept " + "environment setting.\n"); + } + } + return getOffloadEntriesTable(DeviceId); } @@ -1869,6 +1904,11 @@ return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr); } +int32_t __tgt_rtl_get_timeprofile_data(int32_t DeviceId, void **DataPtr) { + // assert(!data_ptr && "data_ptr is not nullptr"); + return getTimeProfilePtr(DataPtr); +} + #ifdef __cplusplus } #endif Index: openmp/libomptarget/src/device.cpp =================================================================== --- openmp/libomptarget/src/device.cpp +++ openmp/libomptarget/src/device.cpp @@ -11,17 +11,143 @@ //===----------------------------------------------------------------------===// #include "device.h" +#include "DeviceTimeProfile.h" #include "omptarget.h" #include "private.h" #include "rtl.h" +#include "targetprofile.h" +#include +#include + +#include #include +#include #include +#include #include #include +#include #include #include +CLOCKDEC(); + +void makeJsonRecordEntry(llvm::raw_fd_ostream &OS, + _OMP::prof::Record recs[OMP_ARRAY_SIZE], + int total_entries) { + // SecToUsFactor * time to convert seconds to microseconds as mentioned by the + // chrome trace format + const int32_t SecToUsFactor = 1000000; + llvm::json::OStream J(OS); + J.objectBegin(); + J.attributeBegin("traceEvents"); + J.arrayBegin(); + + auto writeEvent = [&](_OMP::prof::ThreadRecord rec) { + J.object([&] { + J.attribute("pid", -1); + J.attribute("tid", int64_t(rec.Thread)); + J.attribute("ph", "X"); + J.attribute("ts", rec.Start); + J.attribute("dur", rec.Stop - rec.Start); + J.attribute("name", rec.Event); + }); + }; + auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, + llvm::StringRef arg) { + J.object([&] { + J.attribute("cat", "Target Event"); + J.attribute("pid", -1); + J.attribute("tid", int64_t(Tid)); + J.attribute("ts", 0); + J.attribute("ph", "M"); + J.attribute("name", Name); + J.attributeObject("args", [&] { J.attribute("name", arg); }); + }); + }; + + for (int i = 0; i < total_entries; i++) { + for (int j = 0; j < OMP_THREADS; j++) { + _OMP::prof::ThreadRecord t; + t.Stop = recs[i].Stop[j] * SecToUsFactor; + t.Start = recs[i].Start[j] * SecToUsFactor; + t.Thread = recs[i].Thread[j]; + // sprintf(t.Event, "%d-Target Thread#%d", i, t.Thread); + // sprintf(t.Event, "Target Event#%d", t.Thread); + sprintf(t.Event, "Target Event"); + writeEvent(t); + writeMetadataEvent("thread_name", t.Thread, t.Event); + } + } + + writeMetadataEvent("process_name", -1, + "clang-libomptarget-offloading-process"); + writeMetadataEvent("process_name", -1, + "clang-libomptarget-offloading-thread"); + + J.arrayEnd(); + J.attributeEnd(); + + double BeginningOfTime = (recs[0].Start[0] - 100) * SecToUsFactor; + J.attribute("beginningOfTime", BeginningOfTime); + + J.objectEnd(); +} + +void sideloadTimeProfileEntries(_OMP::prof::Record Recs[OMP_ARRAY_SIZE], + int total_entries) { + auto makeRecEntry = [](const _OMP::prof::Record R, size_t Idx) { + char TargetDetail[64]; + sprintf(TargetDetail, "Target Thread#%d", R.Thread[Idx]); + const long long int Multiplier = 1000000000; + long long int Start_ = R.Start[Idx] * Multiplier; + std::chrono::duration> Start_dur( + Start_); + long long int Stop_ = R.Stop[Idx] * Multiplier; + std::chrono::duration> Stop_dur( + Stop_); + + llvm::Entry Temp( + std::chrono::time_point(Start_dur), + std::chrono::time_point(Stop_dur), + std::string("Target Op"), std::string(TargetDetail)); + return Temp; + }; + + bool TPINotInit = false; + llvm::TimeTraceProfiler *TPI = llvm::getTimeTraceProfilerInstance(); + if (TPI == nullptr) { + llvm::TimeTraceProfiler TPV = + llvm::TimeTraceProfiler(0, "Target Operations"); + TPI = &TPV; + TPINotInit = true; + } + for (size_t i = 0; i < total_entries; i++) { + for (size_t j = 0; j < OMP_THREADS; j++) { + TPI->Entries.emplace_back(makeRecEntry(Recs[i], j)); + } + } + if (TPINotInit) + llvm::Instances.List.emplace_back(TPI); +} + +void clockFinalize(DeviceTy *Device) { + int32_t Idx = omptarget_device_time_profile.Idx; + assert(Idx >= 0 && Idx < OMP_ARRAY_SIZE && "Out of beyond bounds"); + // TODO: use env var + if (Idx > 0) { + llvm::StringRef out("Target-profile-only.json"); + std::error_code EC; + llvm::raw_fd_ostream OS(out, EC); + llvm::json::OStream J(OS); + sideloadTimeProfileEntries(omptarget_device_time_profile.Records, Idx); + makeJsonRecordEntry(OS, omptarget_device_time_profile.Records, Idx); + OS.close(); + } else + printf("No Target\n"); +} + int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const { // First, check if the user disabled atomic map transfer/malloc/dealloc. @@ -55,6 +181,7 @@ PendingGlobalsMtx(), ShadowMtx() {} DeviceTy::~DeviceTy() { + CLOCKFINALIZE(); if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) return; @@ -503,6 +630,7 @@ /// Thread-safe method to initialize the device only once. int32_t DeviceTy::initOnce() { std::call_once(InitFlag, &DeviceTy::init, this); + CLOCKINIT(); // At this point, if IsInit is true, then either this thread or some other // thread in the past successfully initialized the device, so we can return @@ -556,6 +684,12 @@ AsyncInfo); } +int32_t DeviceTy::getTimeProfilePtr(void **ptr) { + if (RTL->time_profile_ptr) + return RTL->time_profile_ptr(RTLDeviceID, ptr); + return OFFLOAD_SUCCESS; +} + // Retrieve data from device int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size, AsyncInfoTy &AsyncInfo) { Index: openmp/libomptarget/src/interface.cpp =================================================================== --- openmp/libomptarget/src/interface.cpp +++ openmp/libomptarget/src/interface.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "DeviceTimeProfile.h" #include "device.h" #include "omptarget.h" #include "private.h" @@ -21,6 +22,8 @@ #include #include +extern DeviceTimeProfileTy omptarget_device_time_profile; + //////////////////////////////////////////////////////////////////////////////// /// adds requires flags EXTERN void __tgt_register_requires(int64_t Flags) { @@ -111,6 +114,19 @@ ArgSizes, ArgTypes, ArgNames, ArgMappers); } +// At device deconstructor context gets destroyed. To work around this the +// collected data is synced target -> host after every kernel deinit. +void clockSync(DeviceTy *Device) { + AsyncInfoTy async_info(*Device); + void *tgt_ptr = nullptr; + void *hst_ptr = &omptarget_device_time_profile; + Device->getTimeProfilePtr(&tgt_ptr); + Device->retrieveData(hst_ptr, tgt_ptr, sizeof(DeviceTimeProfileTy), + async_info); + int32_t Idx = omptarget_device_time_profile.Idx; + assert(Idx > 0 && Idx < OMP_ARRAY_SIZE && "Out of beyond bounds"); +} + /// passes data from the target, releases target memory and destroys /// the host-target mapping (top entry from the stack of data maps) /// created by the last __tgt_target_data_begin. @@ -142,6 +158,7 @@ #endif AsyncInfoTy AsyncInfo(Device); + CLOCKSYNC(); int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo); if (Rc == OFFLOAD_SUCCESS) Index: openmp/libomptarget/src/rtl.cpp =================================================================== --- openmp/libomptarget/src/rtl.cpp +++ openmp/libomptarget/src/rtl.cpp @@ -244,6 +244,8 @@ DynLibrary->getAddressOfSymbol("__tgt_rtl_init_async_info"); *((void **)&RTL.init_device_info) = DynLibrary->getAddressOfSymbol("__tgt_rtl_init_device_info"); + *((void **)&RTL.time_profile_ptr) = + DynLibrary->getAddressOfSymbol("__tgt_rtl_get_timeprofile_data"); RTL.LibraryHandler = std::move(DynLibrary);