Index: llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
+++ llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
@@ -21,7 +21,6 @@
 #include "llvm/Analysis/MemorySSA.h"
 
 namespace llvm {
-
 namespace omp {
 
 using namespace types;
@@ -133,6 +132,48 @@
     DenseMap> UsesMap;
   };
 
+  /// Used to store information about a runtime call that involves
+  /// host to device memory offloading.
+  struct MemoryTransfer {
+    /// An offload array with, per slot, its last access and its stored value.
+    struct OffloadArray {
+      AllocaInst *Array;
+      SmallVector<Value *, 8> LastAccesses;
+      SmallVector<Value *, 8> StoredValues;
+      InformationCache &InfoCache;
+
+      OffloadArray(AllocaInst *Array, InformationCache &InfoCache)
+          : Array(Array), InfoCache(InfoCache) {}
+
+      /// Fills both vectors, up to the first access not dominating \p Before.
+      bool getValues(Instruction *Before = nullptr);
+
+    private:
+      bool getLastAccessesToOfflArray(Instruction *Before = nullptr);
+      bool getLastStoresInOfflArray();
+      /// Resizes both vectors to \p Size slots, each set to \p Values.
+      void initialize(unsigned Size, Value *Values = nullptr) {
+        LastAccesses.assign(Size, Values);
+        StoredValues.assign(Size, Values);
+      }
+      unsigned numValues() const { return LastAccesses.size(); }
+      static bool isFilled(const SmallVectorImpl<Value *> &V);
+    };
+
+    CallBase *RuntimeCall;
+    InformationCache &InfoCache;
+    std::unique_ptr<OffloadArray> BasePtrs;
+    std::unique_ptr<OffloadArray> Ptrs;
+    std::unique_ptr<OffloadArray> Sizes;
+
+    MemoryTransfer(CallBase *RuntimeCall, InformationCache &InfoCache)
+        : RuntimeCall(RuntimeCall), InfoCache(InfoCache) {}
+
+    /// Gets the values stored in the offload arrays. Returns false if some of
+    /// the values couldn't be found.
+    bool getValuesInOfflArrays();
+  };
+
   /// The slice of the module we are allowed to look at.
   SmallPtrSetImpl &ModuleSlice;
 
@@ -166,6 +207,7 @@
 
 struct OpenMPOpt {
 
+  using MemoryTransfer = OMPInformationCache::MemoryTransfer;
   using OptimizationRemarkGetter =
       function_ref;
 
@@ -188,6 +230,12 @@
   static CallInst *getCallIfRegularCall(
       Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr);
 
+  /// Returns the zero-extended value of the integer constant \p V.
+  static uint64_t getIntLiteral(const Value *V) {
+    assert(V && "Getting Integer value of nullptr");
+    return cast<ConstantInt>(V)->getZExtValue(); // Asserts a ConstantInt.
+  }
+
 private:
   /// Try to delete parallel regions if possible.
   bool deleteParallelRegions();
@@ -195,6 +243,9 @@
   /// Try to eliminiate runtime calls by reusing existing ones.
   bool deduplicateRuntimeCalls();
 
+  /// Splits the host to device transfer \p MT; for now it is only analyzed.
+  bool splitMemoryTransfer(MemoryTransfer &MT);
+
   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                     bool GlobalOnly, bool &SingleChoice);
 
@@ -213,6 +264,10 @@
       OMPInformationCache::RuntimeFunctionInfo &RFI,
       Value *ReplVal = nullptr);
 
+  /// Tries to hide the latency of runtime calls that involve host to
+  /// device memory transfers.
+  bool hideMemTransfersLatency();
+
   /// Collect arguments that represent the global thread id in \p GTIdArgs.
   void collectGlobalThreadIdArguments(SmallSetVector >IdArgs);
 
Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -26,6 +26,8 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/MemorySSA.h"
 
 using namespace llvm;
 using namespace omp;
@@ -95,6 +97,9 @@
 // Definitions of the OMPInformationCache helper structure.
 //===----------------------------------------------------------------------===//
 
+using MemoryTransfer = OMPInformationCache::MemoryTransfer;
+using OffloadArray = MemoryTransfer::OffloadArray;
+
 void OMPInformationCache::RuntimeFunctionInfo::foreachUse(
     function_ref CB, Function *F, UseVector *Uses) {
   SmallVector ToBeDeleted;
@@ -223,7 +228,7 @@
   if (F->arg_size() != RTFArgTypes.size())
     return false;
 
-  auto RTFTyIt = RTFArgTypes.begin();
+  auto *RTFTyIt = RTFArgTypes.begin();
   for (Argument &Arg : F->args()) {
     if (Arg.getType() != *RTFTyIt)
       return false;
@@ -234,6 +239,156 @@
   return true;
 }
 
+//===----------------------------------------------------------------------===//
+// Definitions of the MemoryTransfer helper structure.
+//===----------------------------------------------------------------------===//
+
+/// Collects the values stored in each offload array argument of the runtime
+/// call that is backed by an alloca; returns false if that fails for one.
+/// NOTE(review): an argument not backed by an alloca is skipped, leaving its
+/// member (BasePtrs/Ptrs/Sizes) null, and is not reported as a failure.
+bool MemoryTransfer::getValuesInOfflArrays() {
+  const unsigned BasePtrsArgNum = 2; // **offload_baseptrs.
+  const unsigned PtrsArgNum = 3;     // **offload_ptrs.
+  const unsigned SizesArgNum = 4;    // **offload_sizes.
+  if (RuntimeCall->arg_size() <= SizesArgNum)
+    return false;
+  const auto &DL = InfoCache.getDL();
+  auto GetValues = [&](unsigned ArgNum, std::unique_ptr<OffloadArray> &OA,
+                       const char *ArrayName) {
+    (void)ArrayName; // Only used by LLVM_DEBUG.
+    auto *V = GetUnderlyingObject(RuntimeCall->getArgOperand(ArgNum), DL);
+    auto *Array = dyn_cast<AllocaInst>(V);
+    if (!Array)
+      return true;
+    OA = std::make_unique<OffloadArray>(Array, InfoCache);
+    if (OA->getValues(RuntimeCall))
+      return true;
+    LLVM_DEBUG(dbgs() << TAG << "Couldn't get " << ArrayName << " in call to "
+                      << RuntimeCall->getName() << " in function "
+                      << RuntimeCall->getCaller()->getName() << "\n");
+    return false;
+  };
+
+  return GetValues(BasePtrsArgNum, BasePtrs, "offload_baseptrs") &&
+         GetValues(PtrsArgNum, Ptrs, "offload_ptrs") &&
+         GetValues(SizesArgNum, Sizes, "offload_sizes");
+}
+
+/// Sizes both vectors to the number of array elements and fills them.
+/// Returns false if the alloca is not an array or the analysis fails.
+bool OffloadArray::getValues(Instruction *Before) {
+  auto *ArrayTy = dyn_cast<ArrayType>(Array->getAllocatedType());
+  if (!ArrayTy)
+    return false;
+
+  initialize(ArrayTy->getNumElements());
+  return getLastAccessesToOfflArray(Before) && getLastStoresInOfflArray();
+}
+
+/// Records in LastAccesses the last visited access (GEP or cast) of each
+/// array slot. The walk stops at the first user that does not dominate
+/// \p Before. Returns false on an unrecognized access or an unaccessed slot.
+bool OffloadArray::getLastAccessesToOfflArray(Instruction *Before) {
+  auto *DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
+      *Array->getFunction());
+  if (Before && !DT) {
+    LLVM_DEBUG(dbgs() << TAG << "Couldn't find DominatorTreeAnalysis.\n");
+    return false;
+  }
+
+  // Visit the users in reverse use-list order (meant to be top to bottom). A
+  // copy is reversed so that the use list of the IR itself is left untouched.
+  SmallVector<User *, 16> Users(Array->user_begin(), Array->user_end());
+  for (User *Usr : llvm::reverse(Users)) {
+    auto *UsrInst = dyn_cast<Instruction>(Usr);
+    if (!UsrInst)
+      continue;
+
+    // If reached lower limit.
+    if (Before && !DT->dominates(UsrInst, Before))
+      break;
+
+    if (auto *Access = dyn_cast<GetElementPtrInst>(UsrInst)) {
+      auto *ArrayIdx = Access->idx_begin() + 1;
+      if (ArrayIdx == Access->idx_end())
+        continue;
+
+      // Only a constant, in-range slot index can be tracked.
+      auto *Idx = dyn_cast<ConstantInt>(ArrayIdx->get());
+      if (!Idx || Idx->uge(LastAccesses.size())) {
+        LLVM_DEBUG(dbgs() << TAG << "Unrecognized access pattern.\n");
+        return false;
+      }
+      LastAccesses[Idx->getZExtValue()] = UsrInst;
+    } else if (UsrInst->isCast()) {
+      // If the access is directly a cast, it means it didn't need the
+      // GEP to the array, which means, an access to the first position
+      // of the array.
+      if (LastAccesses.empty())
+        return false;
+      LastAccesses[0] = UsrInst;
+    } else {
+      LLVM_DEBUG(dbgs() << TAG << "Unrecognized access pattern.\n");
+      return false;
+    }
+  }
+
+  if (!isFilled(LastAccesses)) {
+    LLVM_DEBUG(dbgs() << TAG << "Couldn't get last accesses to offload array.\n");
+    return false;
+  }
+  return true;
+}
+
+/// Records in StoredValues, per array slot, the underlying object of the
+/// value stored through the slot's last access (looking through one cast).
+/// Returns false if an access has no users or does not lead to such a store.
+bool OffloadArray::getLastStoresInOfflArray() {
+  assert(isFilled(LastAccesses) && "LastAccesses must be filled!");
+
+  const auto &DL = InfoCache.getDL();
+  for (unsigned It = 0, NumValues = numValues(); It < NumValues; ++It) {
+    Value *Ptr = LastAccesses[It];
+    if (Ptr->user_empty()) {
+      LLVM_DEBUG(dbgs() << TAG << "Useless access to offload array.\n");
+      return false;
+    }
+
+    // Pick the user at the back of the use list without reordering the list.
+    User *Usr = nullptr;
+    for (User *U : Ptr->users())
+      Usr = U;
+
+    // Look through a cast to its first user; the cast may have none.
+    auto *I = dyn_cast<Instruction>(Usr);
+    if (I && I->isCast()) {
+      Ptr = I;
+      Usr = I->user_empty() ? nullptr : *I->user_begin();
+    }
+
+    // The slot must be the address the store writes to, not the value.
+    auto *Store = dyn_cast_or_null<StoreInst>(Usr);
+    if (!Store || Store->getPointerOperand() != Ptr) {
+      LLVM_DEBUG(dbgs() << TAG << "Unrecognized access pattern.\n");
+      return false;
+    }
+
+    StoredValues[It] = GetUnderlyingObject(Store->getValueOperand(), DL);
+  }
+
+  if (!isFilled(StoredValues)) {
+    LLVM_DEBUG(dbgs() << TAG << "Didn't get last stores to offload array.\n");
+    return false;
+  }
+  return true;
+}
+
+/// Returns true if no element of \p V is null.
+bool OffloadArray::isFilled(const SmallVectorImpl<Value *> &V) {
+  return llvm::all_of(V, [](const Value *E) { return E != nullptr; });
+}
+
 //===----------------------------------------------------------------------===//
 // Declarations and definitions of AAICVTracker.
 //===----------------------------------------------------------------------===//
@@ -443,6 +598,7 @@
     Changed |= runAttributor();
     Changed |= deduplicateRuntimeCalls();
     Changed |= deleteParallelRegions();
+    Changed |= hideMemTransfersLatency();
 
     return Changed;
   }
@@ -558,6 +714,38 @@
   return Changed;
 }
 
+/// Runs splitMemoryTransfer on each regular __tgt_target_data_begin call.
+bool OpenMPOpt::hideMemTransfersLatency() {
+  OMPInformationCache::RuntimeFunctionInfo &RFI =
+      OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin];
+
+  bool Changed = false;
+  auto SplitDataTransfer = [&](Use &U, Function &) {
+    auto *RTCall = getCallIfRegularCall(U, &RFI);
+    if (!RTCall)
+      return false;
+    MemoryTransfer MT(RTCall, OMPInfoCache);
+    // Accumulate: a later call that is not split must not reset the result.
+    const bool Split = splitMemoryTransfer(MT);
+    Changed |= Split;
+    return Split;
+  };
+
+  RFI.foreachUse(SplitDataTransfer);
+  return Changed;
+}
+
+/// Analyzes \p MT only; no IR is changed yet, so the result is always false.
+bool OpenMPOpt::splitMemoryTransfer(MemoryTransfer &MT) {
+  if (!MT.getValuesInOfflArrays()) {
+    LLVM_DEBUG(dbgs() << TAG << "Couldn't get offload arrays in call to "
+                      << MT.RuntimeCall->getName() << " in function "
+                      << MT.RuntimeCall->getCaller()->getName() << "\n");
+    return false;
+  }
+  return false;
+}
+
 Value *OpenMPOpt::combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                       bool GlobalOnly, bool &SingleChoice) {
   if (CurrentIdent == NextIdent)