Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -26,6 +26,8 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/Attributor.h"
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ValueTracking.h"
 
 using namespace llvm;
 using namespace omp;
@@ -185,6 +187,28 @@
     DenseMap<Function *, std::unique_ptr<UseVector>> UsesMap;
   };
 
+  /// Used to store/manipulate information about a runtime call that involves
+  /// host to device memory offloading.
+  struct MemoryTransfer {
+    /// Per-element bookkeeping for one offload array of the runtime call.
+    struct OffloadArray {
+      SmallVector<Value *, 8> LastAccesses;
+      SmallVector<Value *, 8> StoredAddresses;
+    };
+
+    CallBase *RuntimeCall;
+    MemorySSA &MSSA;
+    std::unique_ptr<OffloadArray> BasePtrs;
+    std::unique_ptr<OffloadArray> Ptrs;
+    std::unique_ptr<OffloadArray> Sizes;
+
+    MemoryTransfer(CallBase *RuntimeCall, MemorySSA &MSSA)
+        : RuntimeCall{RuntimeCall}, MSSA{MSSA},
+          BasePtrs{std::make_unique<OffloadArray>()},
+          Ptrs{std::make_unique<OffloadArray>()},
+          Sizes{std::make_unique<OffloadArray>()} {}
+  };
+
   /// The slice of the module we are allowed to look at.
   SmallPtrSetImpl<Function *> &ModuleSlice;
 
@@ -367,6 +391,7 @@
 
     Changed |= deduplicateRuntimeCalls();
     Changed |= deleteParallelRegions();
+    Changed |= hideMemTransfersLatency();
 
     return Changed;
   }
@@ -394,6 +419,9 @@
   }
 
 private:
+  /// Helper types.
+  using MemoryTransfer = OMPInformationCache::MemoryTransfer;
+
   /// Try to delete parallel regions if possible.
   bool deleteParallelRegions() {
     const unsigned CallbackCalleeOperand = 2;
@@ -489,6 +517,195 @@
     return Changed;
   }
 
+  /// Tries to hide the latency of runtime calls that involve host to
+  /// device memory transfers.
+  bool hideMemTransfersLatency() {
+    OMPInformationCache::RuntimeFunctionInfo &RFI =
+        OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin];
+
+    bool Changed = false;
+    auto SplitDataTransfer = [&](Use &U, Function &Decl) {
+      auto *RTCall = getCallIfRegularCall(U, &RFI);
+      if (!RTCall)
+        return false;
+
+      auto *MSSAResult =
+          OMPInfoCache.getAnalysisResultForFunction<MemorySSAAnalysis>(
+              *RTCall->getCaller());
+      if (!MSSAResult)
+        return false;
+
+      auto &MSSA = MSSAResult->getMSSA();
+      MemoryTransfer MT(RTCall, MSSA);
+      // Accumulate rather than overwrite, so that a later call that could not
+      // be split does not hide a change made for an earlier one.
+      const bool WasSplit = splitMemoryTransfer(MT);
+      Changed |= WasSplit;
+      return WasSplit;
+    };
+
+    RFI.foreachUse(SplitDataTransfer);
+    return Changed;
+  }
+
+  /// Collects the values stored in the offload arrays passed to the runtime
+  /// call in \p MT. Nothing is transformed yet, so this always returns false.
+  bool splitMemoryTransfer(MemoryTransfer &MT) {
+    bool Changed = false;
+    bool Success = getValuesInOfflArrays(MT);
+    if (!Success) {
+      LLVM_DEBUG(dbgs() << TAG << "Couldn't get offload arrays in call to "
+                        << MT.RuntimeCall->getName() << " in function "
+                        << MT.RuntimeCall->getCaller()->getName() << "\n");
+      return false;
+    }
+
+    return Changed;
+  }
+
+  /// Fills the offload arrays of \p MT with the values stored in the
+  /// offload_baseptrs, offload_ptrs and offload_sizes arguments of its runtime
+  /// call. Returns false as soon as one of them cannot be analyzed.
+  bool getValuesInOfflArrays(MemoryTransfer &MT) {
+    auto *RuntimeCall = MT.RuntimeCall;
+    // Bind by reference to avoid copying the DataLayout.
+    const auto &DL = OMPInfoCache.getDL();
+
+    // Collects the values stored in the array passed as argument \p ArgNo.
+    auto CollectValues = [&](unsigned ArgNo, MemoryTransfer::OffloadArray &Dst,
+                             StringRef ArrayName) {
+      auto *V = GetUnderlyingObject(RuntimeCall->getArgOperand(ArgNo), DL);
+      if (getValuesInOfflArray(V, Dst, RuntimeCall))
+        return true;
+
+      LLVM_DEBUG(dbgs() << TAG << "Couldn't get " << ArrayName
+                        << " in call to " << RuntimeCall->getName()
+                        << " in function "
+                        << RuntimeCall->getCaller()->getName() << "\n");
+      return false;
+    };
+
+    return CollectValues(2, *MT.BasePtrs, "offload_baseptrs") &&
+           CollectValues(3, *MT.Ptrs, "offload_ptrs") &&
+           CollectValues(4, *MT.Sizes, "offload_sizes");
+  }
+
+  /// Gets the values stored in \p OfflArray and stores them in \p Dst.
+  /// \p Before serves as a lower bound, so don't look at accesses after that.
+  /// Returns false when \p OfflArray is not an alloca of array type, or when
+  /// some element is not set through a constant-index GEP whose first user is
+  /// a store (possibly behind a single cast).
+  bool getValuesInOfflArray(Value *OfflArray,
+                            MemoryTransfer::OffloadArray &Dst,
+                            User *Before = nullptr) {
+    assert(OfflArray && "Can't get values in nullptr!");
+
+    // getArrayNumElements() below is only valid for array types, so reject
+    // every other kind of alloca as well.
+    auto *ArrayAlloc = dyn_cast<AllocaInst>(OfflArray);
+    if (!ArrayAlloc || !ArrayAlloc->getAllocatedType()->isArrayTy()) {
+      LLVM_DEBUG(dbgs() << TAG << "Only alloca arrays supported.\n");
+      return false;
+    }
+
+    const uint64_t NumValues =
+        ArrayAlloc->getAllocatedType()->getArrayNumElements();
+
+    auto &LastAccesses = Dst.LastAccesses;
+    auto &StoredAddresses = Dst.StoredAddresses;
+    LastAccesses.assign(NumValues, nullptr);
+    StoredAddresses.assign(NumValues, nullptr);
+
+    // Get last accesses to the array right before Before.
+    // NOTE(review): this walks the use list and assumes its order is
+    // meaningful relative to \p Before; confirm that this holds.
+    for (auto *Usr : OfflArray->users()) {
+      // If reached lower limit.
+      if (Before && Usr == Before)
+        break;
+
+      auto *Access = dyn_cast<GetElementPtrInst>(Usr);
+      if (!Access)
+        continue;
+
+      // The array element is selected by the second index, if there is one.
+      if (Access->getNumIndices() < 2)
+        continue;
+
+      // A variable or out-of-range index can't be mapped to an element.
+      auto *ArrayIdx = dyn_cast<ConstantInt>(Access->getOperand(2));
+      if (!ArrayIdx || ArrayIdx->getValue().uge(NumValues)) {
+        LLVM_DEBUG(dbgs() << TAG << "Unrecognized access pattern.\n");
+        return false;
+      }
+
+      LastAccesses[getIntLiteral(ArrayIdx)] = Usr;
+    }
+
+    // Get stored addresses.
+    for (uint64_t It = 0; It < NumValues; ++It) {
+      auto *Accs = LastAccesses[It];
+      // No access was recorded for this element.
+      if (!Accs) {
+        LLVM_DEBUG(dbgs() << TAG
+                          << "Didn't get all values in offload array.\n");
+        return false;
+      }
+
+      auto AccsUsr = Accs->user_begin();
+      if (AccsUsr == Accs->user_end()) {
+        LLVM_DEBUG(dbgs() << TAG << "Useless access to offload array.\n");
+        return false;
+      }
+
+      // Look through one cast of the element address, if any.
+      auto *I = dyn_cast<Instruction>(*AccsUsr);
+      if (I && I->isCast()) {
+        AccsUsr = I->user_begin();
+        if (AccsUsr == I->user_end()) {
+          LLVM_DEBUG(dbgs() << TAG << "Useless access to offload array.\n");
+          return false;
+        }
+      }
+
+      auto *Store = dyn_cast<StoreInst>(*AccsUsr);
+      if (!Store) {
+        LLVM_DEBUG(dbgs() << TAG << "Unrecognized access pattern.\n");
+        return false;
+      }
+
+      StoredAddresses[It] =
+          GetUnderlyingObject(Store->getValueOperand(), OMPInfoCache.getDL());
+    }
+
+    if (!isFilled(Dst)) {
+      LLVM_DEBUG(dbgs() << TAG << "Didn't get all values in offload array.\n");
+      return false;
+    }
+
+    return true;
+  }
+
+  /// Returns true if every slot of \p OA holds a non-null value.
+  bool isFilled(const MemoryTransfer::OffloadArray &OA) {
+    for (auto *Acc : OA.LastAccesses)
+      if (!Acc)
+        return false;
+
+    for (auto *Addr : OA.StoredAddresses)
+      if (!Addr)
+        return false;
+    return true;
+  }
+
+  /// Returns the integer representation of \p V, which must be a ConstantInt.
+  static uint64_t getIntLiteral(const Value *V) {
+    assert(V && "Getting Integer value of nullptr");
+    // cast<> asserts on a type mismatch; dyn_cast<> would yield a null pointer
+    // that was then dereferenced unconditionally.
+    return cast<ConstantInt>(V)->getZExtValue();
+  }
+
   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                     bool GlobalOnly, bool &SingleChoice) {
     if (CurrentIdent == NextIdent)