Index: llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
+++ llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
@@ -144,6 +144,7 @@
     struct OffloadArray {
       AllocaInst &Array; /// Physical array (in the IR).
       SmallVector StoredValues; /// Mapped values.
+      SmallVector LastAccesses; /// Last store made to each index.
       InformationCache &InfoCache;
 
       /// Factory function for creating and initializing the OffloadArray with
@@ -161,16 +162,17 @@
 
     private:
       /// Traverses the BasicBlocks collecting the stores made to
-      /// OffloadArray::Array, leaving OffloadArray::StoredValues with the
-      /// values stored before the instruction \p Before is reached.
+      /// Array, leaving StoredValues with the values stored before
+      /// the instruction \p Before is reached.
       bool getValues(Instruction &Before);
 
-      /// Returns the index of OffloadArray::Array where the store is being
+      /// Returns the index of Array where the store is being
       /// made. Returns -1 if the index can't be deduced.
       int32_t getAccessedIdx(StoreInst &S);
 
-      /// Returns true all values in \p V are not nullptrs.
-      static bool isFilled(const SmallVectorImpl &V);
+      /// Returns true if all values in StoredValues and
+      /// LastAccesses are not nullptrs.
+      bool isFilled();
     };
 
     CallBase *RuntimeCall; /// Call that involves a memotry transfer.
@@ -178,9 +180,13 @@
 
     /// These help mapping the values in offload_baseptrs, offload_ptrs, and
     /// offload_sizes, respectively.
-    std::unique_ptr BasePtrs;
-    std::unique_ptr Ptrs;
-    std::unique_ptr Sizes;
+    std::unique_ptr BasePtrs = nullptr;
+    std::unique_ptr Ptrs = nullptr;
+    std::unique_ptr Sizes = nullptr;
+
+    /// Set of instructions that compose the argument setup for the call
+    /// RuntimeCall.
+    SetVector Issue;
 
     MemoryTransfer(CallBase *RuntimeCall, InformationCache &InfoCache)
         : RuntimeCall{RuntimeCall}, InfoCache{InfoCache}
@@ -188,11 +194,30 @@
 
     /// Maps the values physically (the IR) stored in the offload arrays
     /// offload_baseptrs, offload_ptrs, offload_sizes to their corresponding
-    /// members, MemoryTransfer::BasePtrs, MemoryTransfer::Ptrs,
-    /// MemoryTransfer::Sizes.
+    /// members, BasePtrs, Ptrs, Sizes.
     /// Returns false if one of the arrays couldn't be processed or some of the
     /// values couldn't be found.
     bool getValuesInOffloadArrays();
+
+    /// Groups the instructions that compose the argument setup for the call
+    /// RuntimeCall.
+    bool detectIssue();
+
+    /// Returns true if \p I might modify some of the values in the
+    /// offload arrays.
+    bool mayBeModifiedBy(Instruction *I);
+
+  private:
+    /// Gets the setup instructions for each of the values in \p OA. These
+    /// instructions are stored into Issue.
+    bool getSetupInstructions(std::unique_ptr &OA);
+    /// Gets the setup instructions for the pointer operand of \p S.
+    bool getPointerSetupInstructions(StoreInst *S);
+    /// Gets the setup instructions for the value operand of \p S.
+    bool getValueSetupInstructions(StoreInst *S);
+
+    /// Returns true if \p I may modify one of the values in \p Values.
+    bool mayModify(Instruction *I, SmallVectorImpl &Values);
   };
 
   /// The slice of the module we are allowed to look at.
@@ -272,6 +297,10 @@
   /// handle for the memory transfer to finish.
   bool hideMemTransfersLatency();
 
+  /// Returns a pointer to the instruction where the "issue" of \p MT can be
+  /// moved. Returns nullptr if the movement is not possible, or not worth it.
+  Instruction *canBeMovedUpwards(MemoryTransfer &MT);
+
   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                     bool GlobalOnly, bool &SingleChoice);
 
Index: llvm/lib/Transforms/IPO/OpenMPOpt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 
 using namespace llvm;
 using namespace omp;
@@ -325,6 +326,185 @@
   return true;
 }
 
+bool MemoryTransfer::detectIssue() {
+  assert(BasePtrs && Ptrs && "No offload arrays to look at!");
+
+  bool Success = getSetupInstructions(BasePtrs);
+  if (!Success) {
+    LLVM_DEBUG(dbgs() << TAG << "Couldn't get setup instructions of "
+                      << "offload_baseptrs. In call to "
+                      << RuntimeCall->getName() << " in function "
+                      << RuntimeCall->getCaller()->getName() << "\n");
+    return false;
+  }
+
+  Success = getSetupInstructions(Ptrs);
+  if (!Success) {
+    LLVM_DEBUG(dbgs() << TAG << "Couldn't get setup instructions of "
+                      << "offload_ptrs. In call to "
+                      << RuntimeCall->getName() << " in function "
+                      << RuntimeCall->getCaller()->getName() << "\n");
+    return false;
+  }
+
+  if (Sizes) {
+    Success = getSetupInstructions(Sizes);
+    if (!Success) {
+      LLVM_DEBUG(dbgs() << TAG << "Couldn't get setup instructions of "
+                        << "offload_sizes. In call to "
+                        << RuntimeCall->getName() << " in function "
+                        << RuntimeCall->getCaller()->getName() << "\n");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool MemoryTransfer::getSetupInstructions(std::unique_ptr &OA) {
+  for (auto *S : OA->LastAccesses) {
+    if (!getValueSetupInstructions(S))
+      return false;
+
+    if (!getPointerSetupInstructions(S))
+      return false;
+
+    Issue.insert(S);
+  }
+  return true;
+}
+
+bool MemoryTransfer::getPointerSetupInstructions(StoreInst *S) {
+  auto *P = S->getPointerOperand();
+
+  // TODO: P might be a global value. Make it general.
+  if (!isa(P))
+    return false;
+
+  auto *DstInst = cast(P);
+  if (isa(DstInst)) {
+    Issue.insert(DstInst);
+
+  } else if (DstInst->isCast()) {
+    auto *Casted = DstInst->getOperand(0);
+
+    // TODO: Casted might be a global value. Make it general.
+    if (!isa(Casted))
+      return false;
+
+    if (auto *GEP = dyn_cast(Casted))
+      Issue.insert(GEP);
+
+    Issue.insert(DstInst);
+  }
+
+  return true;
+}
+
+bool MemoryTransfer::getValueSetupInstructions(StoreInst *S) {
+  auto *V = S->getValueOperand();
+  // Auxiliary storage to later insert the found instructions in the order
+  // needed.
+  SmallVector TempStorage;
+  bool Success = false;
+  unsigned MaxLookup = 6;
+  for (unsigned I = 0; I < MaxLookup; ++I) {
+    if (isa(V) || isa(V) || isa(V) ||
+        isa(V)) {
+      Success = true;
+      break;
+    }
+
+    if (!isa(V)) {
+      Success = false;
+      break;
+    }
+
+    auto *Inst = cast(V);
+    TempStorage.push_back(Inst);
+
+    // FIXME: Inst might depend on more instructions through its second operand.
+    V = Inst->getOperand(0);
+  }
+
+  if (Success)
+    while (!TempStorage.empty())
+      Issue.insert(TempStorage.pop_back_val());
+
+  return Success;
+}
+
+bool MemoryTransfer::mayBeModifiedBy(Instruction *I) {
+  assert(BasePtrs && Ptrs && "No offload addresses to analyze!");
+  if (Issue.count(I))
+    return false;
+
+  if (mayModify(I, BasePtrs->StoredValues))
+    return true;
+  if (mayModify(I, Ptrs->StoredValues))
+    return true;
+  if (Sizes) {
+    if (mayModify(I, Sizes->StoredValues))
+      return true;
+  }
+
+  return false;
+}
+
+bool MemoryTransfer::mayModify(Instruction *I,
+                               SmallVectorImpl &Values) {
+  assert(I && "Can't analyze nullptr!");
+  auto *AAResults = InfoCache.getAnalysisResultForFunction(
+      *RuntimeCall->getCaller());
+  if (!AAResults) {
+    LLVM_DEBUG(dbgs() << TAG << "Couldn't get AAManager in function "
+                      << RuntimeCall->getCaller()->getName() << "\n");
+    return true;
+  }
+
+  const DataLayout &DL = InfoCache.getDL();
+
+  if (isa(I)) {
+    auto *Dst = GetUnderlyingObject(I->getOperand(1), DL);
+    for (auto *V : Values) {
+      if (Dst == V) {
+        return true;
+      }
+    }
+    // The store targets none of the tracked values: not a modification.
+    return false;
+  }
+
+  if (isa(I)) {
+    for (auto *V : Values) {
+      // FIXME: This usage of the AAResults is not working properly. It always
+      // returns that the call instruction I may modify a value V.
+      // For example:
+      //   define i32 @func(double* noalias %a) {
+      //     ...
+      //     %1 = call i32 @rand()
+      //     ...
+      //   }
+      // The getModRefInfo always returns that rand() modifies %a, even
+      // though it has the noalias attribute.
+      // NOTE(review): LocationSize is expressed in bytes, whereas
+      // getPrimitiveSizeInBits() yields bits (and 0 for pointer types), so the
+      // queried location is probably not the intended one -- TODO confirm.
+      auto ModRefResult = AAResults->getModRefInfo(
+          I, MemoryLocation(V, LocationSize::precise(
+                 V->getType()->getPrimitiveSizeInBits()))
+      );
+      if (isModSet(ModRefResult))
+        return true;
+    }
+    // The analysis reports no tracked value as modified by the call.
+    return false;
+  }
+
+  // Any other kind of instruction is conservatively assumed to modify them.
+  return true;
+}
+
 std::unique_ptr OffloadArray::initialize(
     AllocaInst &Array, Instruction &Before, InformationCache &InfoCache) {
   if (!Array.getAllocatedType()->isArrayTy()) {
@@ -347,6 +527,7 @@
 
   const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
   StoredValues.assign(NumValues, nullptr);
+  LastAccesses.assign(NumValues, nullptr);
 
   // TODO: This assumes the instruction \p Before is in the same BasicBlock
   // as OffloadArray::Array. Make it general, for any control flow graph.
@@ -361,22 +542,23 @@
   for (auto &I : *BB) {
     if (&I == &Before)
       break;
-    if (isa(&I)) {
-      auto *Dst = GetUnderlyingObject(I.getOperand(1), DL);
+    if (auto *S = dyn_cast(&I)) {
+      auto *Dst = GetUnderlyingObject(S->getPointerOperand(), DL);
 
       if (Dst == &Array) {
-        int32_t AccessedIdx = getAccessedIdx(*cast(&I));
-        if (AccessedIdx < 0) {
+        int32_t Idx = getAccessedIdx(*S);
+        if (Idx < 0) {
           LLVM_DEBUG(dbgs() << TAG << "Unexpected StoreInst\n");
           return false;
         }
 
-        StoredValues[AccessedIdx] = GetUnderlyingObject(I.getOperand(0), DL);
+        StoredValues[Idx] = GetUnderlyingObject(S->getValueOperand(), DL);
+        LastAccesses[Idx] = S;
       }
     }
   }
 
-  return isFilled(StoredValues);
+  return isFilled();
 }
 
 int32_t OffloadArray::getAccessedIdx(StoreInst &S) {
@@ -411,10 +593,12 @@
   return OpenMPOpt::getIntLiteral(ArrayIdx->get());
 }
 
-bool OffloadArray::isFilled(const SmallVectorImpl &V) {
-  for (auto *E : V)
-    if (!E)
+bool OffloadArray::isFilled() {
+  const unsigned NumValues = StoredValues.size();
+  for (unsigned I = 0; I < NumValues; ++I) {
+    if (!StoredValues[I] || !LastAccesses[I])
       return false;
+  }
 
   return true;
 }
@@ -754,6 +938,11 @@
     if (!RTCall)
       return false;
 
+    LLVM_DEBUG({
+      dbgs() << "---------------------- In function\n";
+      RTCall->getCaller()->print(dbgs());
+      dbgs() << "\n";
+    });
     MemoryTransfer MT(RTCall, OMPInfoCache);
     bool Success = MT.getValuesInOffloadArrays();
     if (!Success) {
@@ -762,6 +951,18 @@
                         << MT.RuntimeCall->getCaller()->getName() << "\n");
       return false;
     }
+
+    Success = MT.detectIssue();
+    if (!Success) {
+      LLVM_DEBUG(dbgs() << TAG << "Couldn't detect issue in call to "
+                        << MT.RuntimeCall->getName() << " in function "
+                        << MT.RuntimeCall->getCaller()->getName() << "\n");
+      return false;
+    }
+
+    if (auto *I = canBeMovedUpwards(MT)) {
+      // TODO: Split call and move "issue" below I.
+    }
     return false;
   };
 
@@ -769,6 +970,41 @@
   return Changed;
 }
 
+Instruction *OpenMPOpt::canBeMovedUpwards(MemoryTransfer &MT) {
+  assert(MT.Issue.size() > 0 && "There's not set of instructions to be moved!");
+
+  CallBase *RC = MT.RuntimeCall;
+  auto *MSSAResult =
+      OMPInfoCache.getAnalysisResultForFunction(
+          *RC->getCaller());
+  if (!MSSAResult) {
+    LLVM_DEBUG(dbgs() << TAG << "Couldn't get MemorySSAAnalysis in function "
+                      << RC->getCaller()->getName() << "\n");
+    return nullptr;
+  }
+
+  auto &MSSA = MSSAResult->getMSSA();
+  auto *MSSAWalker = MSSA.getWalker();
+  const auto *LiveOnEntry = MSSA.getLiveOnEntryDef();
+  auto *MemAccess = MSSAWalker->getClobberingMemoryAccess(RC);
+
+  while (MemAccess != LiveOnEntry) {
+    // An access without an associated memory instruction (e.g. a MemoryPhi)
+    // can't be inspected. MemAccess is not advanced on this path, so bail out
+    // rather than looping on the same access forever.
+    if (!isa(MemAccess))
+      break;
+
+    auto *MemInst = (cast(MemAccess))->getMemoryInst();
+    if (MT.mayBeModifiedBy(MemInst))
+      return MemInst;
+
+    MemAccess = MSSAWalker->getClobberingMemoryAccess(MemAccess);
+  }
+
+  return nullptr;
+}
+
 Value *OpenMPOpt::combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                       bool GlobalOnly, bool &SingleChoice) {
   if (CurrentIdent == NextIdent)