diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -26,6 +26,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" +#include "llvm/Analysis/ValueTracking.h" using namespace llvm; using namespace omp; @@ -442,6 +443,117 @@ SmallPtrSetImpl &Kernels; }; +/// Used to map the values physically (in the IR) stored in an offload +/// array, to a vector in memory. +struct OffloadArray { + AllocaInst &Array; /// Physical array (in the IR). + SmallVector StoredValues; /// Mapped values. + SmallVector LastAccesses; + + OffloadArray(AllocaInst &Array) : Array(Array) {} + + /// Factory function for creating and initializing the OffloadArray with + /// the values stored in \p Array before the instruction \p Before is + /// reached. + /// This MUST be used instead of the constructor. + static std::unique_ptr initialize( + AllocaInst &Array, Instruction &Before) { + if (!Array.getAllocatedType()->isArrayTy()) + return nullptr; + + auto OA = std::make_unique(Array); + if (!OA->getValues(Before)) + return nullptr; + + return OA; + } + + /// Traverses the BasicBlock where Array is, collecting the stores made to + /// Array, leaving StoredValues with the values stored before the instruction + /// \p Before is reached. + bool getValues(Instruction &Before) { + // Initialize container. + const uint64_t NumValues = + Array.getAllocatedType()->getArrayNumElements(); + StoredValues.assign(NumValues, nullptr); + LastAccesses.assign(NumValues, nullptr); + + // TODO: This assumes the instruction \p Before is in the same + // BasicBlock as Array. Make it general, for any control flow graph. + auto *BB = Array.getParent(); + if (BB != Before.getParent()) + return false; + + for (auto &I : *BB) { + if (&I == &Before) break; + + if (!isa(&I)) + continue; + + auto *S = cast(&I); + auto *Dst = getUnderlyingObject(S->getPointerOperand()); + if (Dst == &Array) { + int64_t Idx = getAccessedIdx(*S); + // Unexpected StoreInst. + if (Idx < 0) + return false; + + StoredValues[Idx] = getUnderlyingObject(S->getValueOperand()); + LastAccesses[Idx] = S; + } + } + + return isFilled(); + } + + /// Returns the Array's index where the store is being made + /// Returns -1 if the index can't be deduced. Assumes \p S as a store + /// to Array. + int64_t getAccessedIdx(StoreInst &S) { + auto *Dst = S.getOperand(1); + if (!isa(Dst)) + return -1; // Unrecognized store pattern. + + auto *DstInst = cast(Dst); + Value *Access = DstInst; + if (DstInst->isCast()) { + Access = DstInst->getOperand(0); + + // Direct cast from the AllocaInst, which means a store to the + // first position of the array. + if (Access == &Array) return 0; + } + + if (!isa(Access)) + return -1; // Unrecognized store pattern. + + auto *GEPInst = cast(Access); + if (!GEPInst->hasIndices()) + return -1; // Unrecognized store pattern. + + auto *ArrayIdx = GEPInst->idx_begin() + 1; + if (ArrayIdx == GEPInst->idx_end()) + return -1; // Unrecognized store pattern. + + cast(ArrayIdx->get())->getValue(); + return cast(ArrayIdx->get())->getZExtValue(); + } + + /// Returns true if all values in StoredValues and + /// LastAccesses are not nullptrs. + bool isFilled() { + const unsigned NumValues = StoredValues.size(); + for (unsigned I = 0; I < NumValues; ++I) { + if (!StoredValues[I] || !LastAccesses[I]) + return false; + } + + return true; + } +}; + +using OffloadArrayPtr = std::unique_ptr; + struct OpenMPOpt { using OptimizationRemarkGetter = @@ -652,6 +764,10 @@ if (!RTCall) return false; + OffloadArrayPtr OffloadArrays[3]; + if (!getValuesInOffloadArrays(*RTCall, OffloadArrays)) + return false; + // TODO: Check if can be moved upwards. bool WasSplit = false; Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); @@ -666,6 +782,59 @@ return Changed; } + /// Maps the values stored in the offload arrays passed as arguments to + /// \p RuntimeCall into the offload arrays in \p OAs. + bool getValuesInOffloadArrays(CallInst &RuntimeCall, OffloadArrayPtr OAs[3]) { + // A runtime call that involves memory offloading looks something like: + // call void @__tgt_target_data_begin_mapper(arg0, arg1, + // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes, + // ...) + // So, the idea is to access the allocas that allocate space for these + // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes. + // Therefore: + // i8** %offload_baseptrs. + const unsigned BasePtrsArgNum = 2; + Value *BasePtrsArg = RuntimeCall.getArgOperand(BasePtrsArgNum); + // i8** %offload_ptrs. + const unsigned PtrsArgNum = 3; + Value *PtrsArg = RuntimeCall.getArgOperand(PtrsArgNum); + // i8** %offload_sizes. + const unsigned SizesArgNum = 4; + Value *SizesArg = RuntimeCall.getArgOperand(SizesArgNum); + + // Get values stored in **offload_baseptrs. + auto *V = getUnderlyingObject(BasePtrsArg); + if (!isa(V)) + return false; + auto *BasePtrsArray = cast(V); + OAs[0] = OffloadArray::initialize(*BasePtrsArray, RuntimeCall); + if (!OAs[0]) + return false; + + // Get values stored in **offload_baseptrs. + V = getUnderlyingObject(PtrsArg); + if (!isa(V)) + return false; + auto *PtrsArray = cast(V); + OAs[1] = OffloadArray::initialize(*PtrsArray, RuntimeCall); + if (!OAs[1]) + return false; + + // Get values stored in **offload_baseptrs. + V = getUnderlyingObject(SizesArg); + if (!isa(V)) { + if (!isa(V)) + return false; + + auto *SizesArray = cast(V); + OAs[2] = OffloadArray::initialize(*SizesArray, RuntimeCall); + if (!OAs[2]) + return false; + } + + return true; + } + /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be /// moved. Returns nullptr if the movement is not possible, or not worth it. Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {