Index: /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
===================================================================
--- /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -40,7 +40,7 @@
 namespace {
 struct PointerOffsetPair {
   Value *Pointer;
-  uint64_t Offset;
+  int64_t Offset;
 };

 struct LoadPOPPair {
@@ -82,7 +82,16 @@
   PointerOffsetPair getPointerOffsetPair(LoadInst &);
   bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
   bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
-  bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
+  bool combineLoads(SmallVectorImpl<LoadPOPPair> &, int = 0);
+  bool combineLoadsSection(SmallVectorImpl<LoadPOPPair> &, unsigned, unsigned,
+                           unsigned);
+  bool
+  checkStoreInstAlias(Value *,
+                      DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
+  bool
+  checkGenericInstAlias(Instruction *,
+                        DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
+  const Value *getTopLevelPointer(const Value *);
 };
 }

@@ -102,7 +111,7 @@
       unsigned BitWidth = DL.getPointerTypeSizeInBits(GEP->getType());
       APInt Offset(BitWidth, 0);
       if (GEP->accumulateConstantOffset(DL, Offset))
-        POP.Offset += Offset.getZExtValue();
+        POP.Offset += Offset.getSExtValue();
       else
         // Can't handle GEPs with variable indices.
         return POP;
@@ -138,10 +147,10 @@
   LoadInst *BaseLoad = nullptr;
   SmallVector<LoadPOPPair, 8> AggregateLoads;
   bool Combined = false;
-  uint64_t PrevOffset = -1ull;
-  uint64_t PrevSize = 0;
+  int64_t PrevOffset = LLONG_MAX;
+  int64_t PrevSize = 0;
   for (auto &L : Loads) {
-    if (PrevOffset == -1ull) {
+    if (PrevOffset == LLONG_MAX) {
       BaseLoad = L.Load;
       PrevOffset = L.POP.Offset;
       PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
@@ -156,7 +165,7 @@
       if (combineLoads(AggregateLoads))
         Combined = true;
       AggregateLoads.clear();
-      PrevOffset = -1;
+      PrevOffset = LLONG_MAX;
       continue;
     }
     if (L.POP.Offset != PrevOffset + PrevSize)
@@ -174,16 +183,44 @@
 }

 /// \brief Given a list of combinable load. Combine the maximum number of them.
-bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
+bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads,
+                               int Offset) {
+  bool Combined = false;
+
+  // Set the count of LoadPOPPairs to consider.
+  unsigned LoadCount = Loads.size();
+
   // Remove loads from the end while the size is not a power of 2.
   unsigned TotalSize = 0;
-  for (const auto &L : Loads)
-    TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
-  while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
-    TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
-  if (Loads.size() < 2)
-    return false;
+  auto LoadsEnd = Loads.end();
+  for (auto L = Loads.begin() + Offset; L != LoadsEnd; ++L)
+    TotalSize += L->Load->getType()->getPrimitiveSizeInBits();
+  while (TotalSize != 0 && !isPowerOf2_32(TotalSize)) {
+    --LoadCount;
+    TotalSize -= Loads[LoadCount].Load->getType()->getPrimitiveSizeInBits();
+  }
+
+  // Combine the first section of loads and make sure that we are combining
+  // more than one.
+  if (LoadCount - Offset > 1 &&
+      combineLoadsSection(Loads, Offset, LoadCount - Offset, TotalSize))
+    Combined = true;
+
+  // Check for extra loads to consider.
+  if (Loads.size() - LoadCount < 2)
+    return Combined;
+
+  // Consider the next section of loads.
+  if (combineLoads(Loads, LoadCount))
+    Combined = true;
+
+  return Combined;
+}
+// Combine a subsection of the loads array.
+bool LoadCombine::combineLoadsSection(SmallVectorImpl<LoadPOPPair> &Loads,
+                                      unsigned Offset, unsigned Count,
+                                      unsigned TotalSize) {

   DEBUG({
     dbgs() << "***** Combining Loads ******\n";
     for (const auto &L : Loads) {
@@ -191,41 +228,128 @@
     }
   });

+  // Begin and end iterators for the section given by Offset and Count.
+  const auto LoadsBegin = Loads.begin() + Offset;
+  const auto LoadsEnd = LoadsBegin + Count;
+
   // Find first load. This is where we put the new load.
   LoadPOPPair FirstLP;
   FirstLP.InsertOrder = -1u;
-  for (const auto &L : Loads)
-    if (L.InsertOrder < FirstLP.InsertOrder)
-      FirstLP = L;
+  for (auto L = LoadsBegin; L != LoadsEnd; ++L)
+    if (L->InsertOrder < FirstLP.InsertOrder)
+      FirstLP = *L;

+  // Get the address space.
   unsigned AddressSpace =
       FirstLP.POP.Pointer->getType()->getPointerAddressSpace();

+  // The offset into the base pointer.
+  int64_t PtrOffset = Loads[Offset].POP.Offset;
+
+  // Create a new GEP for this pointer and a LoadInst for the wider load.
   Builder->SetInsertPoint(FirstLP.Load);
   Value *Ptr = Builder->CreateConstGEP1_64(
-      Builder->CreatePointerCast(Loads[0].POP.Pointer,
+      Builder->CreatePointerCast(Loads[Offset].POP.Pointer,
                                  Builder->getInt8PtrTy(AddressSpace)),
-      Loads[0].POP.Offset);
+      PtrOffset);
   LoadInst *NewLoad = new LoadInst(
       Builder->CreatePointerCast(
           Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
                                 Ptr->getType()->getPointerAddressSpace())),
-      Twine(Loads[0].Load->getName()) + ".combined", false,
-      Loads[0].Load->getAlignment(), FirstLP.Load);
-
-  for (const auto &L : Loads) {
+      Twine(Loads[Offset].Load->getName()) + ".combined", false,
+      Loads[Offset].Load->getAlignment(), FirstLP.Load);
+  // Create extracts for each of the loads.
+  for (auto LIter = LoadsBegin; LIter != LoadsEnd; ++LIter) {
+    auto &L = *LIter;
     Builder->SetInsertPoint(L.Load);
     Value *V = Builder->CreateExtractInteger(
         L.Load->getModule()->getDataLayout(), NewLoad,
-        cast<IntegerType>(L.Load->getType()),
-        L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
+        cast<IntegerType>(L.Load->getType()), L.POP.Offset - PtrOffset,
+        "combine.extract");
     L.Load->replaceAllUsesWith(V);
   }

-  NumLoadsCombined = NumLoadsCombined + Loads.size();
+  NumLoadsCombined = NumLoadsCombined + Count;
   return true;
 }
+
+// Handle an alias from a store instruction.
+bool LoadCombine::checkStoreInstAlias(
+    Value *SIPtrOp,
+    DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
+  // Get the top level pointer parent.
+  const Value *Parent = getTopLevelPointer(SIPtrOp);
+  if (!Parent)
+    return false;
+
+  // Check the load map for uses.
+  auto Loads = LoadMap.find(Parent);
+  if (Loads == LoadMap.end())
+    return false;
+
+  bool Combined = false;
+
+  // Only try to combine when there is more than a single load.
+  if (Loads->second.size() > 1) {
+    // Combine the loads; the map entry is erased below.
+    if (aggregateLoads(Loads->second))
+      Combined = true;
+  }
+
+  LoadMap.erase(Loads);
+  return Combined;
+}
+
+// Get the top-level pointer by stripping GEPs and bitcasts.
+const Value *LoadCombine::getTopLevelPointer(const Value *V) {
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
+    return getTopLevelPointer(GEP->getPointerOperand());
+  if (auto *BC = dyn_cast<BitCastInst>(V))
+    return getTopLevelPointer(BC->getOperand(0));
+  if (!V->getType()->isPointerTy())
+    return nullptr;
+  return V;
+}
+
+bool LoadCombine::checkGenericInstAlias(
+    Instruction *V,
+    DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
+  // Whether we combined any loads.
+  bool Combined = false;
+
+  // Loop over each load and check to see if it is aliased by
+  // this instruction.
+  auto LoadMapEnd = LoadMap.end();
+  for (auto Loads = LoadMap.begin(); Loads != LoadMapEnd;) {
+    // Check the AA results for an alias.
+    // FIXME: We could calculate the size of the memory location instead of
+    // leaving it unknown.
+    auto RefInfo =
+        AA->getModRefInfo(V, MemoryLocation(getTopLevelPointer(Loads->first)));
+
+    // If the instruction may modify this location, flush these loads.
+    if (RefInfo == ModRefInfo::MRI_Mod || RefInfo == ModRefInfo::MRI_ModRef) {
+      // Check the load count and try to aggregate.
+      if (Loads->second.size() > 1 && aggregateLoads(Loads->second))
+        Combined = true;
+
+      // Save the current iterator before advancing past it.
+      auto CurLoads = Loads++;
+
+      // Remove the loads.
+      LoadMap.erase(CurLoads);
+
+      // Update the end iterator and continue.
+      LoadMapEnd = LoadMap.end();
+      continue;
+    }
+
+    // Update the iterator.
+    ++Loads;
+  }
+  return Combined;
+}
+
 bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
   if (skipBasicBlock(BB))
     return false;
@@ -237,16 +361,33 @@
   Builder = &TheBuilder;

   DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
-  AliasSetTracker AST(*AA);

   bool Combined = false;
   unsigned Index = 0;
   for (auto &I : BB) {
-    if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
+    // If this instruction may throw, we need to combine the LoadMap now.
+    if (I.mayThrow()) {
       if (combineLoads(LoadMap))
         Combined = true;
       LoadMap.clear();
-      AST.clear();
+      continue;
+    }
+    // Check if the instruction might write to memory.
+    if (I.mayWriteToMemory()) {
+      // FIXME: Could add specializations for the other simple mayWrite ops.
+      // Check to see if this is a store instance. Provide a special check for
+      // store instances because they are the most common and can be handled
+      // quickly.
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        // Check to see if the pointer operand aliases anything in our
+        // LoadMap.
+        if (checkStoreInstAlias(SI->getPointerOperand(), LoadMap))
+          Combined = true;
+        continue;
+      }
+      // Check for an alias via the generic alias analysis query.
+      if (checkGenericInstAlias(&I, LoadMap))
+        Combined = true;
       continue;
     }
     LoadInst *LI = dyn_cast<LoadInst>(&I);
@@ -259,7 +400,6 @@
     if (!POP.Pointer)
       continue;
     LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
-    AST.add(LI);
   }
   if (combineLoads(LoadMap))
     Combined = true;
@@ -268,10 +408,10 @@

 char LoadCombine::ID = 0;

-BasicBlockPass *llvm::createLoadCombinePass() {
-  return new LoadCombine();
+BasicBlockPass *llvm::createLoadCombinePass() {
+  return new LoadCombine();
 }

 INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
+INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
\ No newline at end of file
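For review convenience, here is a standalone model (not part of the patch) of the sectioning scheme the new combineLoads overload implements: peel off the longest prefix of the contiguous run whose total bit width is a power of two, combine that section when it contains more than one load, then recurse on the remainder. All names are illustrative. Note that the model adds a LoadCount > Offset guard that the patch itself does not have; without it, a run whose sizes never sum to a power of two would recurse forever.

#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors llvm::isPowerOf2_32.
static bool isPowerOf2(uint32_t V) { return V && !(V & (V - 1)); }

// Standalone model of LoadCombine::combineLoads(Loads, Offset); SizesInBits
// stands in for each load's primitive size in bits.
static void combineSections(const std::vector<uint32_t> &SizesInBits,
                            unsigned Offset = 0) {
  unsigned LoadCount = static_cast<unsigned>(SizesInBits.size());
  uint32_t TotalSize = 0;
  for (unsigned I = Offset; I < LoadCount; ++I)
    TotalSize += SizesInBits[I];

  // Drop loads from the end until the section size is a power of two.
  while (TotalSize != 0 && !isPowerOf2(TotalSize))
    TotalSize -= SizesInBits[--LoadCount];

  // "Combine" the section when it holds more than one load.
  if (LoadCount - Offset > 1)
    std::cout << "combine loads [" << Offset << ", " << LoadCount
              << ") into one i" << TotalSize << " load\n";

  // Recurse on the leftover section. The LoadCount > Offset check guards the
  // degenerate case where no prefix had a power-of-two total.
  if (LoadCount > Offset && SizesInBits.size() - LoadCount >= 2)
    combineSections(SizesInBits, LoadCount);
}

int main() {
  // i8 i8 i16 i32 i16 i16: prints "combine loads [0, 4) into one i64 load",
  // then "combine loads [4, 6) into one i32 load".
  combineSections({8, 8, 16, 32, 16, 16});
}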
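Likewise, this sketch shows what a combined section amounts to at runtime: the wide ".combined" load replaces the individual loads, and each original value is recovered by the shift-and-truncate sequence that CreateExtractInteger emits. The Packet struct and all names are made up for the example, and it hard-codes little-endian byte order, whereas CreateExtractInteger consults the DataLayout's endianness.

#include <cstdint>
#include <cstring>
#include <iostream>

// Four adjacent bytes, as if loaded by four separate i8 loads.
struct Packet {
  uint8_t A, B, C, D;
};

int main() {
  Packet P = {0x11, 0x22, 0x33, 0x44};

  // Before: four separate byte loads.
  uint32_t Sum = P.A + P.B + P.C + P.D;

  // After: one 32-bit load (the ".combined" load) ...
  uint32_t Combined;
  std::memcpy(&Combined, &P, sizeof(Combined));

  // ... plus one "combine.extract" per original load. This assumes
  // little-endian layout; the real builder call adjusts the shift amounts
  // for big-endian targets.
  uint32_t Sum2 = uint8_t(Combined) + uint8_t(Combined >> 8) +
                  uint8_t(Combined >> 16) + uint8_t(Combined >> 24);

  std::cout << Sum << " == " << Sum2 << '\n';
  return Sum == Sum2 ? 0 : 1;
}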
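Finally, since checkGenericInstAlias erases LoadMap entries while iterating over the map, here is the same save-advance-erase idiom on a std::map, where the standard guarantees erase only invalidates iterators to the erased element. DenseMap::erase likewise leaves other iterators usable (it tombstones the bucket rather than rehashing), which is what the loop in the patch relies on; refreshing LoadMapEnd after each erase is defensive. The map contents below are illustrative.

#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  // Stand-in for LoadMap: base pointer name -> pending loads.
  std::map<std::string, std::vector<int>> LoadMap = {
      {"p", {1, 2}}, {"q", {3}}, {"r", {4, 5, 6}}};

  // Stand-in for the mod/ref query: everything but "q" may be clobbered.
  auto MayBeClobbered = [](const std::string &Key) { return Key != "q"; };

  for (auto It = LoadMap.begin(); It != LoadMap.end();) {
    if (MayBeClobbered(It->first)) {
      auto Cur = It++;    // advance first ...
      LoadMap.erase(Cur); // ... then erase the saved iterator
      continue;
    }
    ++It;
  }

  for (const auto &Entry : LoadMap)
    std::cout << Entry.first << " survives\n"; // prints only "q survives"
}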