Index: /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
===================================================================
--- /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -83,6 +83,14 @@
   bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
   bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
   bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
+
+  bool
+  checkPointerAlias(Value *,
+                    DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
+  bool
+  checkGenericInstAlias(Instruction *,
+                        DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
+  const Value *getTopLevelPointer(const Value *);
 };
 }

@@ -226,6 +234,83 @@
   return true;
 }

+// Handle a potential alias from a store instruction's pointer operand.
+bool LoadCombine::checkPointerAlias(
+    Value *SIPtrOp,
+    DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
+  // Get the top level pointer parent.
+  const Value *Parent = getTopLevelPointer(SIPtrOp);
+  if (!Parent)
+    return false;
+
+  // Check the load map for uses.
+  auto Loads = LoadMap.find(Parent);
+  if (Loads == LoadMap.end())
+    return false;
+
+  bool Combined = false;
+
+  // Only try to combine when more than one load uses this pointer.
+  if (Loads->second.size() > 1) {
+    // Combine the loads; the map entry is erased below either way.
+    if (aggregateLoads(Loads->second))
+      Combined = true;
+  }
+
+  LoadMap.erase(Loads);
+  return Combined;
+}
+
+// Get the top-level pointer, looking through GEPs and bitcasts.
+const Value *LoadCombine::getTopLevelPointer(const Value *V) {
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
+    return getTopLevelPointer(GEP->getPointerOperand());
+  if (auto *BC = dyn_cast<BitCastInst>(V))
+    return getTopLevelPointer(BC->getOperand(0));
+  if (!V->getType()->isPointerTy())
+    return nullptr;
+  return V;
+}
+
+bool LoadCombine::checkGenericInstAlias(
+    Instruction *V,
+    DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
+  // Whether we combined any loads.
+  bool Combined = false;
+
+  // Loop over each load and check to see if it is aliased by
+  // this instruction.
+  auto LoadMapEnd = LoadMap.end();
+  for (auto Loads = LoadMap.begin(); Loads != LoadMapEnd;) {
+    // Check the AA results for an alias.
+    // FIXME: We could calculate the size for the memory location instead of
+    // leaving it unknown.
+    auto RefInfo =
+        AA->getModRefInfo(V, MemoryLocation(getTopLevelPointer(Loads->first)));
+
+    // Check the result.
+    if (RefInfo == ModRefInfo::MRI_Mod || RefInfo == ModRefInfo::MRI_ModRef) {
+      // Check the load count and try to aggregate.
+      if (Loads->second.size() > 1 && aggregateLoads(Loads->second))
+        Combined = true;
+
+      // Get the current iterator.
+      auto CurLoads = Loads++;
+
+      // Remove the loads.
+      LoadMap.erase(CurLoads);
+
+      // Update the end iterator and continue.
+      LoadMapEnd = LoadMap.end();
+      continue;
+    }
+
+    // Update the iterator.
+    ++Loads;
+  }
+  return Combined;
+}
+
 bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
   if (skipBasicBlock(BB))
     return false;
@@ -237,16 +322,33 @@
   Builder = &TheBuilder;

   DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
-  AliasSetTracker AST(*AA);

   bool Combined = false;
   unsigned Index = 0;
   for (auto &I : BB) {
-    if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
+    // If this instruction may throw then we need to combine the load map now.
+    if (I.mayThrow()) {
       if (combineLoads(LoadMap))
         Combined = true;
       LoadMap.clear();
-      AST.clear();
+      continue;
+    }
+    // Check if the instruction might write to memory.
+    if (I.mayWriteToMemory()) {
+      // FIXME: Could add specializations for the other simple mayWrite ops.
+      // Check to see if this is a store instruction. Stores get a special
+      // check because they are the most common case and can be handled
+      // quickly.
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        // Check to see if we are using a pointer operand that is aliasing our
+        // load map.
+        if (checkPointerAlias(SI->getPointerOperand(), LoadMap))
+          Combined = true;
+        continue;
+      }
+      // Check for an alias from generic analysis.
+      if (checkGenericInstAlias(&I, LoadMap))
+        Combined = true;
       continue;
     }
     LoadInst *LI = dyn_cast<LoadInst>(&I);
@@ -259,7 +361,6 @@
     if (!POP.Pointer)
       continue;
     LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
-    AST.add(LI);
   }
   if (combineLoads(LoadMap))
     Combined = true;
@@ -274,4 +375,4 @@
 INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
+INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
\ No newline at end of file
Index: /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-alias.ll
===================================================================
--- /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-alias.ll
+++ /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-alias.ll
@@ -0,0 +1,32 @@
+; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+%struct.Str = type { i32 }
+
+define i32 @Load_i128(i32* %i, %struct.Str** %str) {
+entry:
+  %0 = load i32, i32* %i, align 4
+  %1 = load %struct.Str*, %struct.Str** %str, align 8
+  %i1 = getelementptr inbounds %struct.Str, %struct.Str* %1, i64 0, i32 0
+  store i32 %0, i32* %i1, align 4
+  %add.ptr = getelementptr inbounds i32, i32* %i, i64 1
+  %2 = load i32, i32* %add.ptr, align 4
+  %arrayidx = getelementptr inbounds %struct.Str*, %struct.Str** %str, i64 1
+  %3 = load %struct.Str*, %struct.Str** %arrayidx, align 8
+  %i2 = getelementptr inbounds %struct.Str, %struct.Str* %3, i64 0, i32 0
+  store i32 %2, i32* %i2, align 4
+  %add.ptr3 = getelementptr inbounds i32, i32* %i, i64 2
+  %4 = load i32, i32* %add.ptr3, align 4
+  %arrayidx4 = getelementptr inbounds %struct.Str*, %struct.Str** %str, i64 2
+  %5 = load %struct.Str*, %struct.Str** %arrayidx4, align 8
+  %i5 = getelementptr inbounds %struct.Str, %struct.Str* %5, i64 0, i32 0
+  store i32 %4, i32* %i5, align 4
+  %add.ptr6 = getelementptr inbounds i32, i32* %i, i64 3
+  %6 = load i32, i32* %add.ptr6, align 4
+  ret i32 %6
+; CHECK-LABEL: @Load_i128(
+; CHECK: load i128, i128* %{{.*}}, align 4
+; CHECK-NOT: load i32
+}
+