Index: /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
===================================================================
--- /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -83,6 +83,9 @@
   bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
   bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
   bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
+  bool
+  checkGenericInstAlias(Instruction *,
+                        DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
 };
 }
 
@@ -117,12 +120,6 @@
     DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
   bool Combined = false;
   for (auto &Loads : LoadMap) {
-    if (Loads.second.size() < 2)
-      continue;
-    std::sort(Loads.second.begin(), Loads.second.end(),
-              [](const LoadPOPPair &A, const LoadPOPPair &B) {
-                return A.POP.Offset < B.POP.Offset;
-              });
     if (aggregateLoads(Loads.second))
       Combined = true;
   }
@@ -134,7 +131,14 @@
 /// It is guaranteed that no writes occur between any of the loads. All loads
 /// have the same base pointer. There are at least two loads.
 bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
-  assert(Loads.size() >= 2 && "Insufficient loads!");
+  if (Loads.size() < 2)
+    return false;
+
+  std::sort(Loads.begin(), Loads.end(),
+            [](const LoadPOPPair &A, const LoadPOPPair &B) {
+              return A.POP.Offset < B.POP.Offset;
+            });
+
   LoadInst *BaseLoad = nullptr;
   SmallVector<LoadPOPPair, 8> AggregateLoads;
   bool Combined = false;
@@ -226,6 +230,39 @@
   return true;
 }
 
+bool LoadCombine::checkGenericInstAlias(
+    Instruction *V,
+    DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
+  bool Combined = false;
+
+  // Loop over each group of loads and check whether it is aliased by
+  // this instruction.
+  auto LoadMapEnd = LoadMap.end();
+  for (auto Loads = LoadMap.begin(); Loads != LoadMapEnd;) {
+    // Check the AA results for an alias.
+    // FIXME: We could calculate the size for the memory location instead of
+    // leaving it unknown.
+    auto RefInfo =
+        AA->getModRefInfo(V, MemoryLocation(Loads->first));
+
+    if (RefInfo & ModRefInfo::MRI_Mod) {
+      // The instruction may clobber this pointer, so try to aggregate the
+      // loads collected so far.
+      if (aggregateLoads(Loads->second))
+        Combined = true;
+
+      // Grab the current iterator before advancing past it.
+      auto CurLoads = Loads++;
+
+      // Remove the loads.
+      LoadMap.erase(CurLoads);
+      continue;
+    }
+
+    ++Loads;
+  }
+  return Combined;
+}
+
 bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
   if (skipBasicBlock(BB))
     return false;
@@ -237,16 +274,22 @@
   Builder = &TheBuilder;
 
   DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
-  AliasSetTracker AST(*AA);
 
   bool Combined = false;
   unsigned Index = 0;
   for (auto &I : BB) {
-    if (I.mayThrow() || (I.mayWriteToMemory() && AST.containsUnknown(&I))) {
+    // If this instruction may throw then we need to combine the LoadMap now.
+    if (I.mayThrow()) {
       if (combineLoads(LoadMap))
         Combined = true;
       LoadMap.clear();
-      AST.clear();
+      continue;
+    }
+    // Check if the instruction might write to memory.
+    if (I.mayWriteToMemory()) {
+      // Check for an alias from generic analysis.
+      if (checkGenericInstAlias(&I, LoadMap))
+        Combined = true;
       continue;
     }
     LoadInst *LI = dyn_cast<LoadInst>(&I);
@@ -259,7 +302,6 @@
     if (!POP.Pointer)
       continue;
     LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
-    AST.add(LI);
   }
   if (combineLoads(LoadMap))
     Combined = true;
Index: /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-alias.ll
===================================================================
--- /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-alias.ll
+++ /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-alias.ll
@@ -0,0 +1,46 @@
+; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+
+define void @load_combine(i32* noalias %a, i32* noalias %b, i32* noalias %c) {
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 0
+  %0 = load i32, i32* %arrayidx, align 16
+  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 0
+  store i32 %0, i32* %arrayidx1, align 16
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 1
+  %c.gep.1 = getelementptr inbounds i32, i32* %c, i64 0
+  %c.load.1 = load i32, i32* %c.gep.1, align 8
+  store i32 %1, i32* %c.gep.1, align 4
+  store i32 %c.load.1, i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 2
+  %2 = load i32, i32* %arrayidx4, align 8
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 2
+  store i32 %2, i32* %arrayidx5, align 8
+  %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 3
+  %3 = load i32, i32* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 3
+  store i32 %3, i32* %arrayidx7, align 4
+  ret void
+; CHECK-LABEL: @load_combine(
+; CHECK-NOT: load i64
+}
+
+define void @load_nocombine(i32* %a, i32* %b) {
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 0
+  %0 = load i32, i32* %arrayidx, align 16
+  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 0
+  store i32 %0, i32* %arrayidx1, align 16
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 1
+  store i32 %1, i32* %arrayidx3, align 4
+  ret void
+; CHECK-LABEL: @load_nocombine(
+; CHECK-NOT: load i64, i64
+}