Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -522,26 +523,110 @@ // such an instruction is found, we try to promote there instead // of at the store position. Instruction *P = SI; - for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator(); + for (auto I = ++LI->getIterator(), E = SI->getIterator(); I != E; ++I) { if (!(AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod)) continue; - // We found an instruction that may write to the loaded memory. - // We can try to promote at this position instead of the store - // position if nothing alias the store memory after this and the store - // destination is not in the range. P = &*I; - for (; I != E; ++I) { - MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (&*I == SI->getOperand(1) || - AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) { - P = nullptr; - break; + } + + // We found an instruction that may write to the loaded memory. + // We can try to promote at this position instead of the store + // position if nothing alias the store memory after this and the store + // destination is not in the range. + if (P && P != SI) { + // If the store alias this position, early bail out. + MemoryLocation StoreLoc = MemoryLocation::get(SI); + if (AA.getModRefInfo(P, StoreLoc) != MRI_NoModRef) + P = nullptr; + else { + // Keep track of the store's argument so that we move them up + // as well if possible and necessary. + DenseSet Args; + if (auto *Ptr = dyn_cast(SI->getPointerOperand())) + if (Ptr->getParent() == SI->getParent()) + Args.insert(Ptr); + + // Instruction to lift before P. + SmallVector ToLift; + + // Memory locations of lifted instructions. + SmallVector MemLocs; + MemLocs.push_back(StoreLoc); + + // Lifted callsites. + SmallVector CallSites; + + for (auto I = --SI->getIterator(), E = P->getIterator(); + I != E; --I) { + auto *C = &*I; + + bool MayAlias = AA.getModRefInfo(C) != MRI_NoModRef; + + bool NeedLift = false; + if (Args.erase(C)) + NeedLift = true; + else if (MayAlias) { + for (auto ML : MemLocs) { + if (AA.getModRefInfo(C, ML)) { + NeedLift = true; + break; + } + } + + if (!NeedLift) + for (auto CS : CallSites) { + if (AA.getModRefInfo(C, CS)) { + NeedLift = true; + break; + } + } + } + + if (NeedLift) { + if (MayAlias) { + if (auto CS = ImmutableCallSite(C)) { + // If we can't lift this before P, it's game over. + if (AA.getModRefInfo(P, CS) != MRI_NoModRef) { + P = nullptr; + break; + } + + CallSites.push_back(CS); + } else if (isa(C) || isa(C) || + isa(C)) { + // If we can't lift this before P, it's game over. + auto ML = MemoryLocation::get(C); + if (AA.getModRefInfo(P, ML) != MRI_NoModRef) { + P = nullptr; + break; + } + + MemLocs.push_back(ML); + } else { + // We don't know how to lift this instruction. + P = nullptr; + break; + } + } + + ToLift.push_back(C); + for(unsigned k = 0, e = C->getNumOperands(); k != e; ++k) + if (auto *A = dyn_cast(C->getOperand(k))) + if (A->getParent() == SI->getParent()) + Args.insert(A); + } } - } - break; + // We made it, we need to lift + if (P) { + for (auto *I : reverse(ToLift)) { + DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); + I->moveBefore(P); + } + } + } } // If a valid insertion position is found, then we can promote Index: test/Transforms/MemCpyOpt/fca2memcpy.ll =================================================================== --- test/Transforms/MemCpyOpt/fca2memcpy.ll +++ test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -73,16 +73,38 @@ ret void } - -; The GEP is present after the aliasing store, preventing to move the memcpy before -; (without further analysis/transformation) -define void @copyaliaswithproducerinbetween(%S* %src, %S* %dst) { -; CHECK-LABEL: copyalias -; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src -; CHECK-NOT: call +; If the store address is computed ina complex manner, make +; sure we lift the computation as well if needed and possible. +define void @addrproducer(%S* %src, %S* %dst) { +; CHECK-LABEL: addrproducer +; CHECK: %dst2 = getelementptr %S, %S* %dst, i64 1 +; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 +; CHECK-NEXT: store %S undef, %S* %dst +; CHECK-NEXT: ret void %1 = load %S, %S* %src store %S undef, %S* %dst %dst2 = getelementptr %S , %S* %dst, i64 1 store %S %1, %S* %dst2 ret void } + +define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) { +; CHECK-LABEL: aliasaddrproducer + %1 = load %S, %S* %src + store %S undef, %S* %dst + %dstindex = load i32, i32* %dstidptr + %dst2 = getelementptr %S , %S* %dst, i32 %dstindex + store %S %1, %S* %dst2 + ret void +} + +define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) { +; CHECK-LABEL: noaliasaddrproducer + %1 = load %S, %S* %src + store %S undef, %S* %src + %2 = load i32, i32* %dstidptr + %dstindex = or i32 %2, 1 + %dst2 = getelementptr %S , %S* %dst, i32 %dstindex + store %S %1, %S* %dst2 + ret void +}