Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -481,6 +481,17 @@ return AMemSet; } +static unsigned findCommonAlignement(const DataLayout &DL, const StoreInst *SI, + const LoadInst *LI) { + unsigned storeAlign = SI->getAlignment(); + if (!storeAlign) + storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); + unsigned loadAlign = LI->getAlignment(); + if (!loadAlign) + loadAlign = DL.getABITypeAlignment(LI->getType()); + + return std::min(storeAlign, loadAlign); +} bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; @@ -496,12 +507,66 @@ const DataLayout &DL = SI->getModule()->getDataLayout(); - // Detect cases where we're performing call slot forwarding, but - // happen to be using a load-store pair to implement it, rather than - // a memcpy. + // Load to tore forwarding can be interpreted as memcpy. if (LoadInst *LI = dyn_cast(SI->getOperand(0))) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { + + DEBUG(dbgs() << "Forwarding from " << *LI << " to " << *SI << "\n"); + + auto *T = LI->getType(); + if (T->isAggregateType()) { + AliasAnalysis &AA = getAnalysis().getAAResults(); + MemoryLocation LoadLoc = MemoryLocation::get(LI); + Instruction *AI = nullptr; + for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator(); + I != E; ++I) { + if (AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod) { + AI = &*I; + break; + } + } + + if (!AI) { + // If the dest of the second might alias the source of the first, then the + // source and dest might overlap. We still want to eliminate the intermediate + // value, but we have to generate a memmove instead of memcpy. + bool UseMemMove = false; + if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc)) + UseMemMove = true; + + unsigned Align = findCommonAlignement(DL, SI, LI); + unsigned Size = DL.getTypeStoreSize(T); + + IRBuilder<> Builder(SI); + Instruction *M; + if (UseMemMove) + M = Builder.CreateMemMove(SI->getPointerOperand(), + LI->getPointerOperand(), Size, + Align, SI->isVolatile()); + else + M = Builder.CreateMemCpy(SI->getPointerOperand(), + LI->getPointerOperand(), Size, + Align, SI->isVolatile()); + + DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI + << " => " << *M << "\n"); + + MD->removeInstruction(SI); + SI->eraseFromParent(); + MD->removeInstruction(LI); + LI->eraseFromParent(); + ++NumMemCpyInstr; + + // Make sure we do not invalidate the iterrator. + BBI = M->getIterator(); + return true; + } + } + + // Detect cases where we're performing call slot forwarding, but + // happen to be using a load-store pair to implement it, rather than + // a memcpy. MemDepResult ldep = MD->getDependency(LI); CallInst *C = nullptr; if (ldep.isClobber() && !isa(ldep.getInst())) @@ -522,18 +587,11 @@ } if (C) { - unsigned storeAlign = SI->getAlignment(); - if (!storeAlign) - storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); - unsigned loadAlign = LI->getAlignment(); - if (!loadAlign) - loadAlign = DL.getABITypeAlignment(LI->getType()); - bool changed = performCallSlotOptzn( LI, SI->getPointerOperand()->stripPointerCasts(), LI->getPointerOperand()->stripPointerCasts(), DL.getTypeStoreSize(SI->getOperand(0)->getType()), - std::min(storeAlign, loadAlign), C); + findCommonAlignement(DL, SI, LI), C); if (changed) { MD->removeInstruction(SI); SI->eraseFromParent(); Index: test/Transforms/MemCpyOpt/fca2memcpy.ll =================================================================== --- /dev/null +++ test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -0,0 +1,47 @@ +; RUN: opt -memcpyopt -S < %s | FileCheck %s + +target datalayout = "e-i64:64-f80:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +%S = type { i8*, i32 } + +define void @copy(%S* %src, %S* %dst) { +; CHECK-LABEL: copy +; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 +; CHECK-NEXT: ret void + %1 = load %S, %S* %src + store %S %1, %S* %dst + ret void +} + +define void @noaliassrc(%S* noalias %src, %S* %dst) { +; CHECK-LABEL: noaliassrc +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NEXT: ret void + %1 = load %S, %S* %src + store %S %1, %S* %dst + ret void +} + +define void @noaliasdst(%S* %src, %S* noalias %dst) { +; CHECK-LABEL: noaliasdst +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NEXT: ret void + %1 = load %S, %S* %src + store %S %1, %S* %dst + ret void +} + +define void @copyalias(%S* %src, %S* %dst) { +; CHECK-LABEL: copyalias +; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src +; CHECK-NOT: load +; CHECK: call void @llvm.memmove.p0i8.p0i8.i64 +; CHECK-NEXT: store %S [[LOAD]], %S* %dst +; CHECK-NEXT: ret void + %1 = load %S, %S* %src + %2 = load %S, %S* %src + store %S %1, %S* %dst + store %S %2, %S* %dst + ret void +}