diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -43,6 +43,7 @@
   AliasAnalysis *AA = nullptr;
   AssumptionCache *AC = nullptr;
   DominatorTree *DT = nullptr;
+  MemorySSA *MSSA = nullptr;
   MemorySSAUpdater *MSSAU = nullptr;

 public:
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -67,7 +67,6 @@
 #define DEBUG_TYPE "memcpyopt"

-// TODO: Actually implement MemorySSA-based MemCpyOpt.
 static cl::opt<bool> EnableMemorySSA("enable-memcpyopt-memoryssa",
                                      cl::init(false), cl::Hidden,
                                      cl::desc("Use MemorySSA-backed MemCpyOpt."));
@@ -283,7 +282,8 @@
     AU.addPreserved<DominatorTreeWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addRequired<MemoryDependenceWrapperPass>();
+    if (!EnableMemorySSA)
+      AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addPreserved<MemoryDependenceWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
     AU.addPreserved<AAResultsWrapperPass>();
@@ -330,10 +330,37 @@
 void MemCpyOptPass::eraseInstruction(Instruction *I) {
   if (MSSAU)
     MSSAU->removeMemoryAccess(I);
-  MD->removeInstruction(I);
+  if (MD)
+    MD->removeInstruction(I);
   I->eraseFromParent();
 }

+// Check for mod or ref of Loc between Start and End, excluding both
+// boundaries. Start and End must be in the same block.
+static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
+                            const MemoryUseOrDef *Start,
+                            const MemoryUseOrDef *End) {
+  assert(Start->getBlock() == End->getBlock() && "Only local supported");
+  for (const MemoryAccess &MA :
+       make_range(++Start->getIterator(), End->getIterator())) {
+    if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
+                                       Loc)))
+      return true;
+  }
+  return false;
+}
+
+// Check for mod of Loc between Start and End, excluding both boundaries.
+// Start and End can be in different blocks.
+static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
+                           const MemoryUseOrDef *Start,
+                           const MemoryUseOrDef *End) {
+  // TODO: Only walk until we hit Start.
+  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+      End->getDefiningAccess(), Loc);
+  return !MSSA->dominates(Clobber, Start);
+}
+
 /// When scanning forward over instructions, we look for some other patterns to
 /// fold away. In particular, this looks for stores to neighboring locations of
 /// memory. If it sees enough consecutive ones, it attempts to merge them
@@ -645,6 +672,7 @@
   // the memory we load from in between the load and the store. If
   // such an instruction is found, we try to promote there instead
   // of at the store position.
+  // TODO: Can use MSSA for this.
   Instruction *P = SI;
   for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
     if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
@@ -709,20 +737,37 @@
   // Detect cases where we're performing call slot forwarding, but
   // happen to be using a load-store pair to implement it, rather than
   // a memcpy.
-  MemDepResult ldep = MD->getDependency(LI);
   CallInst *C = nullptr;
-  if (ldep.isClobber() && !isa<LoadInst>(ldep.getInst()))
-    C = dyn_cast<CallInst>(ldep.getInst());
+  if (EnableMemorySSA) {
+    if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+            MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
+      // The load must post-dominate the call. Limit to the same block for now.
+      // TODO: Support non-local call-slot optimization?
+      if (LoadClobber->getBlock() == SI->getParent())
+        C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
+    }
+  } else {
+    MemDepResult ldep = MD->getDependency(LI);
+    if (ldep.isClobber() && !isa<LoadInst>(ldep.getInst()))
+      C = dyn_cast<CallInst>(ldep.getInst());
+  }

   if (C) {
     // Check that nothing touches the dest of the "copy" between
     // the call and the store.
     MemoryLocation StoreLoc = MemoryLocation::get(SI);
-    for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
-         I != E; --I) {
-      if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
+    if (EnableMemorySSA) {
+      if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
+                          MSSA->getMemoryAccess(SI)))
         C = nullptr;
-        break;
+    } else {
+      for (BasicBlock::iterator I = --SI->getIterator(),
+                                E = C->getIterator();
+           I != E; --I) {
+        if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
+          C = nullptr;
+          break;
+        }
       }
     }
   }
@@ -972,7 +1017,8 @@
   // Drop any cached information about the call, because we may have changed
   // its dependence information by changing its parameter.
-  MD->removeInstruction(C);
+  if (MD)
+    MD->removeInstruction(C);

   // Update AA metadata
   // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
@@ -1020,14 +1066,21 @@
   //
   // TODO: If the code between M and MDep is transparent to the destination "c",
   // then we could still perform the xform by moving M up to the first memcpy.
-  //
-  // NOTE: This is conservative, it will stop on any read from the source loc,
-  // not just the defining memcpy.
-  MemDepResult SourceDep =
-      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
-                                   M->getIterator(), M->getParent());
-  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
-    return false;
+  if (EnableMemorySSA) {
+    // TODO: It would be sufficient to check the MDep source up to the memcpy
+    // size of M, rather than MDep.
+    if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+                       MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+      return false;
+  } else {
+    // NOTE: This is conservative, it will stop on any read from the source
+    // loc, not just the defining memcpy.
+    MemDepResult SourceDep =
+        MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+                                     M->getIterator(), M->getParent());
+    if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+      return false;
+  }

   // If the dest of the second might alias the source of the first, then the
   // source and dest might overlap. We still want to eliminate the intermediate
@@ -1095,12 +1148,24 @@
                                    LocationSize::precise(1))))
     return false;

-  // Check that there are no other dependencies on the memset destination.
-  MemDepResult DstDepInfo =
-      MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
-                                   MemCpy->getIterator(), MemCpy->getParent());
-  if (DstDepInfo.getInst() != MemSet)
-    return false;
+  if (EnableMemorySSA) {
+    // We know that dst up to src_size is not written. We now need to make sure
+    // that dst up to dst_size is not accessed. (If we did not move the memset,
+    // checking for reads would be sufficient.)
+    if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
+                        MSSA->getMemoryAccess(MemSet),
+                        MSSA->getMemoryAccess(MemCpy))) {
+      return false;
+    }
+  } else {
+    // We have already checked that dst up to src_size is not accessed. We
+    // need to make sure that there are no accesses up to dst_size either.
+    MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
+        MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
+        MemCpy->getParent());
+    if (DstDepInfo.getInst() != MemSet)
+      return false;
+  }

   // Use the same i8* dest as the memcpy, killing the memset dest if different.
   Value *Dest = MemCpy->getRawDest();
@@ -1172,6 +1237,24 @@
   return false;
 }

+static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
+                                 MemoryDef *Def, ConstantInt *Size) {
+  if (MSSA->isLiveOnEntryDef(Def))
+    return isa<AllocaInst>(getUnderlyingObject(V));
+
+  if (IntrinsicInst *II =
+          dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
+    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+      ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
+      if (AA->isMustAlias(V, II->getArgOperand(1)) &&
+          LTSize->getZExtValue() >= Size->getZExtValue())
+        return true;
+    }
+  }
+
+  return false;
+}
+
 /// Transform memcpy to memset when its source was just memset.
 /// In other words, turn:
 /// \code
@@ -1207,12 +1290,24 @@
     // interested in the bytes from MemSetSize..CopySize here, but as we can't
     // easily represent this location, we use the full 0..CopySize range.
     MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
-    MemDepResult DepInfo = MD->getPointerDependencyFrom(
-        MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
-    if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
-      CopySize = MemSetSize;
-    else
+    bool CanReduceSize = false;
+    if (EnableMemorySSA) {
+      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+          MemSetAccess->getDefiningAccess(), MemCpyLoc);
+      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+        if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
+          CanReduceSize = true;
+    } else {
+      MemDepResult DepInfo = MD->getPointerDependencyFrom(
+          MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
+      if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+        CanReduceSize = true;
+    }
+
+    if (!CanReduceSize)
       return false;
+    CopySize = MemSetSize;
   }

   IRBuilder<> Builder(MemCpy);
@@ -1267,63 +1362,140 @@
     return true;
   }

-  MemDepResult DepInfo = MD->getDependency(M);
-
-  // Try to turn a partially redundant memset + memcpy into
-  // memcpy + smaller memset. We don't need the memcpy size for this.
-  if (DepInfo.isClobber())
-    if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
-      if (processMemSetMemCpyDependence(M, MDep))
-        return true;
-
-  // The optimizations after this point require the memcpy size.
-  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
-  if (!CopySize) return false;
-
-  // There are four possible optimizations we can do for memcpy:
-  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
-  //   b) call-memcpy xform for return slot optimization.
-  //   c) memcpy from freshly alloca'd space or space that has just started
-  //      its lifetime copies undefined data, and we can therefore eliminate
-  //      the memcpy in favor of the data that was already at the destination.
-  //   d) memcpy from a just-memset'd source can be turned into memset.
-  if (DepInfo.isClobber()) {
-    if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
-      // FIXME: Can we pass in either of dest/src alignment here instead
-      // of conservatively taking the minimum?
-      Align Alignment = std::min(M->getDestAlign().valueOrOne(),
-                                 M->getSourceAlign().valueOrOne());
-      if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
-                               CopySize->getZExtValue(), Alignment, C)) {
-        eraseInstruction(M);
-        ++NumMemCpyInstr;
-        return true;
+  if (EnableMemorySSA) {
+    MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+    MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+    MemoryLocation DestLoc = MemoryLocation::getForDest(M);
+    const MemoryAccess *DestClobber =
+        MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
+
+    // Try to turn a partially redundant memset + memcpy into
+    // memcpy + smaller memset. We don't need the memcpy size for this.
+    // The memcpy must post-dominate the memset, so limit this to the same
+    // basic block. A non-local generalization is likely not worthwhile.
+    if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
+      if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
+        if (DestClobber->getBlock() == M->getParent())
+          if (processMemSetMemCpyDependence(M, MDep))
+            return true;
+
+    // The optimizations after this point require the memcpy size.
+    ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+    if (!CopySize) return false;
+
+    MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+        AnyClobber, MemoryLocation::getForSource(M));
+
+    // There are four possible optimizations we can do for memcpy:
+    //   a) memcpy-memcpy xform which exposes redundancy for DSE.
+    //   b) call-memcpy xform for return slot optimization.
+    //   c) memcpy from freshly alloca'd space or space that has just started
+    //      its lifetime copies undefined data, and we can therefore eliminate
+    //      the memcpy in favor of the data that was already at the
+    //      destination.
+    //   d) memcpy from a just-memset'd source can be turned into memset.
+    if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
+      if (Instruction *MI = MD->getMemoryInst()) {
+        if (auto *C = dyn_cast<CallInst>(MI)) {
+          // The memcpy must post-dominate the call. Limit to the same block
+          // for now. Additionally, we need to ensure that there are no
+          // accesses to dest between the call and the memcpy. Accesses to src
+          // will be checked by performCallSlotOptzn().
+          // TODO: Support non-local call-slot optimization?
+          if (C->getParent() == M->getParent() &&
+              !accessedBetween(*AA, DestLoc, MD, MA)) {
+            // FIXME: Can we pass in either of dest/src alignment here instead
+            // of conservatively taking the minimum?
+            Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                       M->getSourceAlign().valueOrOne());
+            if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+                                     CopySize->getZExtValue(), Alignment, C)) {
+              LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+                                << "    call: " << *C << "\n"
+                                << "    memcpy: " << *M << "\n");
+              eraseInstruction(M);
+              ++NumMemCpyInstr;
+              return true;
+            }
+          }
+        }
+        if (auto *MDep = dyn_cast<MemCpyInst>(MI))
+          return processMemCpyMemCpyDependence(M, MDep);
+        if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
+          if (performMemCpyToMemSetOptzn(M, MDep)) {
+            LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
+            eraseInstruction(M);
+            ++NumCpyToSet;
+            return true;
+          }
+        }
+      }
+
+      if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
+        LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
+        eraseInstruction(M);
+        ++NumMemCpyInstr;
+        return true;
       }
     }
-  }
+  } else {
+    MemDepResult DepInfo = MD->getDependency(M);
+
+    // Try to turn a partially redundant memset + memcpy into
+    // memcpy + smaller memset. We don't need the memcpy size for this.
+    if (DepInfo.isClobber())
+      if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+        if (processMemSetMemCpyDependence(M, MDep))
+          return true;

-  MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
-  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
-      SrcLoc, true, M->getIterator(), M->getParent());
-
-  if (SrcDepInfo.isClobber()) {
-    if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
-      return processMemCpyMemCpyDependence(M, MDep);
-  } else if (SrcDepInfo.isDef()) {
-    if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
-      eraseInstruction(M);
-      ++NumMemCpyInstr;
-      return true;
+    // The optimizations after this point require the memcpy size.
+    ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
+    if (!CopySize) return false;
+
+    // There are four possible optimizations we can do for memcpy:
+    //   a) memcpy-memcpy xform which exposes redundancy for DSE.
+    //   b) call-memcpy xform for return slot optimization.
+    //   c) memcpy from freshly alloca'd space or space that has just started
+    //      its lifetime copies undefined data, and we can therefore eliminate
+    //      the memcpy in favor of the data that was already at the
+    //      destination.
+    //   d) memcpy from a just-memset'd source can be turned into memset.
+    if (DepInfo.isClobber()) {
+      if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+        // FIXME: Can we pass in either of dest/src alignment here instead
+        // of conservatively taking the minimum?
+        Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                   M->getSourceAlign().valueOrOne());
+        if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
+                                 CopySize->getZExtValue(), Alignment, C)) {
+          eraseInstruction(M);
+          ++NumMemCpyInstr;
+          return true;
+        }
+      }
     }
-  }

-  if (SrcDepInfo.isClobber())
-    if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
-      if (performMemCpyToMemSetOptzn(M, MDep)) {
+    MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
+    MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+        SrcLoc, true, M->getIterator(), M->getParent());
+
+    if (SrcDepInfo.isClobber()) {
+      if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+        return processMemCpyMemCpyDependence(M, MDep);
+    } else if (SrcDepInfo.isDef()) {
+      if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
         eraseInstruction(M);
-        ++NumCpyToSet;
+        ++NumMemCpyInstr;
         return true;
       }
+    }
+
+    if (SrcDepInfo.isClobber())
+      if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+        if (performMemCpyToMemSetOptzn(M, MDep)) {
+          eraseInstruction(M);
+          ++NumCpyToSet;
+          return true;
+        }
+  }

   return false;
 }
@@ -1354,7 +1526,8 @@
   // MemDep may have overly conservative information about this instruction;
   // just conservatively flush it from the cache.
-  MD->removeInstruction(M);
+  if (MD)
+    MD->removeInstruction(M);

   ++NumMoveToCpy;
   return true;
@@ -1367,16 +1540,25 @@
   Value *ByValArg = CB.getArgOperand(ArgNo);
   Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
   uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
-  MemDepResult DepInfo = MD->getPointerDependencyFrom(
-      MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true,
-      CB.getIterator(), CB.getParent());
-  if (!DepInfo.isClobber())
-    return false;
+  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
+  MemCpyInst *MDep = nullptr;
+  if (EnableMemorySSA) {
+    MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+    MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+        CallAccess->getDefiningAccess(), Loc);
+    if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+      MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+  } else {
+    MemDepResult DepInfo = MD->getPointerDependencyFrom(
+        Loc, true, CB.getIterator(), CB.getParent());
+    if (!DepInfo.isClobber())
+      return false;
+    MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
+  }

   // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
   // a memcpy, see if we can byval from the source of the memcpy instead of the
   // result.
-  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
   if (!MDep || MDep->isVolatile() ||
       ByValArg->stripPointerCasts() != MDep->getDest())
     return false;
@@ -1410,14 +1592,19 @@
   //   *b = 42;
   //   foo(*a)
   // It would be invalid to transform the second memcpy into foo(*b).
-  //
-  // NOTE: This is conservative, it will stop on any read from the source loc,
-  // not just the defining memcpy.
-  MemDepResult SourceDep = MD->getPointerDependencyFrom(
-      MemoryLocation::getForSource(MDep), false,
-      CB.getIterator(), MDep->getParent());
-  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
-    return false;
+  if (EnableMemorySSA) {
+    if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+                       MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+      return false;
+  } else {
+    // NOTE: This is conservative, it will stop on any read from the source
+    // loc, not just the defining memcpy.
+    MemDepResult SourceDep = MD->getPointerDependencyFrom(
+        MemoryLocation::getForSource(MDep), false,
+        CB.getIterator(), MDep->getParent());
+    if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
+      return false;
+  }

   Value *TmpCast = MDep->getSource();
   if (MDep->getSource()->getType() != ByValArg->getType()) {
@@ -1484,7 +1671,8 @@
 }

 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
-  auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+  auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
+                              : AM.getCachedResult<MemoryDependenceAnalysis>(F);
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto *AA = &AM.getResult<AAManager>(F);
   auto *AC = &AM.getResult<AssumptionAnalysis>(F);
@@ -1493,14 +1681,15 @@
                                  : AM.getCachedResult<MemorySSAAnalysis>(F);

   bool MadeChange =
-      runImpl(F, &MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
+      runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
   if (!MadeChange)
     return PreservedAnalyses::all();

   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
   PA.preserve<GlobalsAA>();
-  PA.preserve<MemoryDependenceAnalysis>();
+  if (MD)
+    PA.preserve<MemoryDependenceAnalysis>();
   if (MSSA)
     PA.preserve<MemorySSAAnalysis>();
   return PA;
@@ -1516,6 +1705,7 @@
   AA = AA_;
   AC = AC_;
   DT = DT_;
+  MSSA = MSSA_;
   MemorySSAUpdater MSSAU_(MSSA_);
   MSSAU = MSSA_ ? &MSSAU_ : nullptr;

   // If we don't have at least memset and memcpy, there is little point of doing
@@ -1542,7 +1732,9 @@
   if (skipFunction(F))
     return false;

-  auto *MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+  auto *MDWP = !EnableMemorySSA
+                   ? &getAnalysis<MemoryDependenceWrapperPass>()
+                   : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
   auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
   auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -1551,6 +1743,6 @@
                     ? &getAnalysis<MemorySSAWrapperPass>()
                     : getAnalysisIfAvailable<MemorySSAWrapperPass>();

-  return Impl.runImpl(F, MD, TLI, AA, AC, DT,
+  return Impl.runImpl(F, MDWP ? &MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
                       MSSAWP ? &MSSAWP->getMSSA() : nullptr);
 }
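Before the tests, a minimal IR sketch of the call slot pattern that performCallSlotOptzn() targets; the function @f, the buffer size, and all names here are illustrative, not taken from the patch:

declare void @f(i8*)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

define void @call_slot_sketch(i8* noalias %dest) {
  %tmp = alloca [16 x i8]
  %src = getelementptr inbounds [16 x i8], [16 x i8]* %tmp, i32 0, i32 0
  ; @f writes its result into the local buffer %src.
  call void @f(i8* %src)
  ; The copy forwards that result to %dest. If nothing mods or refs %dest
  ; between the two calls (the accessedBetween() check on the MemorySSA path),
  ; the call can target %dest directly and the memcpy disappears.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 16, i1 false)
  ret void
}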
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot.ll b/llvm/test/Transforms/MemCpyOpt/callslot.ll
--- a/llvm/test/Transforms/MemCpyOpt/callslot.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s
-; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA
+; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA

 define i8 @read_dest_between_call_and_memcpy() {
 ; CHECK-LABEL: @read_dest_between_call_and_memcpy(
@@ -26,15 +26,25 @@
 }

 define i8 @read_src_between_call_and_memcpy() {
-; CHECK-LABEL: @read_src_between_call_and_memcpy(
-; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1
-; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1
-; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8*
-; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8*
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false)
-; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[SRC_I8]], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 16, i1 false)
-; CHECK-NEXT: ret i8 [[X]]
+; NO_MSSA-LABEL: @read_src_between_call_and_memcpy(
+; NO_MSSA-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1
+; NO_MSSA-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1
+; NO_MSSA-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8*
+; NO_MSSA-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false)
+; NO_MSSA-NEXT: [[X:%.*]] = load i8, i8* [[SRC_I8]], align 1
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 16, i1 false)
+; NO_MSSA-NEXT: ret i8 [[X]]
+;
+; MSSA-LABEL: @read_src_between_call_and_memcpy(
+; MSSA-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1
+; MSSA-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1
+; MSSA-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8*
+; MSSA-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8*
+; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false)
+; MSSA-NEXT: [[X:%.*]] = load i8, i8* [[SRC_I8]], align 1
+; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I8]], i8 0, i64 16, i1 false)
+; MSSA-NEXT: ret i8 [[X]]
 ;
   %dest = alloca [16 x i8]
   %src = alloca [16 x i8]
diff --git a/llvm/test/Transforms/MemCpyOpt/invariant.start.ll b/llvm/test/Transforms/MemCpyOpt/invariant.start.ll
--- a/llvm/test/Transforms/MemCpyOpt/invariant.start.ll
+++ b/llvm/test/Transforms/MemCpyOpt/invariant.start.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; MemCpy optimizations should take place even in presence of invariant.start
-; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
-; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA
+; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA

 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"

@@ -18,13 +18,21 @@
 ; The intermediate alloca and one of the memcpy's should be eliminated, the
 ; other should be transformed to a memmove.
 define void @test1(i8* %P, i8* %Q) nounwind {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0:%.*]], align 16
-; CHECK-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[R]], i8* align 16 [[P:%.*]], i32 32, i1 false)
-; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[P]])
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[R]], i32 32, i1 false)
-; CHECK-NEXT: ret void
+; NO_MSSA-LABEL: @test1(
+; NO_MSSA-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0:%.*]], align 16
+; NO_MSSA-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[R]], i8* align 16 [[P:%.*]], i32 32, i1 false)
+; NO_MSSA-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[P]])
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[R]], i32 32, i1 false)
+; NO_MSSA-NEXT: ret void
+;
+; MSSA-LABEL: @test1(
+; MSSA-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0:%.*]], align 16
+; MSSA-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8*
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[R]], i8* align 16 [[P:%.*]], i32 32, i1 false)
+; MSSA-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[P]])
+; MSSA-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P]], i32 32, i1 false)
+; MSSA-NEXT: ret void
 ;
   %memtmp = alloca %0, align 16
   %R = bitcast %0* %memtmp to i8*
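The next two files cover the main new capability: writtenBetween() queries the MemorySSA walker rather than scanning a single block, so memcpy-memcpy forwarding now works across control flow. A hand-written sketch of the shape being tested (all names and sizes invented):

declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

define void @nonlocal_sketch(i8* noalias %dst, i8* %src, i1 %c) {
entry:
  %buf = alloca [64 x i8]
  %tmp = getelementptr inbounds [64 x i8], [64 x i8]* %buf, i32 0, i32 0
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 64, i1 false)
  br i1 %c, label %use, label %exit

use:
  ; No write clobbers %src on the path from the first copy, so this copy can
  ; read from %src directly, which in turn makes %buf dead.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %tmp, i64 64, i1 false)
  br label %exit

exit:
  ret void
}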
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll
--- a/llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
-; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA

 ; Test memcpy-memcpy dependencies across invoke edges.

@@ -8,19 +8,33 @@
 ; TODO: Not supported yet.
 define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK-LABEL: @test_normal(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
-; CHECK-NEXT: invoke void @invoke_me()
-; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]]
-; CHECK: lpad:
-; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: ret void
-; CHECK: try.cont:
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false)
-; CHECK-NEXT: ret void
+; NO_MSSA-LABEL: @test_normal(
+; NO_MSSA-NEXT: entry:
+; NO_MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
+; NO_MSSA-NEXT: invoke void @invoke_me()
+; NO_MSSA-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]]
+; NO_MSSA: lpad:
+; NO_MSSA-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
+; NO_MSSA-NEXT: catch i8* null
+; NO_MSSA-NEXT: ret void
+; NO_MSSA: try.cont:
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false)
+; NO_MSSA-NEXT: ret void
+;
+; MSSA-LABEL: @test_normal(
+; MSSA-NEXT: entry:
+; MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
+; MSSA-NEXT: invoke void @invoke_me()
+; MSSA-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]]
+; MSSA: lpad:
+; MSSA-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
+; MSSA-NEXT: catch i8* null
+; MSSA-NEXT: ret void
+; MSSA: try.cont:
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false)
+; MSSA-NEXT: ret void
 ;
 entry:
   %temp = alloca i8, i32 64
@@ -42,19 +56,33 @@
 ; TODO: Not supported yet.
 define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK-LABEL: @test_unwind(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
-; CHECK-NEXT: invoke void @invoke_me()
-; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]]
-; CHECK: lpad:
-; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false)
-; CHECK-NEXT: ret void
-; CHECK: try.cont:
-; CHECK-NEXT: ret void
+; NO_MSSA-LABEL: @test_unwind(
+; NO_MSSA-NEXT: entry:
+; NO_MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
+; NO_MSSA-NEXT: invoke void @invoke_me()
+; NO_MSSA-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]]
+; NO_MSSA: lpad:
+; NO_MSSA-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
+; NO_MSSA-NEXT: catch i8* null
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false)
+; NO_MSSA-NEXT: ret void
+; NO_MSSA: try.cont:
+; NO_MSSA-NEXT: ret void
+;
+; MSSA-LABEL: @test_unwind(
+; MSSA-NEXT: entry:
+; MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
+; MSSA-NEXT: invoke void @invoke_me()
+; MSSA-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]]
+; MSSA: lpad:
+; MSSA-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 }
+; MSSA-NEXT: catch i8* null
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false)
+; MSSA-NEXT: ret void
+; MSSA: try.cont:
+; MSSA-NEXT: ret void
 ;
 entry:
   %temp = alloca i8, i32 64
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -141,13 +141,21 @@
 }

 define i8 @test4_read_between(i8 *%P) {
-; CHECK-LABEL: @test4_read_between(
-; CHECK-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
-; CHECK-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false)
-; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[A2]], align 1
-; CHECK-NEXT: call void @test4a(i8* byval(i8) align 1 [[A2]])
-; CHECK-NEXT: ret i8 [[X]]
+; NO_MSSA-LABEL: @test4_read_between(
+; NO_MSSA-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
+; NO_MSSA-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false)
+; NO_MSSA-NEXT: [[X:%.*]] = load i8, i8* [[A2]], align 1
+; NO_MSSA-NEXT: call void @test4a(i8* byval align 1 [[A2]])
+; NO_MSSA-NEXT: ret i8 [[X]]
+;
+; MSSA-LABEL: @test4_read_between(
+; MSSA-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
+; MSSA-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8*
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false)
+; MSSA-NEXT: [[X:%.*]] = load i8, i8* [[A2]], align 1
+; MSSA-NEXT: call void @test4a(i8* byval align 1 [[P]])
+; MSSA-NEXT: ret i8 [[X]]
 ;
   %a1 = alloca %1
   %a2 = bitcast %1* %a1 to i8*
@@ -158,16 +166,27 @@
 }

 define void @test4_non_local(i8 *%P, i1 %c) {
-; CHECK-LABEL: @test4_non_local(
-; CHECK-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
-; CHECK-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false)
-; CHECK-NEXT: br i1 [[C:%.*]], label [[CALL:%.*]], label [[EXIT:%.*]]
-; CHECK: call:
-; CHECK-NEXT: call void @test4a(i8* byval(i8) align 1 [[A2]])
-; CHECK-NEXT: br label [[EXIT]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; NO_MSSA-LABEL: @test4_non_local(
+; NO_MSSA-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
+; NO_MSSA-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false)
+; NO_MSSA-NEXT: br i1 [[C:%.*]], label [[CALL:%.*]], label [[EXIT:%.*]]
+; NO_MSSA: call:
+; NO_MSSA-NEXT: call void @test4a(i8* byval align 1 [[A2]])
+; NO_MSSA-NEXT: br label [[EXIT]]
+; NO_MSSA: exit:
+; NO_MSSA-NEXT: ret void
+;
+; MSSA-LABEL: @test4_non_local(
+; MSSA-NEXT: [[A1:%.*]] = alloca [[TMP1:%.*]], align 8
+; MSSA-NEXT: [[A2:%.*]] = bitcast %1* [[A1]] to i8*
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A2]], i8* align 4 [[P:%.*]], i64 8, i1 false)
+; MSSA-NEXT: br i1 [[C:%.*]], label [[CALL:%.*]], label [[EXIT:%.*]]
+; MSSA: call:
+; MSSA-NEXT: call void @test4a(i8* byval align 1 [[P]])
+; MSSA-NEXT: br label [[EXIT]]
+; MSSA: exit:
+; MSSA-NEXT: ret void
 ;
   %a1 = alloca %1
   %a2 = bitcast %1* %a1 to i8*
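merge-into-memset.ll below exercises the memcpy-from-memset transform (performMemCpyToMemSetOptzn()): a copy out of a buffer that a dominating memset filled can itself become a memset, shrunk to the number of bytes the memset actually defined. Schematically (a sketch with invented names and sizes):

declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)
declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

define void @memset_forward_sketch(i8* %dst) {
  %buf = alloca [16 x i8]
  %p = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 16, i1 false)
  ; Every copied byte is known to be 0, so this copy can be rewritten as
  ; memset(%dst, 0, 16).
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 16, i1 false)
  ret void
}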
diff --git a/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll b/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll
--- a/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
-; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefix=NO_MSSA
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefix=MSSA

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@@ -8,19 +8,33 @@
 ; which will be deleted.
 define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP:%.*]] = alloca [50 x i8], align 8
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast [50 x i8]* [[TMP]] to i8*
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull [[D:%.*]], i8 0, i64 10, i1 false)
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 11, i1 false)
-; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[F:%.*]], i8* nonnull align 8 [[TMP4]], i64 30, i1 false)
-; CHECK-NEXT: br label [[EXIT]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; NO_MSSA-LABEL: @foo(
+; NO_MSSA-NEXT: entry:
+; NO_MSSA-NEXT: [[TMP:%.*]] = alloca [50 x i8], align 8
+; NO_MSSA-NEXT: [[TMP4:%.*]] = bitcast [50 x i8]* [[TMP]] to i8*
+; NO_MSSA-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1
+; NO_MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull [[D:%.*]], i8 0, i64 10, i1 false)
+; NO_MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 11, i1 false)
+; NO_MSSA-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]]
+; NO_MSSA: if.then:
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[F:%.*]], i8* nonnull align 8 [[TMP4]], i64 30, i1 false)
+; NO_MSSA-NEXT: br label [[EXIT]]
+; NO_MSSA: exit:
+; NO_MSSA-NEXT: ret void
+;
+; MSSA-LABEL: @foo(
+; MSSA-NEXT: entry:
+; MSSA-NEXT: [[TMP:%.*]] = alloca [50 x i8], align 8
+; MSSA-NEXT: [[TMP4:%.*]] = bitcast [50 x i8]* [[TMP]] to i8*
+; MSSA-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1
+; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull [[D:%.*]], i8 0, i64 10, i1 false)
+; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 11, i1 false)
+; MSSA-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]]
+; MSSA: if.then:
+; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[F:%.*]], i8 0, i64 11, i1 false)
+; MSSA-NEXT: br label [[EXIT]]
+; MSSA: exit:
+; MSSA-NEXT: ret void
 ;
 entry:
   %tmp = alloca [50 x i8], align 8
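mixed-sizes.ll then checks the size guard on memcpy-memcpy forwarding: the second copy may only be redirected to the original source if it does not read past what the first copy wrote. A sketch of the invalid case (names and sizes invented):

declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

define void @size_guard_sketch(i8* noalias %dst, i8* %src) {
  %buf = alloca [8 x i8]
  %p = getelementptr inbounds [8 x i8], [8 x i8]* %buf, i32 0, i32 0
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %src, i64 4, i1 false)
  ; This reads bytes 4..7 of %buf, which the first copy never wrote, so it
  ; must not be rewritten into a copy from %src.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 8, i1 false)
  ret void
}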
diff --git a/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll b/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
--- a/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
+++ b/llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
-; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefix=NO_MSSA
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefix=MSSA

 ; Handle memcpy-memcpy dependencies of differing sizes correctly.

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@@ -9,25 +9,44 @@
 ; memcpy with a larger size from the same address.
 define i32 @foo(i1 %z) {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
-; CHECK-NEXT: [[S:%.*]] = alloca [10 x i32], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[A]] to i8*
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[S]] to i8*
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[TMP1]], i8 0, i64 40, i1 false)
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i64 0, i64 0
-; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [10 x i32], [10 x i32]* [[S]], i64 0, i64 1
-; CHECK-NEXT: [[SCEVGEP7:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
-; CHECK-NEXT: br i1 [[Z:%.*]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]]
-; CHECK: for.body3.lr.ph:
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 17179869180, i1 false)
-; CHECK-NEXT: br label [[FOR_INC7_1]]
-; CHECK: for.inc7.1:
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 4, i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT: ret i32 [[TMP2]]
+; NO_MSSA-LABEL: @foo(
+; NO_MSSA-NEXT: entry:
+; NO_MSSA-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
+; NO_MSSA-NEXT: [[S:%.*]] = alloca [10 x i32], align 4
+; NO_MSSA-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[A]] to i8*
+; NO_MSSA-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[S]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[TMP1]], i8 0, i64 40, i1 false)
+; NO_MSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i64 0, i64 0
+; NO_MSSA-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4
+; NO_MSSA-NEXT: [[SCEVGEP:%.*]] = getelementptr [10 x i32], [10 x i32]* [[S]], i64 0, i64 1
+; NO_MSSA-NEXT: [[SCEVGEP7:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
+; NO_MSSA-NEXT: br i1 [[Z:%.*]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]]
+; NO_MSSA: for.body3.lr.ph:
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 17179869180, i1 false)
+; NO_MSSA-NEXT: br label [[FOR_INC7_1]]
+; NO_MSSA: for.inc7.1:
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 4, i1 false)
+; NO_MSSA-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; NO_MSSA-NEXT: ret i32 [[TMP2]]
+;
+; MSSA-LABEL: @foo(
+; MSSA-NEXT: entry:
+; MSSA-NEXT: [[A:%.*]] = alloca [10 x i32], align 4
+; MSSA-NEXT: [[S:%.*]] = alloca [10 x i32], align 4
+; MSSA-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[A]] to i8*
+; MSSA-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[S]] to i8*
+; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[TMP1]], i8 0, i64 40, i1 false)
+; MSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i64 0, i64 0
+; MSSA-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4
+; MSSA-NEXT: [[SCEVGEP:%.*]] = getelementptr [10 x i32], [10 x i32]* [[S]], i64 0, i64 1
+; MSSA-NEXT: [[SCEVGEP7:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
+; MSSA-NEXT: br i1 [[Z:%.*]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]]
+; MSSA: for.body3.lr.ph:
+; MSSA-NEXT: br label [[FOR_INC7_1]]
+; MSSA: for.inc7.1:
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 4, i1 false)
+; MSSA-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; MSSA-NEXT: ret i32 [[TMP2]]
 ;
 entry:
   %a = alloca [10 x i32]
diff --git a/llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll
--- a/llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s
-; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefix=NO_MSSA
+; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefix=MSSA

 ; Test whether memcpy-memcpy dependence is optimized across
 ; basic blocks (conditional branches and invokes).

@@ -22,17 +22,29 @@
 ; to copy directly from the original source rather than from the temporary.
 define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) {
-; CHECK-LABEL: @wobble(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
-; CHECK-NEXT: br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]]
-; CHECK: out:
-; CHECK-NEXT: call void @qux()
-; CHECK-NEXT: unreachable
-; CHECK: more:
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false)
-; CHECK-NEXT: ret void
+; NO_MSSA-LABEL: @wobble(
+; NO_MSSA-NEXT: bb:
+; NO_MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
+; NO_MSSA-NEXT: br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]]
+; NO_MSSA: out:
+; NO_MSSA-NEXT: call void @qux()
+; NO_MSSA-NEXT: unreachable
+; NO_MSSA: more:
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false)
+; NO_MSSA-NEXT: ret void
+;
+; MSSA-LABEL: @wobble(
+; MSSA-NEXT: bb:
+; MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false)
+; MSSA-NEXT: br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]]
+; MSSA: out:
+; MSSA-NEXT: call void @qux()
+; MSSA-NEXT: unreachable
+; MSSA: more:
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false)
+; MSSA-NEXT: ret void
 ;
 bb:
   %temp = alloca i8, i32 64
@@ -53,25 +65,45 @@
 ; source rather than from the temporary.
 define i32 @foo(i1 %t3) {
-; CHECK-LABEL: @foo(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-; CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4
-; CHECK-NEXT: [[S1:%.*]] = bitcast %struct.s* [[S]] to i8*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false)
-; CHECK-NEXT: br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]]
-; CHECK: bb4:
-; CHECK-NEXT: [[T5:%.*]] = bitcast %struct.s* [[T]] to i8*
-; CHECK-NEXT: [[S6:%.*]] = bitcast %struct.s* [[S]] to i8*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 [[S6]], i64 8, i1 false)
-; CHECK-NEXT: br label [[BB7]]
-; CHECK: bb7:
-; CHECK-NEXT: [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
-; CHECK-NEXT: [[T9:%.*]] = load i32, i32* [[T8]], align 4
-; CHECK-NEXT: [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
-; CHECK-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4
-; CHECK-NEXT: [[T12:%.*]] = add i32 [[T9]], [[T11]]
-; CHECK-NEXT: ret i32 [[T12]]
+; NO_MSSA-LABEL: @foo(
+; NO_MSSA-NEXT: bb:
+; NO_MSSA-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+; NO_MSSA-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4
+; NO_MSSA-NEXT: [[S1:%.*]] = bitcast %struct.s* [[S]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false)
+; NO_MSSA-NEXT: br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]]
+; NO_MSSA: bb4:
+; NO_MSSA-NEXT: [[T5:%.*]] = bitcast %struct.s* [[T]] to i8*
+; NO_MSSA-NEXT: [[S6:%.*]] = bitcast %struct.s* [[S]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 [[S6]], i64 8, i1 false)
+; NO_MSSA-NEXT: br label [[BB7]]
+; NO_MSSA: bb7:
+; NO_MSSA-NEXT: [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
+; NO_MSSA-NEXT: [[T9:%.*]] = load i32, i32* [[T8]], align 4
+; NO_MSSA-NEXT: [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
+; NO_MSSA-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4
+; NO_MSSA-NEXT: [[T12:%.*]] = add i32 [[T9]], [[T11]]
+; NO_MSSA-NEXT: ret i32 [[T12]]
+;
+; MSSA-LABEL: @foo(
+; MSSA-NEXT: bb:
+; MSSA-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+; MSSA-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4
+; MSSA-NEXT: [[S1:%.*]] = bitcast %struct.s* [[S]] to i8*
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false)
+; MSSA-NEXT: br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]]
+; MSSA: bb4:
+; MSSA-NEXT: [[T5:%.*]] = bitcast %struct.s* [[T]] to i8*
+; MSSA-NEXT: [[S6:%.*]] = bitcast %struct.s* [[S]] to i8*
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false)
+; MSSA-NEXT: br label [[BB7]]
+; MSSA: bb7:
+; MSSA-NEXT: [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
+; MSSA-NEXT: [[T9:%.*]] = load i32, i32* [[T8]], align 4
+; MSSA-NEXT: [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
+; MSSA-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4
+; MSSA-NEXT: [[T12:%.*]] = add i32 [[T9]], [[T11]]
+; MSSA-NEXT: ret i32 [[T12]]
 ;
 bb:
   %s = alloca %struct.s, align 4
@@ -102,37 +134,69 @@
 ; pattern.
 define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK-LABEL: @baz(
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
-; CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4
-; CHECK-NEXT: [[S3:%.*]] = bitcast %struct.s* [[S]] to i8*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false)
-; CHECK-NEXT: br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]]
-; CHECK: bb6:
-; CHECK-NEXT: invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
-; CHECK-NEXT: to label [[BB25:%.*]] unwind label [[BB9:%.*]]
-; CHECK: bb9:
-; CHECK-NEXT: [[T10:%.*]] = landingpad { i8*, i32 }
-; CHECK-NEXT: catch i8* null
-; CHECK-NEXT: br label [[BB13:%.*]]
-; CHECK: bb13:
-; CHECK-NEXT: [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null)
-; CHECK-NEXT: br label [[BB23:%.*]]
-; CHECK: bb22:
-; CHECK-NEXT: [[T23:%.*]] = bitcast %struct.s* [[T]] to i8*
-; CHECK-NEXT: [[S24:%.*]] = bitcast %struct.s* [[S]] to i8*
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 [[S24]], i64 8, i1 false)
-; CHECK-NEXT: br label [[BB23]]
-; CHECK: bb23:
-; CHECK-NEXT: [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
-; CHECK-NEXT: [[T18:%.*]] = load i32, i32* [[T17]], align 4
-; CHECK-NEXT: [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
-; CHECK-NEXT: [[T20:%.*]] = load i32, i32* [[T19]], align 4
-; CHECK-NEXT: [[T21:%.*]] = add nsw i32 [[T18]], [[T20]]
-; CHECK-NEXT: ret i32 [[T21]]
-; CHECK: bb25:
-; CHECK-NEXT: unreachable
+; NO_MSSA-LABEL: @baz(
+; NO_MSSA-NEXT: bb:
+; NO_MSSA-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+; NO_MSSA-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4
+; NO_MSSA-NEXT: [[S3:%.*]] = bitcast %struct.s* [[S]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false)
+; NO_MSSA-NEXT: br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]]
+; NO_MSSA: bb6:
+; NO_MSSA-NEXT: invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
+; NO_MSSA-NEXT: to label [[BB25:%.*]] unwind label [[BB9:%.*]]
+; NO_MSSA: bb9:
+; NO_MSSA-NEXT: [[T10:%.*]] = landingpad { i8*, i32 }
+; NO_MSSA-NEXT: catch i8* null
+; NO_MSSA-NEXT: br label [[BB13:%.*]]
+; NO_MSSA: bb13:
+; NO_MSSA-NEXT: [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null)
+; NO_MSSA-NEXT: br label [[BB23:%.*]]
+; NO_MSSA: bb22:
+; NO_MSSA-NEXT: [[T23:%.*]] = bitcast %struct.s* [[T]] to i8*
+; NO_MSSA-NEXT: [[S24:%.*]] = bitcast %struct.s* [[S]] to i8*
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 [[S24]], i64 8, i1 false)
+; NO_MSSA-NEXT: br label [[BB23]]
+; NO_MSSA: bb23:
+; NO_MSSA-NEXT: [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
+; NO_MSSA-NEXT: [[T18:%.*]] = load i32, i32* [[T17]], align 4
+; NO_MSSA-NEXT: [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
+; NO_MSSA-NEXT: [[T20:%.*]] = load i32, i32* [[T19]], align 4
+; NO_MSSA-NEXT: [[T21:%.*]] = add nsw i32 [[T18]], [[T20]]
+; NO_MSSA-NEXT: ret i32 [[T21]]
+; NO_MSSA: bb25:
+; NO_MSSA-NEXT: unreachable
+;
+; MSSA-LABEL: @baz(
+; MSSA-NEXT: bb:
+; MSSA-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
+; MSSA-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4
+; MSSA-NEXT: [[S3:%.*]] = bitcast %struct.s* [[S]] to i8*
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false)
+; MSSA-NEXT: br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]]
+; MSSA: bb6:
+; MSSA-NEXT: invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null)
+; MSSA-NEXT: to label [[BB25:%.*]] unwind label [[BB9:%.*]]
+; MSSA: bb9:
+; MSSA-NEXT: [[T10:%.*]] = landingpad { i8*, i32 }
+; MSSA-NEXT: catch i8* null
+; MSSA-NEXT: br label [[BB13:%.*]]
+; MSSA: bb13:
+; MSSA-NEXT: [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null)
+; MSSA-NEXT: br label [[BB23:%.*]]
+; MSSA: bb22:
+; MSSA-NEXT: [[T23:%.*]] = bitcast %struct.s* [[T]] to i8*
+; MSSA-NEXT: [[S24:%.*]] = bitcast %struct.s* [[S]] to i8*
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false)
+; MSSA-NEXT: br label [[BB23]]
+; MSSA: bb23:
+; MSSA-NEXT: [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0
+; MSSA-NEXT: [[T18:%.*]] = load i32, i32* [[T17]], align 4
+; MSSA-NEXT: [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1
+; MSSA-NEXT: [[T20:%.*]] = load i32, i32* [[T19]], align 4
+; MSSA-NEXT: [[T21:%.*]] = add nsw i32 [[T18]], [[T20]]
+; MSSA-NEXT: ret i32 [[T21]]
+; MSSA: bb25:
+; MSSA-NEXT: unreachable
 ;
 bb:
   %s = alloca %struct.s, align 4
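stackrestore.ll below also shows the memmove fallback in processMemCpyMemCpyDependence(): when alias analysis cannot rule out overlap between the original source and the final destination, the forwarded copy is emitted as a memmove rather than a memcpy. In sketch form (names and sizes invented):

declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1)

define void @memmove_fallback_sketch(i8* %dst, i8* %src) {
  %buf = alloca [9 x i8]
  %p = getelementptr inbounds [9 x i8], [9 x i8]* %buf, i32 0, i32 0
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %src, i64 9, i1 false)
  ; %dst and %src are not known to be disjoint, so forwarding produces
  ; a memmove from %src to %dst instead of a memcpy.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 9, i1 false)
  ret void
}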
diff --git a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
--- a/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
+++ b/llvm/test/Transforms/MemCpyOpt/stackrestore.ll
@@ -16,19 +16,33 @@
 ; a call to @external.
 define i32 @test_norestore(i32 %n) {
-; CHECK-LABEL: @test_norestore(
-; CHECK-NEXT: [[TMPMEM:%.*]] = alloca [10 x i8], align 4
-; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[TMPMEM]], i32 0, i32 0
-; CHECK-NEXT: [[P:%.*]] = alloca i8, i32 [[N:%.*]], align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[P]], i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
-; CHECK-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 9
-; CHECK-NEXT: store i8 0, i8* [[P10]], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* [[P]], i32 10, i1 false)
-; CHECK-NEXT: call void @external()
-; CHECK-NEXT: [[HEAP:%.*]] = call i8* @malloc(i32 9)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[HEAP]], i8* [[P]], i32 9, i1 false)
-; CHECK-NEXT: call void @useit(i8* [[HEAP]])
-; CHECK-NEXT: ret i32 0
+; NO_MSSA-LABEL: @test_norestore(
+; NO_MSSA-NEXT: [[TMPMEM:%.*]] = alloca [10 x i8], align 4
+; NO_MSSA-NEXT: [[TMP:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[TMPMEM]], i32 0, i32 0
+; NO_MSSA-NEXT: [[P:%.*]] = alloca i8, i32 [[N:%.*]], align 4
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[P]], i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+; NO_MSSA-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 9
+; NO_MSSA-NEXT: store i8 0, i8* [[P10]], align 1
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* [[P]], i32 10, i1 false)
+; NO_MSSA-NEXT: call void @external()
+; NO_MSSA-NEXT: [[HEAP:%.*]] = call i8* @malloc(i32 9)
+; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[HEAP]], i8* [[P]], i32 9, i1 false)
+; NO_MSSA-NEXT: call void @useit(i8* [[HEAP]])
+; NO_MSSA-NEXT: ret i32 0
+;
+; MSSA-LABEL: @test_norestore(
+; MSSA-NEXT: [[TMPMEM:%.*]] = alloca [10 x i8], align 4
+; MSSA-NEXT: [[TMP:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[TMPMEM]], i32 0, i32 0
+; MSSA-NEXT: [[P:%.*]] = alloca i8, i32 [[N:%.*]], align 4
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[P]], i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+; MSSA-NEXT: [[P10:%.*]] = getelementptr inbounds i8, i8* [[P]], i32 9
+; MSSA-NEXT: store i8 0, i8* [[P10]], align 1
+; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* [[P]], i32 10, i1 false)
+; MSSA-NEXT: call void @external()
+; MSSA-NEXT: [[HEAP:%.*]] = call i8* @malloc(i32 9)
+; MSSA-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* [[HEAP]], i8* align 1 getelementptr inbounds ([9 x i8], [9 x i8]* @str, i32 0, i32 0), i32 9, i1 false)
+; MSSA-NEXT: call void @useit(i8* [[HEAP]])
+; MSSA-NEXT: ret i32 0
 ;
   %tmpmem = alloca [10 x i8], align 4
   %tmp = getelementptr inbounds [10 x i8], [10 x i8]* %tmpmem, i32 0, i32 0