Index: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -31,7 +31,6 @@ class LoadInst; class MemCpyInst; class MemMoveInst; -class MemoryDependenceResults; class MemorySSA; class MemorySSAUpdater; class MemSetInst; @@ -40,7 +39,6 @@ class Value; class MemCpyOptPass : public PassInfoMixin { - MemoryDependenceResults *MD = nullptr; TargetLibraryInfo *TLI = nullptr; AAResults *AA = nullptr; AssumptionCache *AC = nullptr; @@ -54,9 +52,8 @@ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue for the old PM. - bool runImpl(Function &F, MemoryDependenceResults *MD, TargetLibraryInfo *TLI, - AAResults *AA, AssumptionCache *AC, DominatorTree *DT, - MemorySSA *MSSA); + bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, + AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA); private: // Helper functions Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" @@ -67,10 +66,6 @@ #define DEBUG_TYPE "memcpyopt" -static cl::opt - EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden, - cl::desc("Use MemorySSA-backed MemCpyOpt.")); - STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); @@ -282,13 +277,9 @@ AU.addPreserved(); 
AU.addPreserved(); AU.addRequired(); - if (!EnableMemorySSA) - AU.addRequired(); - AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - if (EnableMemorySSA) - AU.addRequired(); + AU.addRequired(); AU.addPreserved(); } }; @@ -304,7 +295,6 @@ false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) @@ -329,10 +319,7 @@ } void MemCpyOptPass::eraseInstruction(Instruction *I) { - if (MSSAU) - MSSAU->removeMemoryAccess(I); - if (MD) - MD->removeInstruction(I); + MSSAU->removeMemoryAccess(I); I->eraseFromParent(); } @@ -389,14 +376,12 @@ // memsets. MemoryDef *LastMemDef = nullptr; for (++BI; !BI->isTerminator(); ++BI) { - if (MSSAU) { - auto *CurrentAcc = cast_or_null( - MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); - if (CurrentAcc) { - MemInsertPoint = CurrentAcc; - if (auto *CurrentDef = dyn_cast(CurrentAcc)) - LastMemDef = CurrentDef; - } + auto *CurrentAcc = cast_or_null( + MSSAU->getMemorySSA()->getMemoryAccess(&*BI)); + if (CurrentAcc) { + MemInsertPoint = CurrentAcc; + if (auto *CurrentDef = dyn_cast(CurrentAcc)) + LastMemDef = CurrentDef; } // Calls that only access inaccessible memory do not block merging @@ -494,19 +479,17 @@ if (!Range.TheStores.empty()) AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); - if (MSSAU) { - assert(LastMemDef && MemInsertPoint && - "Both LastMemDef and MemInsertPoint need to be set"); - auto *NewDef = - cast(MemInsertPoint->getMemoryInst() == &*BI - ? 
MSSAU->createMemoryAccessBefore( - AMemSet, LastMemDef, MemInsertPoint) - : MSSAU->createMemoryAccessAfter( - AMemSet, LastMemDef, MemInsertPoint)); - MSSAU->insertDef(NewDef, /*RenameUses=*/true); - LastMemDef = NewDef; - MemInsertPoint = NewDef; - } + assert(LastMemDef && MemInsertPoint && + "Both LastMemDef and MemInsertPoint need to be set"); + auto *NewDef = + cast(MemInsertPoint->getMemoryInst() == &*BI + ? MSSAU->createMemoryAccessBefore( + AMemSet, LastMemDef, MemInsertPoint) + : MSSAU->createMemoryAccessAfter( + AMemSet, LastMemDef, MemInsertPoint)); + MSSAU->insertDef(NewDef, /*RenameUses=*/true); + LastMemDef = NewDef; + MemInsertPoint = NewDef; // Zap all the stores. for (Instruction *SI : Range.TheStores) @@ -615,17 +598,15 @@ // TODO: Simplify this once P will be determined by MSSA, in which case the // discrepancy can no longer occur. MemoryUseOrDef *MemInsertPoint = nullptr; - if (MSSAU) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { - MemInsertPoint = cast(--MA->getIterator()); - } else { - const Instruction *ConstP = P; - for (const Instruction &I : make_range(++ConstP->getReverseIterator(), - ++LI->getReverseIterator())) { - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { - MemInsertPoint = MA; - break; - } + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) { + MemInsertPoint = cast(--MA->getIterator()); + } else { + const Instruction *ConstP = P; + for (const Instruction &I : make_range(++ConstP->getReverseIterator(), + ++LI->getReverseIterator())) { + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) { + MemInsertPoint = MA; + break; } } } @@ -634,12 +615,10 @@ for (auto *I : llvm::reverse(ToLift)) { LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); I->moveBefore(P); - if (MSSAU) { - assert(MemInsertPoint && "Must have found insert point"); - if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { - MSSAU->moveAfter(MA, 
MemInsertPoint); - MemInsertPoint = MA; - } + assert(MemInsertPoint && "Must have found insert point"); + if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) { + MSSAU->moveAfter(MA, MemInsertPoint); + MemInsertPoint = MA; } } @@ -724,13 +703,10 @@ LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); - if (MSSAU) { - auto *LastDef = - cast(MSSAU->getMemorySSA()->getMemoryAccess(SI)); - auto *NewAccess = - MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); - MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - } + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); eraseInstruction(SI); eraseInstruction(LI); @@ -746,38 +722,21 @@ // happen to be using a load-store pair to implement it, rather than // a memcpy. CallInst *C = nullptr; - if (EnableMemorySSA) { - if (auto *LoadClobber = dyn_cast( - MSSA->getWalker()->getClobberingMemoryAccess(LI))) { - // The load most post-dom the call. Limit to the same block for now. - // TODO: Support non-local call-slot optimization? - if (LoadClobber->getBlock() == SI->getParent()) - C = dyn_cast_or_null(LoadClobber->getMemoryInst()); - } - } else { - MemDepResult ldep = MD->getDependency(LI); - if (ldep.isClobber() && !isa(ldep.getInst())) - C = dyn_cast(ldep.getInst()); + if (auto *LoadClobber = dyn_cast( + MSSA->getWalker()->getClobberingMemoryAccess(LI))) { + // The load must post-dom the call. Limit to the same block for now. + // TODO: Support non-local call-slot optimization? + if (LoadClobber->getBlock() == SI->getParent()) + C = dyn_cast_or_null(LoadClobber->getMemoryInst()); } if (C) { // Check that nothing touches the dest of the "copy" between // the call and the store. 
MemoryLocation StoreLoc = MemoryLocation::get(SI); - if (EnableMemorySSA) { - if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), - MSSA->getMemoryAccess(SI))) - C = nullptr; - } else { - for (BasicBlock::iterator I = --SI->getIterator(), - E = C->getIterator(); - I != E; --I) { - if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) { - C = nullptr; - break; - } - } - } + if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C), + MSSA->getMemoryAccess(SI))) + C = nullptr; } if (C) { @@ -822,13 +781,11 @@ LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); - if (MSSAU) { - assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(SI))); - auto *LastDef = - cast(MSSAU->getMemorySSA()->getMemoryAccess(SI)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); - MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - } + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(SI))); + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(SI)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); eraseInstruction(SI); NumMemSetInfer++; @@ -1020,11 +977,6 @@ cast(cpyDest)->setAlignment(srcAlign); } - // Drop any cached information about the call, because we may have changed - // its dependence information by changing its parameter. - if (MD) - MD->removeInstruction(C); - // Update AA metadata // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be // handled here, but combineMetadata doesn't support them yet @@ -1073,21 +1025,11 @@ // // TODO: If the code between M and MDep is transparent to the destination "c", // then we could still perform the xform by moving M up to the first memcpy. - if (EnableMemorySSA) { - // TODO: It would be sufficient to check the MDep source up to the memcpy - // size of M, rather than MDep. 
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), - MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) - return false; - } else { - // NOTE: This is conservative, it will stop on any read from the source loc, - // not just the defining memcpy. - MemDepResult SourceDep = - MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, - M->getIterator(), M->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; - } + // TODO: It would be sufficient to check the MDep source up to the memcpy + // size of M, rather than MDep. + if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M))) + return false; // If the dest of the second might alias the source of the first, then the // source and dest might overlap. We still want to eliminate the intermediate @@ -1114,12 +1056,10 @@ MDep->getRawSource(), MDep->getSourceAlign(), M->getLength(), M->isVolatile()); - if (MSSAU) { - assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(M))); - auto *LastDef = cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - } + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(M))); + auto *LastDef = cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); // Remove the instruction we're replacing. eraseInstruction(M); @@ -1155,24 +1095,13 @@ LocationSize::precise(1)))) return false; - if (EnableMemorySSA) { - // We know that dst up to src_size is not written. We now need to make sure - // that dst up to dst_size is not accessed. (If we did not move the memset, - // checking for reads would be sufficient.) 
- if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), - MSSA->getMemoryAccess(MemSet), - MSSA->getMemoryAccess(MemCpy))) { - return false; - } - } else { - // We have already checked that dst up to src_size is not accessed. We - // need to make sure that there are no accesses up to dst_size either. - MemDepResult DstDepInfo = MD->getPointerDependencyFrom( - MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(), - MemCpy->getParent()); - if (DstDepInfo.getInst() != MemSet) - return false; - } + // We know that dst up to src_size is not written. We now need to make sure + // that dst up to dst_size is not accessed. (If we did not move the memset, + // checking for reads would be sufficient.) + if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet), + MSSA->getMemoryAccess(MemSet), + MSSA->getMemoryAccess(MemCpy))) + return false; // Use the same i8* dest as the memcpy, killing the memset dest if different. Value *Dest = MemCpy->getRawDest(); @@ -1219,18 +1148,16 @@ SrcSize), MemSet->getOperand(1), MemsetLen, MaybeAlign(Align)); - if (MSSAU) { - assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && - "MemCpy must be a MemoryDef"); - // The new memset is inserted after the memcpy, but it is known that its - // defining access is the memset about to be removed which immediately - // precedes the memcpy. - auto *LastDef = - cast(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); - auto *NewAccess = MSSAU->createMemoryAccessBefore( - NewMemSet, LastDef->getDefiningAccess(), LastDef); - MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - } + assert(isa(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) && + "MemCpy must be a MemoryDef"); + // The new memset is inserted after the memcpy, but it is known that its + // defining access is the memset about to be removed which immediately + // precedes the memcpy. 
+ auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessBefore( + NewMemSet, LastDef->getDefiningAccess(), LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); eraseInstruction(MemSet); return true; @@ -1238,23 +1165,8 @@ /// Determine whether the instruction has undefined content for the given Size, /// either because it was freshly alloca'd or started its lifetime. -static bool hasUndefContents(Instruction *I, Value *Size) { - if (isa(I)) - return true; - - if (ConstantInt *CSize = dyn_cast(Size)) { - if (IntrinsicInst *II = dyn_cast(I)) - if (II->getIntrinsicID() == Intrinsic::lifetime_start) - if (ConstantInt *LTSize = dyn_cast(II->getArgOperand(0))) - if (LTSize->getZExtValue() >= CSize->getZExtValue()) - return true; - } - - return false; -} - -static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, - MemoryDef *Def, Value *Size) { +static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, + MemoryDef *Def, Value *Size) { if (MSSA->isLiveOnEntryDef(Def)) return isa(getUnderlyingObject(V)); @@ -1328,19 +1240,12 @@ // easily represent this location, we use the full 0..CopySize range. 
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy); bool CanReduceSize = false; - if (EnableMemorySSA) { - MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - MemSetAccess->getDefiningAccess(), MemCpyLoc); - if (auto *MD = dyn_cast(Clobber)) - if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize)) - CanReduceSize = true; - } else { - MemDepResult DepInfo = MD->getPointerDependencyFrom( - MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent()); - if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize)) + MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet); + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + MemSetAccess->getDefiningAccess(), MemCpyLoc); + if (auto *MD = dyn_cast(Clobber)) + if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize)) CanReduceSize = true; - } if (!CanReduceSize) return false; @@ -1352,12 +1257,10 @@ Instruction *NewM = Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), CopySize, MaybeAlign(MemCpy->getDestAlignment())); - if (MSSAU) { - auto *LastDef = - cast(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); - auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - } + auto *LastDef = + cast(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)); + auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); return true; } @@ -1387,149 +1290,88 @@ Instruction *NewM = Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), MaybeAlign(M->getDestAlignment()), false); - if (MSSAU) { - auto *LastDef = - cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); - auto *NewAccess = - MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); - MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); - } + auto *LastDef = + 
cast(MSSAU->getMemorySSA()->getMemoryAccess(M)); + auto *NewAccess = + MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef); + MSSAU->insertDef(cast(NewAccess), /*RenameUses=*/true); eraseInstruction(M); ++NumCpyToSet; return true; } - if (EnableMemorySSA) { - MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); - MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); - MemoryLocation DestLoc = MemoryLocation::getForDest(M); - const MemoryAccess *DestClobber = - MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); - - // Try to turn a partially redundant memset + memcpy into - // memcpy + smaller memset. We don't need the memcpy size for this. - // The memcpy most post-dom the memset, so limit this to the same basic - // block. A non-local generalization is likely not worthwhile. - if (auto *MD = dyn_cast(DestClobber)) - if (auto *MDep = dyn_cast_or_null(MD->getMemoryInst())) - if (DestClobber->getBlock() == M->getParent()) - if (processMemSetMemCpyDependence(M, MDep)) - return true; - - MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( - AnyClobber, MemoryLocation::getForSource(M)); - - // There are four possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. - // c) memcpy from freshly alloca'd space or space that has just started - // its lifetime copies undefined data, and we can therefore eliminate - // the memcpy in favor of the data that was already at the destination. - // d) memcpy from a just-memset'd source can be turned into memset. - if (auto *MD = dyn_cast(SrcClobber)) { - if (Instruction *MI = MD->getMemoryInst()) { - if (ConstantInt *CopySize = dyn_cast(M->getLength())) { - if (auto *C = dyn_cast(MI)) { - // The memcpy must post-dom the call. Limit to the same block for - // now. Additionally, we need to ensure that there are no accesses - // to dest between the call and the memcpy. 
Accesses to src will be - // checked by performCallSlotOptzn(). - // TODO: Support non-local call-slot optimization? - if (C->getParent() == M->getParent() && - !accessedBetween(*AA, DestLoc, MD, MA)) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? - Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), Alignment, - C)) { - LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" - << " call: " << *C << "\n" - << " memcpy: " << *M << "\n"); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } - } - } - if (auto *MDep = dyn_cast(MI)) - return processMemCpyMemCpyDependence(M, MDep); - if (auto *MDep = dyn_cast(MI)) { - if (performMemCpyToMemSetOptzn(M, MDep)) { - LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); - eraseInstruction(M); - ++NumCpyToSet; - return true; - } - } - } - - if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) { - LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n"); - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } - } else { - MemDepResult DepInfo = MD->getDependency(M); - - // Try to turn a partially redundant memset + memcpy into - // memcpy + smaller memset. We don't need the memcpy size for this. - if (DepInfo.isClobber()) - if (MemSetInst *MDep = dyn_cast(DepInfo.getInst())) + MemoryUseOrDef *MA = MSSA->getMemoryAccess(M); + MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA); + MemoryLocation DestLoc = MemoryLocation::getForDest(M); + const MemoryAccess *DestClobber = + MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc); + + // Try to turn a partially redundant memset + memcpy into + // memcpy + smaller memset. We don't need the memcpy size for this. + // The memcpy must post-dom the memset, so limit this to the same basic + // block. 
A non-local generalization is likely not worthwhile. + if (auto *MD = dyn_cast(DestClobber)) + if (auto *MDep = dyn_cast_or_null(MD->getMemoryInst())) + if (DestClobber->getBlock() == M->getParent()) if (processMemSetMemCpyDependence(M, MDep)) return true; - // There are four possible optimizations we can do for memcpy: - // a) memcpy-memcpy xform which exposes redundance for DSE. - // b) call-memcpy xform for return slot optimization. - // c) memcpy from freshly alloca'd space or space that has just started - // its lifetime copies undefined data, and we can therefore eliminate - // the memcpy in favor of the data that was already at the destination. - // d) memcpy from a just-memset'd source can be turned into memset. - if (ConstantInt *CopySize = dyn_cast(M->getLength())) { - if (DepInfo.isClobber()) { - if (CallInst *C = dyn_cast(DepInfo.getInst())) { - // FIXME: Can we pass in either of dest/src alignment here instead - // of conservatively taking the minimum? - Align Alignment = std::min(M->getDestAlign().valueOrOne(), - M->getSourceAlign().valueOrOne()); - if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), Alignment, C)) { - eraseInstruction(M); - ++NumMemCpyInstr; - return true; + MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess( + AnyClobber, MemoryLocation::getForSource(M)); + + // There are four possible optimizations we can do for memcpy: + // a) memcpy-memcpy xform which exposes redundance for DSE. + // b) call-memcpy xform for return slot optimization. + // c) memcpy from freshly alloca'd space or space that has just started + // its lifetime copies undefined data, and we can therefore eliminate + // the memcpy in favor of the data that was already at the destination. + // d) memcpy from a just-memset'd source can be turned into memset. 
+ if (auto *MD = dyn_cast(SrcClobber)) { + if (Instruction *MI = MD->getMemoryInst()) { + if (ConstantInt *CopySize = dyn_cast(M->getLength())) { + if (auto *C = dyn_cast(MI)) { + // The memcpy must post-dom the call. Limit to the same block for + // now. Additionally, we need to ensure that there are no accesses + // to dest between the call and the memcpy. Accesses to src will be + // checked by performCallSlotOptzn(). + // TODO: Support non-local call-slot optimization? + if (C->getParent() == M->getParent() && + !accessedBetween(*AA, DestLoc, MD, MA)) { + // FIXME: Can we pass in either of dest/src alignment here instead + // of conservatively taking the minimum? + Align Alignment = std::min(M->getDestAlign().valueOrOne(), + M->getSourceAlign().valueOrOne()); + if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(), + CopySize->getZExtValue(), Alignment, C)) { + LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n" + << " call: " << *C << "\n" + << " memcpy: " << *M << "\n"); + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } } } } - } - - MemoryLocation SrcLoc = MemoryLocation::getForSource(M); - MemDepResult SrcDepInfo = MD->getPointerDependencyFrom( - SrcLoc, true, M->getIterator(), M->getParent()); - - if (SrcDepInfo.isClobber()) { - if (MemCpyInst *MDep = dyn_cast(SrcDepInfo.getInst())) + if (auto *MDep = dyn_cast(MI)) return processMemCpyMemCpyDependence(M, MDep); - } else if (SrcDepInfo.isDef()) { - if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) { - eraseInstruction(M); - ++NumMemCpyInstr; - return true; - } - } - - if (SrcDepInfo.isClobber()) - if (MemSetInst *MDep = dyn_cast(SrcDepInfo.getInst())) + if (auto *MDep = dyn_cast(MI)) { if (performMemCpyToMemSetOptzn(M, MDep)) { + LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n"); eraseInstruction(M); ++NumCpyToSet; return true; } + } + } + + if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) { + LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n"); + 
eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } } return false; @@ -1559,11 +1401,6 @@ // For MemorySSA nothing really changes (except that memcpy may imply stricter // aliasing guarantees). - // MemDep may have over conservative information about this instruction, just - // conservatively flush it from the cache. - if (MD) - MD->removeInstruction(M); - ++NumMoveToCpy; return true; } @@ -1576,22 +1413,14 @@ Type *ByValTy = cast(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize)); + MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); + if (!CallAccess) + return false; MemCpyInst *MDep = nullptr; - if (EnableMemorySSA) { - MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); - if (!CallAccess) - return false; - MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( - CallAccess->getDefiningAccess(), Loc); - if (auto *MD = dyn_cast(Clobber)) - MDep = dyn_cast_or_null(MD->getMemoryInst()); - } else { - MemDepResult DepInfo = MD->getPointerDependencyFrom( - Loc, true, CB.getIterator(), CB.getParent()); - if (!DepInfo.isClobber()) - return false; - MDep = dyn_cast(DepInfo.getInst()); - } + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + CallAccess->getDefiningAccess(), Loc); + if (auto *MD = dyn_cast(Clobber)) + MDep = dyn_cast_or_null(MD->getMemoryInst()); // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by // a memcpy, see if we can byval from the source of the memcpy instead of the @@ -1629,19 +1458,9 @@ // *b = 42; // foo(*a) // It would be invalid to transform the second memcpy into foo(*b). - if (EnableMemorySSA) { - if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), - MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) - return false; - } else { - // NOTE: This is conservative, it will stop on any read from the source loc, - // not just the defining memcpy. 
- MemDepResult SourceDep = MD->getPointerDependencyFrom( - MemoryLocation::getForSource(MDep), false, - CB.getIterator(), MDep->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; - } + if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB))) + return false; Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) { @@ -1708,43 +1527,34 @@ } PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { - auto *MD = !EnableMemorySSA ? &AM.getResult(F) - : AM.getCachedResult(F); auto &TLI = AM.getResult(F); auto *AA = &AM.getResult(F); auto *AC = &AM.getResult(F); auto *DT = &AM.getResult(F); - auto *MSSA = EnableMemorySSA ? &AM.getResult(F) - : AM.getCachedResult(F); + auto *MSSA = &AM.getResult(F); - bool MadeChange = - runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr); + bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA()); if (!MadeChange) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet(); PA.preserve(); - if (MD) - PA.preserve(); - if (MSSA) - PA.preserve(); + PA.preserve(); return PA; } -bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, - TargetLibraryInfo *TLI_, AliasAnalysis *AA_, - AssumptionCache *AC_, DominatorTree *DT_, - MemorySSA *MSSA_) { +bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_, + AliasAnalysis *AA_, AssumptionCache *AC_, + DominatorTree *DT_, MemorySSA *MSSA_) { bool MadeChange = false; - MD = MD_; TLI = TLI_; AA = AA_; AC = AC_; DT = DT_; MSSA = MSSA_; MemorySSAUpdater MSSAU_(MSSA_); - MSSAU = MSSA_ ? &MSSAU_ : nullptr; + MSSAU = &MSSAU_; // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. 
@@ -1757,10 +1567,9 @@ MadeChange = true; } - if (MSSA_ && VerifyMemorySSA) + if (VerifyMemorySSA) MSSA_->verifyMemorySSA(); - MD = nullptr; return MadeChange; } @@ -1769,17 +1578,11 @@ if (skipFunction(F)) return false; - auto *MDWP = !EnableMemorySSA - ? &getAnalysis() - : getAnalysisIfAvailable(); auto *TLI = &getAnalysis().getTLI(F); auto *AA = &getAnalysis().getAAResults(); auto *AC = &getAnalysis().getAssumptionCache(F); auto *DT = &getAnalysis().getDomTree(); - auto *MSSAWP = EnableMemorySSA - ? &getAnalysis() - : getAnalysisIfAvailable(); + auto *MSSA = &getAnalysis().getMSSA(); - return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT, - MSSAWP ? &MSSAWP->getMSSA() : nullptr); + return Impl.runImpl(F, TLI, AA, AC, DT, MSSA); } Index: llvm/test/Analysis/BasicAA/phi-values-usage.ll =================================================================== --- llvm/test/Analysis/BasicAA/phi-values-usage.ll +++ llvm/test/Analysis/BasicAA/phi-values-usage.ll @@ -1,16 +1,16 @@ -; RUN: opt -debug-pass=Executions -phi-values -memcpyopt -instcombine -disable-output < %s -enable-new-pm=0 -enable-memcpyopt-memoryssa=0 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-MEMCPY -; RUN: opt -debug-pass=Executions -memdep -instcombine -disable-output < %s -enable-new-pm=0 2>&1 | FileCheck %s -check-prefix=CHECK -; RUN: opt -debug-pass-manager -aa-pipeline=basic-aa -passes=memcpyopt,instcombine -disable-output -enable-memcpyopt-memoryssa=0 < %s 2>&1 | FileCheck %s -check-prefixes=NPM +; RUN: opt -debug-pass=Executions -phi-values -memcpyopt -instcombine -disable-output < %s -enable-new-pm=0 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-MEMCPY +; RUN: opt -debug-pass=Executions -phi-values -memoryssa -instcombine -disable-output < %s -enable-new-pm=0 2>&1 | FileCheck %s -check-prefix=CHECK +; RUN: opt -debug-pass-manager -aa-pipeline=basic-aa -passes='require,memcpyopt,instcombine' -disable-output < %s 2>&1 | FileCheck %s -check-prefixes=NPM ; Check that phi 
values is not run when it's not already available, and that ; basicaa is not freed after a pass that preserves CFG, as it preserves CFG. ; CHECK: Executing Pass 'Phi Values Analysis' ; CHECK: Executing Pass 'Basic Alias Analysis (stateless AA impl)' -; CHECK: Executing Pass 'Memory Dependence Analysis' +; CHECK: Executing Pass 'Memory SSA' ; CHECK-MEMCPY: Executing Pass 'MemCpy Optimization' ; CHECK-MEMCPY-DAG: Freeing Pass 'MemCpy Optimization' -; CHECK-DAG: Freeing Pass 'Memory Dependence Analysis' +; CHECK-DAG: Freeing Pass 'Memory SSA' ; CHECK-DAG: Freeing Pass 'Phi Values Analysis' ; CHECK-NOT: Executing Pass 'Phi Values Analysis' ; CHECK-NOT: Executing Pass 'Basic Alias Analysis (stateless AA impl)' @@ -18,7 +18,7 @@ ; NPM-DAG: Running analysis: PhiValuesAnalysis ; NPM-DAG: Running analysis: BasicAA -; NPM-DAG: Running analysis: MemoryDependenceAnalysis +; NPM-DAG: Running analysis: MemorySSA ; NPM: Running pass: MemCpyOptPass ; NPM-NOT: Invalidating analysis ; NPM: Running pass: InstCombinePass Index: llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll +++ llvm/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -memcpyopt -dse -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -basic-aa -memcpyopt -dse -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -basic-aa -memcpyopt -dse -S -verify-memoryssa | FileCheck %s ; PR2077 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" Index: llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll =================================================================== --- 
llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll +++ llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -basic-aa -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" %a = type { i32 } Index: llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll +++ llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -basic-aa -memcpyopt -S -verify-memoryssa | FileCheck %s ; PR10067 ; Make sure the call+copy isn't optimized in such a way that ; %ret ends up with the wrong value. 
Index: llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll +++ llvm/test/Transforms/MemCpyOpt/aggregate-type-crash.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -memcpyopt -S -o - < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -memcpyopt -S -o - < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -memcpyopt -S -o - < %s -verify-memoryssa | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" Index: llvm/test/Transforms/MemCpyOpt/align.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/align.ll +++ llvm/test/Transforms/MemCpyOpt/align.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -S -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -S -basic-aa -memcpyopt -verify-memoryssa | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind Index: llvm/test/Transforms/MemCpyOpt/atomic.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/atomic.ll +++ llvm/test/Transforms/MemCpyOpt/atomic.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -basic-aa -memcpyopt -S < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -basic-aa -memcpyopt -S < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -basic-aa -memcpyopt -S < %s 
-verify-memoryssa | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.0" Index: llvm/test/Transforms/MemCpyOpt/byval-readnone.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/byval-readnone.ll +++ llvm/test/Transforms/MemCpyOpt/byval-readnone.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s %struct = type { i16 } Index: llvm/test/Transforms/MemCpyOpt/callslot.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/callslot.ll +++ llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA -; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA +; RUN: opt -S -memcpyopt < %s -verify-memoryssa | FileCheck %s define i8 @read_dest_between_call_and_memcpy() { ; CHECK-LABEL: @read_dest_between_call_and_memcpy( @@ -26,25 +25,15 @@ } define i8 @read_src_between_call_and_memcpy() { -; NO_MSSA-LABEL: @read_src_between_call_and_memcpy( -; NO_MSSA-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 -; NO_MSSA-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 -; NO_MSSA-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* -; NO_MSSA-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* -; NO_MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) -; NO_MSSA-NEXT: 
[[X:%.*]] = load i8, i8* [[SRC_I8]], align 1 -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 16, i1 false) -; NO_MSSA-NEXT: ret i8 [[X]] -; -; MSSA-LABEL: @read_src_between_call_and_memcpy( -; MSSA-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 -; MSSA-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 -; MSSA-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* -; MSSA-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* -; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) -; MSSA-NEXT: [[X:%.*]] = load i8, i8* [[SRC_I8]], align 1 -; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I8]], i8 0, i64 16, i1 false) -; MSSA-NEXT: ret i8 [[X]] +; CHECK-LABEL: @read_src_between_call_and_memcpy( +; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[DEST_I8:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* +; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) +; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[SRC_I8]], align 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I8]], i8 0, i64 16, i1 false) +; CHECK-NEXT: ret i8 [[X]] ; %dest = alloca [16 x i8] %src = alloca [16 x i8] @@ -103,7 +92,7 @@ ; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRC_I8]], i8 0, i64 16, i1 false) -; CHECK-NEXT: call void @may_throw() [[ATTR2:#.*]] +; CHECK-NEXT: call void @may_throw() #[[ATTR2:[0-9]+]] ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DEST_I8:%.*]], i8 0, i64 16, i1 false) ; CHECK-NEXT: ret void ; @@ -123,7 +112,7 @@ ; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 ; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]* ; CHECK-NEXT: 
[[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8* -; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) [[ATTR3:#.*]] +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -163,7 +152,7 @@ ; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 ; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]* ; CHECK-NEXT: [[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8* -; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) [[ATTR3]] +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) #[[ATTR3]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -183,7 +172,7 @@ ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* ; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I8]]) ; CHECK-NEXT: [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* -; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) [[ATTR4:#.*]] +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) #[[ATTR4:[0-9]+]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -204,7 +193,7 @@ ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* ; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I8]]) ; CHECK-NEXT: [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* -; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) [[ATTR5:#.*]] +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] @@ -226,7 +215,7 @@ ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [16 x i8]* [[SRC]] to i8* ; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I8]]) ; CHECK-NEXT: [[DEST1:%.*]] = bitcast [16 x i8]* [[DEST]] to i8* -; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) [[ATTR0:#.*]] +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST1]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8] Index: llvm/test/Transforms/MemCpyOpt/callslot_aa.ll =================================================================== --- 
llvm/test/Transforms/MemCpyOpt/callslot_aa.ll +++ llvm/test/Transforms/MemCpyOpt/callslot_aa.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -S -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -S -basic-aa -memcpyopt -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %T = type { i64, i64 } Index: llvm/test/Transforms/MemCpyOpt/callslot_deref.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/callslot_deref.ll +++ llvm/test/Transforms/MemCpyOpt/callslot_deref.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -S -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -S -basic-aa -memcpyopt -verify-memoryssa | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) unnamed_addr nounwind Index: llvm/test/Transforms/MemCpyOpt/callslot_throw.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/callslot_throw.ll +++ llvm/test/Transforms/MemCpyOpt/callslot_throw.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -S -memcpyopt < %s -verify-memoryssa | FileCheck %s declare void @may_throw(i32* nocapture %x) define void @test1(i32* nocapture noalias dereferenceable(4) %x) { Index: 
llvm/test/Transforms/MemCpyOpt/capturing-func.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/capturing-func.ll +++ llvm/test/Transforms/MemCpyOpt/capturing-func.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -basic-aa -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e" Index: llvm/test/Transforms/MemCpyOpt/crash.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/crash.ll +++ llvm/test/Transforms/MemCpyOpt/crash.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -S -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -S -basic-aa -memcpyopt -verify-memoryssa | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "armv7-eabi" Index: llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -memcpyopt -S < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -memcpyopt -S < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -memcpyopt -S < %s -verify-memoryssa | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" Index: 
llvm/test/Transforms/MemCpyOpt/form-memset.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/form-memset.ll +++ llvm/test/Transforms/MemCpyOpt/form-memset.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s ; All the stores in this example should be merged into a single memset. Index: llvm/test/Transforms/MemCpyOpt/invariant.start.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/invariant.start.ll +++ llvm/test/Transforms/MemCpyOpt/invariant.start.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; MemCpy optimizations should take place even in presence of invariant.start -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO_MSSA -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA +; RUN: opt < %s -basic-aa -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" @@ -14,25 +13,16 @@ declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly -; FIXME: The invariant.start does not modify %P. ; The intermediate alloca and one of the memcpy's should be eliminated, the ; other should be transformed to a memmove. 
define void @test1(i8* %P, i8* %Q) nounwind { -; NO_MSSA-LABEL: @test1( -; NO_MSSA-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0:%.*]], align 16 -; NO_MSSA-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8* -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[R]], i8* align 16 [[P:%.*]], i32 32, i1 false) -; NO_MSSA-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[P]]) -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[R]], i32 32, i1 false) -; NO_MSSA-NEXT: ret void -; -; MSSA-LABEL: @test1( -; MSSA-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0:%.*]], align 16 -; MSSA-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8* -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[R]], i8* align 16 [[P:%.*]], i32 32, i1 false) -; MSSA-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[P]]) -; MSSA-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P]], i32 32, i1 false) -; MSSA-NEXT: ret void +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[MEMTMP:%.*]] = alloca [[TMP0:%.*]], align 16 +; CHECK-NEXT: [[R:%.*]] = bitcast %0* [[MEMTMP]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[R]], i8* align 16 [[P:%.*]], i32 32, i1 false) +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* [[P]]) +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* align 16 [[Q:%.*]], i8* align 16 [[P]], i32 32, i1 false) +; CHECK-NEXT: ret void ; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* Index: llvm/test/Transforms/MemCpyOpt/lifetime.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/lifetime.ll +++ llvm/test/Transforms/MemCpyOpt/lifetime.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -O2 -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | 
FileCheck %s +; RUN: opt < %s -O2 -S -verify-memoryssa | FileCheck %s ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. Index: llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/load-store-to-memcpy.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -basic-aa -scoped-noalias-aa -memcpyopt -S %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -basic-aa -scoped-noalias-aa -memcpyopt -S %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -basic-aa -scoped-noalias-aa -memcpyopt -S %s -verify-memoryssa | FileCheck %s %T = type { i8, i32 } Index: llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll +++ llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -S < %s -basic-aa -memcpyopt -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -S < %s -basic-aa -memcpyopt -verify-memoryssa | FileCheck %s ; target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" Index: llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy-invoke-memcpy.ll @@ -1,40 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S 
-enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefix=NO_MSSA -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefix=MSSA +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s ; Test memcpy-memcpy dependencies across invoke edges. ; Test that memcpyopt works across the non-unwind edge of an invoke. -; TODO: Not supported yet. define hidden void @test_normal(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; NO_MSSA-LABEL: @test_normal( -; NO_MSSA-NEXT: entry: -; NO_MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) -; NO_MSSA-NEXT: invoke void @invoke_me() -; NO_MSSA-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]] -; NO_MSSA: lpad: -; NO_MSSA-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } -; NO_MSSA-NEXT: catch i8* null -; NO_MSSA-NEXT: ret void -; NO_MSSA: try.cont: -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false) -; NO_MSSA-NEXT: ret void -; -; MSSA-LABEL: @test_normal( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) -; MSSA-NEXT: invoke void @invoke_me() -; MSSA-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]] -; MSSA: lpad: -; MSSA-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } -; MSSA-NEXT: catch i8* null -; MSSA-NEXT: ret void -; MSSA: try.cont: -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false) -; MSSA-NEXT: ret void +; CHECK-LABEL: @test_normal( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 
[[SRC:%.*]], i64 64, i1 false) +; CHECK-NEXT: invoke void @invoke_me() +; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: lpad: +; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* null +; CHECK-NEXT: ret void +; CHECK: try.cont: +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false) +; CHECK-NEXT: ret void ; entry: %temp = alloca i8, i32 64 @@ -53,36 +37,21 @@ } ; Test that memcpyopt works across the unwind edge of an invoke. -; TODO: Not supported yet. define hidden void @test_unwind(i8* noalias %dst, i8* %src) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; NO_MSSA-LABEL: @test_unwind( -; NO_MSSA-NEXT: entry: -; NO_MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) -; NO_MSSA-NEXT: invoke void @invoke_me() -; NO_MSSA-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]] -; NO_MSSA: lpad: -; NO_MSSA-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } -; NO_MSSA-NEXT: catch i8* null -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false) -; NO_MSSA-NEXT: ret void -; NO_MSSA: try.cont: -; NO_MSSA-NEXT: ret void -; -; MSSA-LABEL: @test_unwind( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) -; MSSA-NEXT: invoke void @invoke_me() -; MSSA-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]] -; MSSA: lpad: -; MSSA-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } -; MSSA-NEXT: catch i8* null -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false) -; MSSA-NEXT: ret void -; MSSA: try.cont: -; MSSA-NEXT: ret void +; CHECK-LABEL: @test_unwind( +; 
CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) +; CHECK-NEXT: invoke void @invoke_me() +; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]] +; CHECK: lpad: +; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* null +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false) +; CHECK-NEXT: ret void +; CHECK: try.cont: +; CHECK-NEXT: ret void ; entry: %temp = alloca i8, i32 64 Index: llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -basic-aa -memcpyopt -instcombine -S < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -basic-aa -memcpyopt -instcombine -S < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -basic-aa -memcpyopt -instcombine -S < %s -verify-memoryssa | FileCheck %s target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy-to-memset.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -memcpyopt -S < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -memcpyopt -S < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -memcpyopt -S < %s -verify-memoryssa | FileCheck %s declare void 
@llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind Index: llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy-undef.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefixes=CHECK,NO-MSSA -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefixes=CHECK,MSSA +; RUN: opt < %s -basic-aa -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -101,20 +100,13 @@ ; lifetime.start on part of alloca, copy in range. define void @test_lifetime_partial_alias_3(i8* noalias %dst) { -; NO-MSSA-LABEL: @test_lifetime_partial_alias_3( -; NO-MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 -; NO-MSSA-NEXT: [[A_I8:%.*]] = bitcast [16 x i8]* [[A]] to i8* -; NO-MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A_I8]]) -; NO-MSSA-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[A_I8]], i64 8 -; NO-MSSA-NEXT: ret void -; -; MSSA-LABEL: @test_lifetime_partial_alias_3( -; MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 -; MSSA-NEXT: [[A_I8:%.*]] = bitcast [16 x i8]* [[A]] to i8* -; MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A_I8]]) -; MSSA-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[A_I8]], i64 8 -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST:%.*]], i8* [[GEP]], i64 4, i1 false) -; MSSA-NEXT: ret void +; CHECK-LABEL: @test_lifetime_partial_alias_3( +; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[A_I8:%.*]] = bitcast [16 x i8]* [[A]] to i8* +; CHECK-NEXT: 
call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A_I8]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[A_I8]], i64 8 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST:%.*]], i8* [[GEP]], i64 4, i1 false) +; CHECK-NEXT: ret void ; %a = alloca [16 x i8] %a.i8 = bitcast [16 x i8]* %a to i8* @@ -126,20 +118,13 @@ ; lifetime.start on part of alloca, copy out of range. define void @test_lifetime_partial_alias_4(i8* noalias %dst) { -; NO-MSSA-LABEL: @test_lifetime_partial_alias_4( -; NO-MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 -; NO-MSSA-NEXT: [[A_I8:%.*]] = bitcast [16 x i8]* [[A]] to i8* -; NO-MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A_I8]]) -; NO-MSSA-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[A_I8]], i64 8 -; NO-MSSA-NEXT: ret void -; -; MSSA-LABEL: @test_lifetime_partial_alias_4( -; MSSA-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 -; MSSA-NEXT: [[A_I8:%.*]] = bitcast [16 x i8]* [[A]] to i8* -; MSSA-NEXT: call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A_I8]]) -; MSSA-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[A_I8]], i64 8 -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST:%.*]], i8* [[GEP]], i64 8, i1 false) -; MSSA-NEXT: ret void +; CHECK-LABEL: @test_lifetime_partial_alias_4( +; CHECK-NEXT: [[A:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[A_I8:%.*]] = bitcast [16 x i8]* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A_I8]]) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* [[A_I8]], i64 8 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST:%.*]], i8* [[GEP]], i64 8, i1 false) +; CHECK-NEXT: ret void ; %a = alloca [16 x i8] %a.i8 = bitcast [16 x i8]* %a to i8* Index: llvm/test/Transforms/MemCpyOpt/memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; 
RUN: opt < %s -basic-aa -memcpyopt -dse -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -basic-aa -memcpyopt -dse -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -basic-aa -memcpyopt -dse -S -verify-memoryssa | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" @@ -8,6 +7,8 @@ %0 = type { x86_fp80, x86_fp80 } %1 = type { i32, i32 } +; Check that one of the memcpy's are removed. +;; FIXME: PR 8643 We should be able to eliminate the last memcpy here. define void @test1(%0* sret(%0) %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: @@ -32,10 +33,6 @@ %agg.result21 = bitcast %0* %agg.result to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %agg.result21, i8* align 16 %tmp219, i32 32, i1 false) ret void - -; Check that one of the memcpy's are removed. -;; FIXME: PR 8643 We should be able to eliminate the last memcpy here. - } declare void @ccoshl(%0* nocapture sret(%0), x86_fp80, x86_fp80) nounwind Index: llvm/test/Transforms/MemCpyOpt/memmove.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memmove.ll +++ llvm/test/Transforms/MemCpyOpt/memmove.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -basic-aa -memcpyopt -S -verify-memoryssa | FileCheck %s ; These memmoves should get optimized to memcpys. 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" Index: llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll +++ llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -memcpyopt -S %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -memcpyopt -S %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -memcpyopt -S %s -verify-memoryssa | FileCheck %s ; memset -> memcpy forwarding, if memcpy is larger than memset, but trailing ; bytes are known to be undef. Index: llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll +++ llvm/test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -basic-aa -memcpyopt -S %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -basic-aa -memcpyopt -S %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -basic-aa -memcpyopt -S %s -verify-memoryssa | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll +++ llvm/test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -memcpyopt -S %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -memcpyopt -S %s 
-enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -memcpyopt -S %s -verify-memoryssa | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll +++ llvm/test/Transforms/MemCpyOpt/merge-into-memset.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefix=NO_MSSA -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefix=MSSA +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -8,33 +7,19 @@ ; which will be deleted. define void @foo(i1 %c, i8* %d, i8* %e, i8* %f) { -; NO_MSSA-LABEL: @foo( -; NO_MSSA-NEXT: entry: -; NO_MSSA-NEXT: [[TMP:%.*]] = alloca [50 x i8], align 8 -; NO_MSSA-NEXT: [[TMP4:%.*]] = bitcast [50 x i8]* [[TMP]] to i8* -; NO_MSSA-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1 -; NO_MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull [[D:%.*]], i8 0, i64 10, i1 false) -; NO_MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 11, i1 false) -; NO_MSSA-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] -; NO_MSSA: if.then: -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[F:%.*]], i8* nonnull align 8 [[TMP4]], i64 30, i1 false) -; NO_MSSA-NEXT: br label [[EXIT]] -; NO_MSSA: exit: -; NO_MSSA-NEXT: ret void -; -; MSSA-LABEL: @foo( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[TMP:%.*]] = alloca [50 x i8], align 8 -; MSSA-NEXT: [[TMP4:%.*]] = bitcast [50 x i8]* [[TMP]] to i8* -; MSSA-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1 -; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull [[D:%.*]], i8 0, 
i64 10, i1 false) -; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 11, i1 false) -; MSSA-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] -; MSSA: if.then: -; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[F:%.*]], i8 0, i64 11, i1 false) -; MSSA-NEXT: br label [[EXIT]] -; MSSA: exit: -; MSSA-NEXT: ret void +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP:%.*]] = alloca [50 x i8], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast [50 x i8]* [[TMP]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i64 1 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull [[D:%.*]], i8 0, i64 10, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP4]], i8 0, i64 11, i1 false) +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[F:%.*]], i8 0, i64 11, i1 false) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: %tmp = alloca [50 x i8], align 8 Index: llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll +++ llvm/test/Transforms/MemCpyOpt/mixed-sizes.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefix=NO_MSSA -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefix=MSSA +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s ; Handle memcpy-memcpy dependencies of differing sizes correctly. target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -9,44 +8,24 @@ ; memcpy with a larger size from the same address. 
define i32 @foo(i1 %z) { -; NO_MSSA-LABEL: @foo( -; NO_MSSA-NEXT: entry: -; NO_MSSA-NEXT: [[A:%.*]] = alloca [10 x i32], align 4 -; NO_MSSA-NEXT: [[S:%.*]] = alloca [10 x i32], align 4 -; NO_MSSA-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[A]] to i8* -; NO_MSSA-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[S]] to i8* -; NO_MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[TMP1]], i8 0, i64 40, i1 false) -; NO_MSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i64 0, i64 0 -; NO_MSSA-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -; NO_MSSA-NEXT: [[SCEVGEP:%.*]] = getelementptr [10 x i32], [10 x i32]* [[S]], i64 0, i64 1 -; NO_MSSA-NEXT: [[SCEVGEP7:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; NO_MSSA-NEXT: br i1 [[Z:%.*]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]] -; NO_MSSA: for.body3.lr.ph: -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 17179869180, i1 false) -; NO_MSSA-NEXT: br label [[FOR_INC7_1]] -; NO_MSSA: for.inc7.1: -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 4, i1 false) -; NO_MSSA-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; NO_MSSA-NEXT: ret i32 [[TMP2]] -; -; MSSA-LABEL: @foo( -; MSSA-NEXT: entry: -; MSSA-NEXT: [[A:%.*]] = alloca [10 x i32], align 4 -; MSSA-NEXT: [[S:%.*]] = alloca [10 x i32], align 4 -; MSSA-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[A]] to i8* -; MSSA-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[S]] to i8* -; MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[TMP1]], i8 0, i64 40, i1 false) -; MSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i64 0, i64 0 -; MSSA-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -; MSSA-NEXT: [[SCEVGEP:%.*]] = getelementptr [10 x i32], [10 x i32]* [[S]], i64 0, i64 1 -; MSSA-NEXT: [[SCEVGEP7:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; MSSA-NEXT: br i1 [[Z:%.*]], label 
[[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]] -; MSSA: for.body3.lr.ph: -; MSSA-NEXT: br label [[FOR_INC7_1]] -; MSSA: for.inc7.1: -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 4, i1 false) -; MSSA-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; MSSA-NEXT: ret i32 [[TMP2]] +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [10 x i32], align 4 +; CHECK-NEXT: [[S:%.*]] = alloca [10 x i32], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast [10 x i32]* [[A]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 16 [[TMP1]], i8 0, i64 40, i1 false) +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[A]], i64 0, i64 0 +; CHECK-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [10 x i32], [10 x i32]* [[S]], i64 0, i64 1 +; CHECK-NEXT: [[SCEVGEP7:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; CHECK-NEXT: br i1 [[Z:%.*]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC7_1:%.*]] +; CHECK: for.body3.lr.ph: +; CHECK-NEXT: br label [[FOR_INC7_1]] +; CHECK: for.inc7.1: +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 [[SCEVGEP7]], i64 4, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: ret i32 [[TMP2]] ; entry: %a = alloca [10 x i32] Index: llvm/test/Transforms/MemCpyOpt/non-integral.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/non-integral.ll +++ llvm/test/Transforms/MemCpyOpt/non-integral.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -memcpyopt -S < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -memcpyopt -S < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -memcpyopt -S < %s -verify-memoryssa | 
FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128-ni:1" Index: llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/nonlocal-memcpy-memcpy.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s --check-prefix=NO_MSSA -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s --check-prefix=MSSA +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s ; Test whether memcpy-memcpy dependence is optimized across ; basic blocks (conditional branches and invokes). @@ -22,29 +21,17 @@ ; to copy directly from the original source rather than from the temporary. define void @wobble(i8* noalias %dst, i8* %src, i1 %some_condition) { -; NO_MSSA-LABEL: @wobble( -; NO_MSSA-NEXT: bb: -; NO_MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) -; NO_MSSA-NEXT: br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]] -; NO_MSSA: out: -; NO_MSSA-NEXT: call void @qux() -; NO_MSSA-NEXT: unreachable -; NO_MSSA: more: -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[TEMP]], i64 64, i1 false) -; NO_MSSA-NEXT: ret void -; -; MSSA-LABEL: @wobble( -; MSSA-NEXT: bb: -; MSSA-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) -; MSSA-NEXT: br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]] -; MSSA: out: -; MSSA-NEXT: call void @qux() -; MSSA-NEXT: unreachable -; MSSA: more: -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 
[[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false) -; MSSA-NEXT: ret void +; CHECK-LABEL: @wobble( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i8, i32 64, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TEMP]], i8* nonnull align 8 [[SRC:%.*]], i64 64, i1 false) +; CHECK-NEXT: br i1 [[SOME_CONDITION:%.*]], label [[MORE:%.*]], label [[OUT:%.*]] +; CHECK: out: +; CHECK-NEXT: call void @qux() +; CHECK-NEXT: unreachable +; CHECK: more: +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[DST:%.*]], i8* align 8 [[SRC]], i64 64, i1 false) +; CHECK-NEXT: ret void ; bb: %temp = alloca i8, i32 64 @@ -65,45 +52,25 @@ ; source rather than from the temporary. define i32 @foo(i1 %t3) { -; NO_MSSA-LABEL: @foo( -; NO_MSSA-NEXT: bb: -; NO_MSSA-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -; NO_MSSA-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4 -; NO_MSSA-NEXT: [[S1:%.*]] = bitcast %struct.s* [[S]] to i8* -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false) -; NO_MSSA-NEXT: br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]] -; NO_MSSA: bb4: -; NO_MSSA-NEXT: [[T5:%.*]] = bitcast %struct.s* [[T]] to i8* -; NO_MSSA-NEXT: [[S6:%.*]] = bitcast %struct.s* [[S]] to i8* -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 [[S6]], i64 8, i1 false) -; NO_MSSA-NEXT: br label [[BB7]] -; NO_MSSA: bb7: -; NO_MSSA-NEXT: [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0 -; NO_MSSA-NEXT: [[T9:%.*]] = load i32, i32* [[T8]], align 4 -; NO_MSSA-NEXT: [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1 -; NO_MSSA-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4 -; NO_MSSA-NEXT: [[T12:%.*]] = add i32 [[T9]], [[T11]] -; NO_MSSA-NEXT: ret i32 [[T12]] -; -; MSSA-LABEL: @foo( -; MSSA-NEXT: bb: -; MSSA-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -; MSSA-NEXT: [[T:%.*]] = alloca 
[[STRUCT_S]], align 4 -; MSSA-NEXT: [[S1:%.*]] = bitcast %struct.s* [[S]] to i8* -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false) -; MSSA-NEXT: br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]] -; MSSA: bb4: -; MSSA-NEXT: [[T5:%.*]] = bitcast %struct.s* [[T]] to i8* -; MSSA-NEXT: [[S6:%.*]] = bitcast %struct.s* [[S]] to i8* -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false) -; MSSA-NEXT: br label [[BB7]] -; MSSA: bb7: -; MSSA-NEXT: [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0 -; MSSA-NEXT: [[T9:%.*]] = load i32, i32* [[T8]], align 4 -; MSSA-NEXT: [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1 -; MSSA-NEXT: [[T11:%.*]] = load i32, i32* [[T10]], align 4 -; MSSA-NEXT: [[T12:%.*]] = add i32 [[T9]], [[T11]] -; MSSA-NEXT: ret i32 [[T12]] +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +; CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4 +; CHECK-NEXT: [[S1:%.*]] = bitcast %struct.s* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S1]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false) +; CHECK-NEXT: br i1 [[T3:%.*]], label [[BB4:%.*]], label [[BB7:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[T5:%.*]] = bitcast %struct.s* [[T]] to i8* +; CHECK-NEXT: [[S6:%.*]] = bitcast %struct.s* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T5]], i8* align 4 bitcast (%struct.s* @s_foo to i8*), i64 8, i1 false) +; CHECK-NEXT: br label [[BB7]] +; CHECK: bb7: +; CHECK-NEXT: [[T8:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0 +; CHECK-NEXT: [[T9:%.*]] = load i32, i32* [[T8]], align 4 +; CHECK-NEXT: [[T10:%.*]] = getelementptr [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1 +; CHECK-NEXT: [[T11:%.*]] = load i32, i32* 
[[T10]], align 4 +; CHECK-NEXT: [[T12:%.*]] = add i32 [[T9]], [[T11]] +; CHECK-NEXT: ret i32 [[T12]] ; bb: %s = alloca %struct.s, align 4 @@ -134,69 +101,37 @@ ; pattern. define i32 @baz(i1 %t5) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; NO_MSSA-LABEL: @baz( -; NO_MSSA-NEXT: bb: -; NO_MSSA-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -; NO_MSSA-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4 -; NO_MSSA-NEXT: [[S3:%.*]] = bitcast %struct.s* [[S]] to i8* -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false) -; NO_MSSA-NEXT: br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]] -; NO_MSSA: bb6: -; NO_MSSA-NEXT: invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null) -; NO_MSSA-NEXT: to label [[BB25:%.*]] unwind label [[BB9:%.*]] -; NO_MSSA: bb9: -; NO_MSSA-NEXT: [[T10:%.*]] = landingpad { i8*, i32 } -; NO_MSSA-NEXT: catch i8* null -; NO_MSSA-NEXT: br label [[BB13:%.*]] -; NO_MSSA: bb13: -; NO_MSSA-NEXT: [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null) -; NO_MSSA-NEXT: br label [[BB23:%.*]] -; NO_MSSA: bb22: -; NO_MSSA-NEXT: [[T23:%.*]] = bitcast %struct.s* [[T]] to i8* -; NO_MSSA-NEXT: [[S24:%.*]] = bitcast %struct.s* [[S]] to i8* -; NO_MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 [[S24]], i64 8, i1 false) -; NO_MSSA-NEXT: br label [[BB23]] -; NO_MSSA: bb23: -; NO_MSSA-NEXT: [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0 -; NO_MSSA-NEXT: [[T18:%.*]] = load i32, i32* [[T17]], align 4 -; NO_MSSA-NEXT: [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1 -; NO_MSSA-NEXT: [[T20:%.*]] = load i32, i32* [[T19]], align 4 -; NO_MSSA-NEXT: [[T21:%.*]] = add nsw i32 [[T18]], [[T20]] -; NO_MSSA-NEXT: ret i32 [[T21]] -; NO_MSSA: bb25: -; NO_MSSA-NEXT: unreachable -; -; MSSA-LABEL: @baz( -; MSSA-NEXT: bb: -; MSSA-NEXT: [[S:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 -; MSSA-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4 -; MSSA-NEXT: [[S3:%.*]] = bitcast %struct.s* [[S]] to i8* -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false) -; MSSA-NEXT: br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]] -; MSSA: bb6: -; MSSA-NEXT: invoke void @__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null) -; MSSA-NEXT: to label [[BB25:%.*]] unwind label [[BB9:%.*]] -; MSSA: bb9: -; MSSA-NEXT: [[T10:%.*]] = landingpad { i8*, i32 } -; MSSA-NEXT: catch i8* null -; MSSA-NEXT: br label [[BB13:%.*]] -; MSSA: bb13: -; MSSA-NEXT: [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null) -; MSSA-NEXT: br label [[BB23:%.*]] -; MSSA: bb22: -; MSSA-NEXT: [[T23:%.*]] = bitcast %struct.s* [[T]] to i8* -; MSSA-NEXT: [[S24:%.*]] = bitcast %struct.s* [[S]] to i8* -; MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false) -; MSSA-NEXT: br label [[BB23]] -; MSSA: bb23: -; MSSA-NEXT: [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0 -; MSSA-NEXT: [[T18:%.*]] = load i32, i32* [[T17]], align 4 -; MSSA-NEXT: [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1 -; MSSA-NEXT: [[T20:%.*]] = load i32, i32* [[T19]], align 4 -; MSSA-NEXT: [[T21:%.*]] = add nsw i32 [[T18]], [[T20]] -; MSSA-NEXT: ret i32 [[T21]] -; MSSA: bb25: -; MSSA-NEXT: unreachable +; CHECK-LABEL: @baz( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +; CHECK-NEXT: [[T:%.*]] = alloca [[STRUCT_S]], align 4 +; CHECK-NEXT: [[S3:%.*]] = bitcast %struct.s* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[S3]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false) +; CHECK-NEXT: br i1 [[T5:%.*]], label [[BB6:%.*]], label [[BB22:%.*]] +; CHECK: bb6: +; CHECK-NEXT: invoke void 
@__cxa_throw(i8* null, i8* bitcast (i8** @i to i8*), i8* null) +; CHECK-NEXT: to label [[BB25:%.*]] unwind label [[BB9:%.*]] +; CHECK: bb9: +; CHECK-NEXT: [[T10:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* null +; CHECK-NEXT: br label [[BB13:%.*]] +; CHECK: bb13: +; CHECK-NEXT: [[T15:%.*]] = call i8* @__cxa_begin_catch(i8* null) +; CHECK-NEXT: br label [[BB23:%.*]] +; CHECK: bb22: +; CHECK-NEXT: [[T23:%.*]] = bitcast %struct.s* [[T]] to i8* +; CHECK-NEXT: [[S24:%.*]] = bitcast %struct.s* [[S]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[T23]], i8* align 4 bitcast (%struct.s* @s_baz to i8*), i64 8, i1 false) +; CHECK-NEXT: br label [[BB23]] +; CHECK: bb23: +; CHECK-NEXT: [[T17:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 0 +; CHECK-NEXT: [[T18:%.*]] = load i32, i32* [[T17]], align 4 +; CHECK-NEXT: [[T19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.s* [[T]], i32 0, i32 1 +; CHECK-NEXT: [[T20:%.*]] = load i32, i32* [[T19]], align 4 +; CHECK-NEXT: [[T21:%.*]] = add nsw i32 [[T18]], [[T20]] +; CHECK-NEXT: ret i32 [[T21]] +; CHECK: bb25: +; CHECK-NEXT: unreachable ; bb: %s = alloca %struct.s, align 4 Index: llvm/test/Transforms/MemCpyOpt/nontemporal.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/nontemporal.ll +++ llvm/test/Transforms/MemCpyOpt/nontemporal.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/Transforms/MemCpyOpt/pr29105.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/pr29105.ll +++ 
llvm/test/Transforms/MemCpyOpt/pr29105.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -memcpyopt -instcombine -S %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -memcpyopt -instcombine -S %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -memcpyopt -instcombine -S %s -verify-memoryssa | FileCheck %s %Foo = type { [2048 x i64] } ; Make sure that all mempcy calls are converted to memset calls, or removed. Index: llvm/test/Transforms/MemCpyOpt/pr37967.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/pr37967.ll +++ llvm/test/Transforms/MemCpyOpt/pr37967.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -debugify -memcpyopt -check-debugify -S < %s 2>&1 -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -debugify -memcpyopt -check-debugify -S < %s 2>&1 -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -debugify -memcpyopt -check-debugify -S < %s 2>&1 -verify-memoryssa | FileCheck %s ; CHECK: CheckModuleDebugify: PASS Index: llvm/test/Transforms/MemCpyOpt/process_store.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/process_store.ll +++ llvm/test/Transforms/MemCpyOpt/process_store.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -memcpyopt -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -S -memcpyopt -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -S -memcpyopt -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/Transforms/MemCpyOpt/profitable-memset.ll =================================================================== --- 
llvm/test/Transforms/MemCpyOpt/profitable-memset.ll +++ llvm/test/Transforms/MemCpyOpt/profitable-memset.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" Index: llvm/test/Transforms/MemCpyOpt/smaller.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/smaller.ll +++ llvm/test/Transforms/MemCpyOpt/smaller.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -memcpyopt -S < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -memcpyopt -S < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s -; RUN: opt -passes=memcpyopt -S < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -passes=memcpyopt -S < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -memcpyopt -S < %s -verify-memoryssa | FileCheck %s +; RUN: opt -passes=memcpyopt -S < %s -verify-memoryssa | FileCheck %s ; rdar://8875553 ; Memcpyopt shouldn't optimize the second memcpy using the first Index: llvm/test/Transforms/MemCpyOpt/sret.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/sret.ll +++ llvm/test/Transforms/MemCpyOpt/sret.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -basic-aa -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -basic-aa -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = 
"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" Index: llvm/test/Transforms/MemCpyOpt/stackrestore.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/stackrestore.ll +++ llvm/test/Transforms/MemCpyOpt/stackrestore.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -S -memcpyopt < %s -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -S -memcpyopt < %s -verify-memoryssa | FileCheck %s ; PR40118: BasicAA didn't realize that stackrestore ends the lifetime of ; unescaped dynamic allocas, such as those that might come from inalloca. Index: llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll +++ llvm/test/Transforms/MemCpyOpt/store-to-memset-is-nonzero-type.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S < %s -memcpyopt -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt -S < %s -memcpyopt -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt -S < %s -memcpyopt -verify-memoryssa | FileCheck %s ; Array Index: llvm/test/Transforms/MemCpyOpt/store-to-memset.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/store-to-memset.ll +++ llvm/test/Transforms/MemCpyOpt/store-to-memset.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -S 
-verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-grtev4-linux-gnu" Index: llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-memcpy.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @test(i8* %src, i64 %size) { Index: llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-uninit.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-uninit.ll +++ llvm/test/Transforms/MemCpyOpt/variable-sized-memcpy-uninit.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @test(i64 %size) { Index: llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/variable-sized-memset-memcpy.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -S 
-enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -S -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @test(i8* %src, i8 %c, i64 %size) { Index: llvm/test/Transforms/MemCpyOpt/vscale-memset.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/vscale-memset.ll +++ llvm/test/Transforms/MemCpyOpt/vscale-memset.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -memcpyopt -dce -S -enable-memcpyopt-memoryssa=0 | FileCheck %s -; RUN: opt < %s -memcpyopt -dce -S -enable-memcpyopt-memoryssa=1 -verify-memoryssa | FileCheck %s +; RUN: opt < %s -memcpyopt -dce -S -verify-memoryssa | FileCheck %s ; Negative test ; Check this test is not transformed into memset, or cause a compiler warning