Index: include/llvm-c/Transforms/Scalar.h =================================================================== --- include/llvm-c/Transforms/Scalar.h +++ include/llvm-c/Transforms/Scalar.h @@ -89,6 +89,9 @@ /** See llvm::createMemCpyOptPass function. */ void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM); +/** See llvm::createMemCpyOptPass function. */ +void LLVMAddMemCpyOptMemSSAPass(LLVMPassManagerRef PM); + /** See llvm::createPartiallyInlineLibCallsPass function. */ void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM); Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -237,6 +237,7 @@ void initializeMachineTraceMetricsPass(PassRegistry&); void initializeMachineVerifierPassPass(PassRegistry&); void initializeMemCpyOptLegacyPassPass(PassRegistry&); +void initializeMemCpyOptMemSSALegacyPassPass(PassRegistry&); void initializeMemDepPrinterPass(PassRegistry&); void initializeMemDerefPrinterPass(PassRegistry&); void initializeMemoryDependenceWrapperPassPass(PassRegistry&); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -351,7 +351,7 @@ // MemCpyOpt - This pass performs optimizations related to eliminating memcpy // calls and/or combining multiple stores into memset's. 
// -FunctionPass *createMemCpyOptPass(); +FunctionPass *createMemCpyOptPass(bool = false); //===----------------------------------------------------------------------===// // Index: include/llvm/Transforms/Scalar/MemCpyOptimizer.h =================================================================== --- include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -29,7 +29,11 @@ namespace llvm { +class MemorySSA; + class MemCpyOptPass : public PassInfoMixin { + bool UseMemorySSA; + MemorySSA *MSSA = nullptr; MemoryDependenceResults *MD = nullptr; TargetLibraryInfo *TLI = nullptr; std::function LookupAliasAnalysis; @@ -37,10 +41,10 @@ std::function LookupDomTree; public: - MemCpyOptPass() {} + MemCpyOptPass(bool UseMSSA) : UseMemorySSA(UseMSSA) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue for the old PM. - bool runImpl(Function &F, MemoryDependenceResults *MD_, + bool runImpl(Function &F, MemorySSA *MSSA_, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, std::function LookupAliasAnalysis_, std::function LookupAssumptionCache_, @@ -51,6 +55,7 @@ bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); bool processMemCpy(MemCpyInst *M); + bool processMemCpyMSSA(MemCpyInst *); bool processMemMove(MemMoveInst *M); bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, uint64_t cpyLen, unsigned cpyAlign, CallInst *C); @@ -61,6 +66,8 @@ Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); + void eraseInstruction(Instruction *); + bool iterateOnFunction(Function &F); }; } Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -158,7 +158,8 @@ FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("mem2reg", 
PromotePass()) -FUNCTION_PASS("memcpyopt", MemCpyOptPass()) +FUNCTION_PASS("memcpyopt", MemCpyOptPass(false)) +FUNCTION_PASS("memcpyopt-mssa", MemCpyOptPass(true)) FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -25,11 +25,15 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/MemorySSA.h" #include using namespace llvm; #define DEBUG_TYPE "memcpyopt" +static cl::opt MCOMSSA("mco-mssa", cl::init(false), cl::Hidden, + cl::desc("Force MemCpyOpt to use MemorySSA")); + STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); @@ -119,6 +123,26 @@ return true; } +// \brief Locates the nearest MemoryAccess that clobbers \p MemLoc and strictly +// dominates \p StartAbove +static MemoryAccess *getCMA(MemorySSA *MSSA, MemoryUseOrDef *StartAbove, + const MemoryLocation &MemLoc) { + MemoryAccess *Start = StartAbove->getDefiningAccess(); + return MSSA->getWalker()->getClobberingMemoryAccess(Start, MemLoc); +} + +// \brief Returns the clobber between [\p EndAt, \p StartAbove) that clobbers \p +// MemLoc, otherwise returns \p EndAt. For this query to make sense, \p EndAt +// must dominate \p StartAbove. +static MemoryAccess *getCMABetween(const MemoryLocation &MemLoc, + MemoryAccess *EndAt, + MemoryUseOrDef *StartAbove, + MemorySSA *MSSA) { + assert(MSSA->dominates(EndAt, StartAbove) && + "EndAt must dominate StartAbove."); + MemoryAccess *Clob = getCMA(MSSA, StartAbove, MemLoc); + return MSSA->dominates(EndAt, Clob) ? 
Clob : EndAt; +} /// Represents a range of memset'd bytes with the ByteVal value. /// This allows us to analyze stores like: @@ -140,6 +164,9 @@ /// range. Value *StartPtr; + /// StartPtrUser - The insertion point for the resulting memset. + Instruction *StartPtrUser; + /// Alignment - The known alignment of the first store. unsigned Alignment; @@ -252,6 +279,7 @@ R.Start = Start; R.End = End; R.StartPtr = Ptr; + R.StartPtrUser = Inst; R.Alignment = Alignment; R.TheStores.push_back(Inst); return; @@ -274,6 +302,7 @@ if (Start < I->Start) { I->Start = Start; I->StartPtr = Ptr; + I->StartPtrUser = Inst; I->Alignment = Alignment; } @@ -299,12 +328,17 @@ //===----------------------------------------------------------------------===// namespace { - class MemCpyOptLegacyPass : public FunctionPass { + template + class MemCpyOptLegacyCommon : public FunctionPass { MemCpyOptPass Impl; public: static char ID; // Pass identification, replacement for typeid - MemCpyOptLegacyPass() : FunctionPass(ID) { - initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry()); + MemCpyOptLegacyCommon() : FunctionPass(ID), Impl(UseMSSA) { + if (UseMSSA) + initializeMemCpyOptMemSSALegacyPassPass( + *PassRegistry::getPassRegistry()); + else + initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -320,6 +354,11 @@ AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + + if (UseMSSA) { + AU.addRequired(); + AU.addPreserved(); + } } // Helper functions @@ -339,11 +378,19 @@ bool iterateOnFunction(Function &F); }; - char MemCpyOptLegacyPass::ID = 0; + template<> char MemCpyOptLegacyCommon::ID = 0; + template<> char MemCpyOptLegacyCommon::ID = 0; } +using MemCpyOptLegacyPass = MemCpyOptLegacyCommon; +using MemCpyOptMemSSALegacyPass = MemCpyOptLegacyCommon; + /// The public interface to this file... 
-FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); } +FunctionPass *llvm::createMemCpyOptPass(bool UseMSSA) { + if (UseMSSA || MCOMSSA) + return new MemCpyOptMemSSALegacyPass(); + return new MemCpyOptLegacyPass(); +} INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) @@ -356,6 +403,39 @@ INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) +INITIALIZE_PASS_BEGIN(MemCpyOptMemSSALegacyPass, "memcpyopt-mssa", + "MemCpy Optimization (Memory SSA)", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(MemCpyOptMemSSALegacyPass, "memcpyopt-mssa", + "MemCpy Optimization (Memory SSA)", false, false) + +void MemCpyOptPass::eraseInstruction(Instruction *I) { + assert(MD); + MD->removeInstruction(I); + if (UseMemorySSA) + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + MSSA->removeMemoryAccess(MA); + I->eraseFromParent(); +} + +static MemoryUseOrDef *replaceMemoryAccess(MemorySSA &MSSA, Instruction *Old, + Instruction *New) { + MemoryUseOrDef *OldAcc = MSSA.getMemoryAccess(Old); + MemoryUseOrDef *NewAcc = + MSSA.createMemoryAccessBefore(New, OldAcc->getDefiningAccess(), OldAcc); + assert((isa(OldAcc) && isa(NewAcc)) || + (isa(OldAcc) && isa(NewAcc)) && + "Must replace with equivalent MSSA access type."); + OldAcc->replaceAllUsesWith(NewAcc); + return NewAcc; +} + /// When scanning forward over instructions, we look for some other patterns to /// fold away. In particular, this looks for stores to neighboring locations of /// memory. 
If it sees enough consecutive ones, it attempts to merge them @@ -373,6 +453,7 @@ BasicBlock::iterator BI(StartInst); for (++BI; !isa(BI); ++BI) { + // TODO: walk along bb AccessList instead. if (!isa(BI) && !isa(BI)) { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: @@ -423,11 +504,6 @@ // interesting as a small compile-time optimization. Ranges.addInst(0, StartInst); - // If we create any memsets, we put it right before the first instruction that - // isn't part of the memset block. This ensure that the memset is dominated - // by any addressing instruction needed by the start of the block. - IRBuilder<> Builder(&*BI); - // Now that we have full information about ranges, loop over the ranges and // emit memset's for anything big enough to be worthwhile. Instruction *AMemSet = nullptr; @@ -451,8 +527,16 @@ Alignment = DL.getABITypeAlignment(EltType); } - AMemSet = - Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); + // Insert memset just before the store in this range that uses + // Range.StartPtr. + IRBuilder<> Builder(Range.StartPtrUser); + AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start, + Alignment); + + if (UseMemorySSA) { + replaceMemoryAccess(*MSSA, Range.StartPtrUser, AMemSet); + MSSA->removeMemoryAccess(MSSA->getMemoryAccess(Range.StartPtrUser)); + } DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI : Range.TheStores) @@ -464,8 +548,7 @@ // Zap all the stores. for (Instruction *SI : Range.TheStores) { - MD->removeInstruction(SI); - SI->eraseFromParent(); + eraseInstruction(SI); } ++NumMemSetInfer; } @@ -485,11 +568,16 @@ return std::min(StoreAlign, LoadAlign); } -// This method try to lift a store instruction before position P. +// This method try to lift a store instruction before position demarcated by +// memory instruction P. 
// It will lift the store and its argument + that anything that // may alias with these. // The method returns true if it was successful. -static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P) { +static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, + MemorySSA *MSSA = nullptr) { + MemoryDef *PAcc = + MSSA ? dyn_cast_or_null(MSSA->getMemoryAccess(P)) : nullptr; + assert((!MSSA || PAcc) && "P must be a memory def-ing instruction."); // If the store alias this position, early bail out. MemoryLocation StoreLoc = MemoryLocation::get(SI); if (AA.getModRefInfo(P, StoreLoc) != MRI_NoModRef) @@ -503,7 +591,7 @@ Args.insert(Ptr); // Instruction to lift before P. - SmallVector ToLift; + SmallVector ToLift{SI}; // Memory locations of lifted instructions. SmallVector MemLocs; @@ -564,6 +652,11 @@ for (auto *I : reverse(ToLift)) { DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n"); I->moveBefore(P); + if (MSSA) { + if (MemoryUseOrDef *A = MSSA->getMemoryAccess(I)) { + MSSA->spliceMemoryAccessAbove(PAcc, A); + } + } } return true; @@ -586,22 +679,32 @@ // Load to store forwarding can be interpreted as memcpy. if (LoadInst *LI = dyn_cast(SI->getOperand(0))) { if (LI->isSimple() && LI->hasOneUse() && + // TODO: Make non-local if LI doms SI and SI post-doms LI. LI->getParent() == SI->getParent()) { + MemoryUse *LUse = + UseMemorySSA ? cast(MSSA->getMemoryAccess(LI)) : nullptr; auto *T = LI->getType(); if (T->isAggregateType()) { AliasAnalysis &AA = LookupAliasAnalysis(); MemoryLocation LoadLoc = MemoryLocation::get(LI); + Instruction *P = nullptr; - // We use alias analysis to check if an instruction may store to - // the memory we load from in between the load and the store. If - // such an instruction is found, we try to promote there instead - // of at the store position. 
- Instruction *P = SI; - for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) { - if (AA.getModRefInfo(&I, LoadLoc) & MRI_Mod) { - P = &I; - break; + if (UseMemorySSA) { + if (auto *LClob = dyn_cast( + getCMA(MSSA, MSSA->getMemoryAccess(SI), LoadLoc))) + P = MSSA->dominates(LUse, LClob) ? LClob->getMemoryInst() : SI; + } else { + // We use alias analysis to check if an instruction may store to + // the memory we load from in between the load and the store. If + // such an instruction is found, we try to promote there instead + // of at the store position. + P = SI; + for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) { + if (AA.getModRefInfo(&I, LoadLoc) & MRI_Mod) { + P = &I; + break; + } } } @@ -610,7 +713,7 @@ // position if nothing alias the store memory after this and the store // destination is not in the range. if (P && P != SI) { - if (!moveUp(AA, SI, P)) + if (!moveUp(AA, SI, P, MSSA)) P = nullptr; } @@ -637,14 +740,14 @@ M = Builder.CreateMemCpy(SI->getPointerOperand(), LI->getPointerOperand(), Size, Align, SI->isVolatile()); + if (UseMemorySSA) + replaceMemoryAccess(*MSSA, SI, M); DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); - MD->removeInstruction(SI); - SI->eraseFromParent(); - MD->removeInstruction(LI); - LI->eraseFromParent(); + eraseInstruction(SI); + eraseInstruction(LI); ++NumMemCpyInstr; // Make sure we do not invalidate the iterator. @@ -656,10 +759,19 @@ // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than // a memcpy. - MemDepResult ldep = MD->getDependency(LI); CallInst *C = nullptr; - if (ldep.isClobber() && !isa(ldep.getInst())) - C = dyn_cast(ldep.getInst()); + if (UseMemorySSA) { + if (MemoryUseOrDef *LoadClob = dyn_cast( + MSSA->getWalker()->getClobberingMemoryAccess(LUse))) + // TODO: This funnels to performCallSlotOptzn, whose transform is + // valid as long as LI doms SI and SI post-doms LI. 
+ if (LoadClob->getBlock() == SI->getParent()) + C = dyn_cast_or_null(LoadClob->getMemoryInst()); + } else { + MemDepResult ldep = MD->getDependency(LI); + if (ldep.isClobber() && !isa(ldep.getInst())) + C = dyn_cast(ldep.getInst()); + } if (C) { // Check that nothing touches the dest of the "copy" between @@ -690,10 +802,8 @@ DL.getTypeStoreSize(SI->getOperand(0)->getType()), findCommonAlignment(DL, SI, LI), C); if (changed) { - MD->removeInstruction(SI); - SI->eraseFromParent(); - MD->removeInstruction(LI); - LI->eraseFromParent(); + eraseInstruction(SI); + eraseInstruction(LI); ++NumMemCpyInstr; return true; } @@ -727,11 +837,12 @@ IRBuilder<> Builder(SI); auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, Align, SI->isVolatile()); + if (UseMemorySSA) + replaceMemoryAccess(*MSSA, SI, M); DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n"); - MD->removeInstruction(SI); - SI->eraseFromParent(); + eraseInstruction(SI); NumMemSetInfer++; // Make sure we do not invalidate the iterator. @@ -944,10 +1055,7 @@ LLVMContext::MD_invariant_group}; combineMetadata(C, cpy, KnownIDs); - // Remove the memcpy. - MD->removeInstruction(cpy); - ++NumMemCpyInstr; - + // memcpy removal left to caller. return true; } @@ -989,11 +1097,18 @@ // // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. 
- MemDepResult SourceDep = - MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, - M->getIterator(), M->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; + if (UseMemorySSA) { + MemoryUseOrDef *DepAcc = MSSA->getMemoryAccess(MDep); + if (getCMABetween(MemoryLocation::getForSource(MDep), DepAcc, + MSSA->getMemoryAccess(M), MSSA) != DepAcc) + return false; + } else { + MemDepResult SourceDep = + MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false, + M->getIterator(), M->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + } // If the dest of the second might alias the source of the first, then the // source and dest might overlap. We still want to eliminate the intermediate @@ -1013,16 +1128,17 @@ unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); IRBuilder<> Builder(M); - if (UseMemMove) - Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); - else - Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + auto *New = + UseMemMove + ? Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), + M->getLength(), Align, M->isVolatile()) + : Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), + M->getLength(), Align, M->isVolatile()); + if (UseMemorySSA) + replaceMemoryAccess(*MSSA, M, New); // Remove the instruction we're replacing. - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); ++NumMemCpyInstr; return true; } @@ -1038,8 +1154,8 @@ /// \endcode /// into: /// \code -/// memcpy(dst, src, src_size); /// memset(dst + src_size, c, dst_size <= src_size ? 
0 : dst_size - src_size); +/// memcpy(dst, src, src_size); /// \endcode bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet) { @@ -1048,11 +1164,29 @@ return false; // Check that there are no other dependencies on the memset destination. - MemDepResult DstDepInfo = - MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false, - MemCpy->getIterator(), MemCpy->getParent()); - if (DstDepInfo.getInst() != MemSet) - return false; + if (UseMemorySSA) { + MemoryUseOrDef *MSAcc = MSSA->getMemoryAccess(MemSet); + MemoryUseOrDef *MemCpyAcc = MSSA->getMemoryAccess(MemCpy); + assert(MSSA->dominates(MSAcc, MemCpyAcc)); + + // TODO: non-local + if (MemCpy->getParent() != MemSet->getParent()) + return false; + + AliasAnalysis &AA = LookupAliasAnalysis(); + using It = MemorySSA::AccessList::iterator; + for (const auto &Acc : make_range(std::next(It(MSAcc)), It(MemCpyAcc))) { + if (AA.getModRefInfo(cast(Acc).getMemoryInst(), + MemoryLocation::getForDest(MemSet)) != MRI_NoModRef) + return false; + } + } else { + MemDepResult DstDepInfo = MD->getPointerDependencyFrom( + MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(), + MemCpy->getParent()); + if (DstDepInfo.getInst() != MemSet) + return false; + } // Use the same i8* dest as the memcpy, killing the memset dest if different. Value *Dest = MemCpy->getRawDest(); @@ -1069,7 +1203,8 @@ if (ConstantInt *SrcSizeC = dyn_cast(SrcSize)) Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); - IRBuilder<> Builder(MemCpy); + // MSSA replacement requires new memset to be inserted next to old. + IRBuilder<> Builder(MemSet); // If the sizes have different types, zext the smaller one. 
if (DestSize->getType() != SrcSize->getType()) { @@ -1084,11 +1219,12 @@ Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize), ConstantInt::getNullValue(DestSize->getType()), Builder.CreateSub(DestSize, SrcSize)); - Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1), - MemsetLen, Align); + auto *M = Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), + MemSet->getOperand(1), MemsetLen, Align); + if (UseMemorySSA) + replaceMemoryAccess(*MSSA, MemSet, M); - MD->removeInstruction(MemSet); - MemSet->eraseFromParent(); + eraseInstruction(MemSet); return true; } @@ -1123,8 +1259,13 @@ return false; IRBuilder<> Builder(MemCpy); - Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), - CopySize, MemCpy->getAlignment()); + auto *MemSetNew = + Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), + CopySize, MemCpy->getAlignment()); + if (UseMemorySSA) + replaceMemoryAccess(*MSSA, MemCpy, MemSetNew); + eraseInstruction(MemCpy); + ++NumCpyToSet; return true; } @@ -1139,8 +1280,7 @@ // If the source and destination of the memcpy are the same, then zap it. 
if (M->getSource() == M->getDest()) { - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); return false; } @@ -1151,8 +1291,7 @@ IRBuilder<> Builder(M); Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), M->getAlignment(), false); - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); ++NumCpyToSet; return true; } @@ -1182,8 +1321,8 @@ if (performCallSlotOptzn(M, M->getDest(), M->getSource(), CopySize->getZExtValue(), M->getAlignment(), C)) { - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); + ++NumMemCpyInstr; return true; } } @@ -1210,8 +1349,7 @@ } if (hasUndefContents) { - MD->removeInstruction(M); - M->eraseFromParent(); + eraseInstruction(M); ++NumMemCpyInstr; return true; } @@ -1219,13 +1357,108 @@ if (SrcDepInfo.isClobber()) if (MemSetInst *MDep = dyn_cast(SrcDepInfo.getInst())) - if (performMemCpyToMemSetOptzn(M, MDep)) { - MD->removeInstruction(M); - M->eraseFromParent(); + if (performMemCpyToMemSetOptzn(M, MDep)) + return true; + + return false; +} + +bool MemCpyOptPass::processMemCpyMSSA(MemCpyInst *M) { + if (M->isVolatile()) + return false; + + if (M->getSource() == M->getDest()) { + eraseInstruction(M); + return false; + } + + // If copying from a constant, try to turn the memcpy into a memset. 
+ if (GlobalVariable *GV = dyn_cast(M->getSource())) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { + IRBuilder<> Builder(M); + auto *MemSet = Builder.CreateMemSet( + M->getRawDest(), ByteVal, M->getLength(), M->getAlignment(), false); + replaceMemoryAccess(*MSSA, M, MemSet); + eraseInstruction(M); ++NumCpyToSet; return true; } + MemoryAccess *DestClob = + getCMA(MSSA, MSSA->getMemoryAccess(M), MemoryLocation::getForDest(M)); + + // TODO: This can be made non-local if M post-doms MDep + if (DestClob->getBlock() == M->getParent()) + if (auto *MUD = dyn_cast(DestClob)) + if (auto *MDep = dyn_cast_or_null(MUD->getMemoryInst())) + if (processMemSetMemCpyDependence(M, MDep)) + return true; + + // The optimizations after this point require the memcpy size. + ConstantInt *CopySize = dyn_cast(M->getLength()); + if (!CopySize) + return false; + + MemoryUseOrDef *MAcc = MSSA->getMemoryAccess(M); + MemoryAccess *SrcClob = getCMA(MSSA, MAcc, MemoryLocation::getForSource(M)); + + if (auto *MUD = dyn_cast(SrcClob)) { + if (auto *C = dyn_cast_or_null(MUD->getMemoryInst())) { + // TODO: Can be made non-local if M post-doms C + if (C->getParent() == M->getParent() && + performCallSlotOptzn(M, M->getDest(), M->getSource(), + CopySize->getZExtValue(), M->getAlignment(), + C)) { + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + } + + // Non-local permitted since processMemCpyMemCpyDependence modifies M only + // and getCMA ensures that MDep doms M. + if (auto *MDep = dyn_cast_or_null(MUD->getMemoryInst())) + if (processMemCpyMemCpyDependence(M, MDep)) + return true; + + if (auto *MDep = dyn_cast_or_null(MUD->getMemoryInst())) + if (performMemCpyToMemSetOptzn(M, MDep)) + return true; + + const DataLayout &DL = M->getParent()->getModule()->getDataLayout(); + // TODO: test these. 
+ bool hasUndefContents = + MSSA->isLiveOnEntryDef(MUD) && + isa(GetUnderlyingObject(M->getRawSource(), DL)); + + // Both MUD and and lifetime_start (if one exists for M's source) dominate + // MAcc. The only way for lifetime_start to imply undef contents is if it + // resides between MUD and MAcc. + for (MemoryUseOrDef *L = + dyn_cast_or_null(MAcc->getDefiningAccess()); + L && L != MUD; + L = dyn_cast_or_null(L->getDefiningAccess())) { + if (auto *II = dyn_cast_or_null(L->getMemoryInst())) { + if (II->getIntrinsicID() == Intrinsic::lifetime_start && + II->getArgOperand(1)->stripPointerCasts() == M->getSource()) { + if (ConstantInt *LTSize = + dyn_cast(II->getArgOperand(0))) { + hasUndefContents |= + LTSize->getZExtValue() >= CopySize->getZExtValue(); + break; + } + } + } + } + + if (hasUndefContents) { + eraseInstruction(M); + ++NumMemCpyInstr; + return true; + } + } + return false; } @@ -1267,16 +1500,28 @@ Value *ByValArg = CS.getArgument(ArgNo); Type *ByValTy = cast(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); - MemDepResult DepInfo = MD->getPointerDependencyFrom( - MemoryLocation(ByValArg, ByValSize), true, - CS.getInstruction()->getIterator(), CS.getInstruction()->getParent()); - if (!DepInfo.isClobber()) - return false; + MemoryLocation ByValLoc(ByValArg, ByValSize); + MemCpyInst *MDep = nullptr; + + if (UseMemorySSA) { + if (MemoryUseOrDef *MUD = MSSA->getMemoryAccess(CS.getInstruction())) + if (auto *ByValClob = + dyn_cast(getCMA(MSSA, MUD, ByValLoc))) + // TODO: Non-local: It's sufficient for MDep to dominate CS. + if (ByValClob->getBlock() == CS.getParent()) + MDep = dyn_cast_or_null(ByValClob->getMemoryInst()); + } else { + MemDepResult DepInfo = MD->getPointerDependencyFrom( + ByValLoc, true, CS.getInstruction()->getIterator(), + CS.getInstruction()->getParent()); + if (!DepInfo.isClobber()) + return false; + MDep = dyn_cast(DepInfo.getInst()); + } // If the byval argument isn't fed by a memcpy, ignore it. 
If it is fed by // a memcpy, see if we can byval from the source of the memcpy instead of the // result. - MemCpyInst *MDep = dyn_cast(DepInfo.getInst()); if (!MDep || MDep->isVolatile() || ByValArg->stripPointerCasts() != MDep->getDest()) return false; @@ -1309,11 +1554,18 @@ // // NOTE: This is conservative, it will stop on any read from the source loc, // not just the defining memcpy. - MemDepResult SourceDep = MD->getPointerDependencyFrom( - MemoryLocation::getForSource(MDep), false, - CS.getInstruction()->getIterator(), MDep->getParent()); - if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) - return false; + if (UseMemorySSA) { + MemoryUseOrDef *Dep = MSSA->getMemoryAccess(MDep); + if (getCMABetween(MemoryLocation::getForSource(MDep), Dep, + MSSA->getMemoryAccess(CS.getInstruction()), MSSA) != Dep) + return false; + } else { + MemDepResult SourceDep = MD->getPointerDependencyFrom( + MemoryLocation::getForSource(MDep), false, + CS.getInstruction()->getIterator(), MDep->getParent()); + if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) + return false; + } Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) @@ -1347,7 +1599,8 @@ else if (MemSetInst *M = dyn_cast(I)) RepeatInstruction = processMemSet(M, BI); else if (MemCpyInst *M = dyn_cast(I)) - RepeatInstruction = processMemCpy(M); + RepeatInstruction = + UseMemorySSA ? processMemCpyMSSA(M) : processMemCpy(M); else if (MemMoveInst *M = dyn_cast(I)) RepeatInstruction = processMemMove(M); else if (auto CS = CallSite(I)) { @@ -1370,6 +1623,8 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { + auto *MSSA = + UseMemorySSA ? 
&AM.getResult(F).getMSSA() : nullptr; auto &MD = AM.getResult(F); auto &TLI = AM.getResult(F); @@ -1383,22 +1638,28 @@ return AM.getResult(F); }; - bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis, + bool MadeChange = runImpl(F, MSSA, &MD, &TLI, LookupAliasAnalysis, LookupAssumptionCache, LookupDomTree); if (!MadeChange) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); PA.preserve(); + + if (UseMemorySSA) + PA.preserve(); + return PA; } bool MemCpyOptPass::runImpl( - Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, + Function &F, MemorySSA *MSSA_, MemoryDependenceResults *MD_, + TargetLibraryInfo *TLI_, std::function LookupAliasAnalysis_, std::function LookupAssumptionCache_, std::function LookupDomTree_) { bool MadeChange = false; + MSSA = MSSA_; MD = MD_; TLI = TLI_; LookupAliasAnalysis = std::move(LookupAliasAnalysis_); @@ -1418,14 +1679,18 @@ } MD = nullptr; + MSSA = nullptr; return MadeChange; } /// This is the main transformation entry point for a function. -bool MemCpyOptLegacyPass::runOnFunction(Function &F) { +template +bool MemCpyOptLegacyCommon::runOnFunction(Function &F) { if (skipFunction(F)) return false; + auto *MSSA = + UseMSSA ? 
&getAnalysis().getMSSA() : nullptr; auto *MD = &getAnalysis().getMemDep(); auto *TLI = &getAnalysis().getTLI(); @@ -1439,6 +1704,6 @@ return getAnalysis().getDomTree(); }; - return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache, - LookupDomTree); + return Impl.runImpl(F, MSSA, MD, TLI, LookupAliasAnalysis, + LookupAssumptionCache, LookupDomTree); } Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -68,6 +68,7 @@ initializeLowerExpectIntrinsicPass(Registry); initializeLowerGuardIntrinsicLegacyPassPass(Registry); initializeMemCpyOptLegacyPassPass(Registry); + initializeMemCpyOptMemSSALegacyPassPass(Registry); initializeMergedLoadStoreMotionLegacyPassPass(Registry); initializeNaryReassociateLegacyPassPass(Registry); initializePartiallyInlineLibCallsLegacyPassPass(Registry); @@ -179,7 +180,11 @@ } void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createMemCpyOptPass()); + unwrap(PM)->add(createMemCpyOptPass(false)); +} + +void LLVMAddMemCpyOptMemSSAPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createMemCpyOptPass(true)); } void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) { Index: test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll =================================================================== --- test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll +++ test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -tbaa -basicaa -memcpyopt -instcombine < %s | FileCheck %s +; RUN: opt -S -tbaa -basicaa -memcpyopt-mssa -instcombine < %s | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-p:64:64:64" @@ -12,6 +13,11 @@ ; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa !3 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @foo( +; MCO-MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, 
i8* %q, i64 16, i32 1, i1 false), !tbaa !0 +; MCO-MSSA-NEXT: store i8 2, i8* %s, align 1, !tbaa !3 +; MCO-MSSA-NEXT: ret void +; tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2 store i8 2, i8* %s, align 1, !tbaa !1 tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i32 1, i1 false), !tbaa !2 Index: test/Transforms/GVN/pr24426.ll =================================================================== --- test/Transforms/GVN/pr24426.ll +++ test/Transforms/GVN/pr24426.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -mldst-motion -gvn -S | FileCheck %s +; RUN: opt < %s -memcpyopt-mssa -mldst-motion -gvn -S | FileCheck %s --check-prefix=MCO-MSSA declare void @check(i8) @@ -14,6 +15,14 @@ ; CHECK-NEXT: call void @check(i8 [[TMP3]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = alloca [10 x i8] +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast [10 x i8]* [[TMP1]] to i8* +; MCO-MSSA-NEXT: call void @write(i8* [[TMP2]]) +; MCO-MSSA-NEXT: [[TMP3:%.*]] = load i8, i8* [[TMP2]] +; MCO-MSSA-NEXT: call void @check(i8 [[TMP3]]) +; MCO-MSSA-NEXT: ret void +; %1 = alloca [10 x i8] %2 = bitcast [10 x i8]* %1 to i8* call void @write(i8* %2) Index: test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll =================================================================== --- test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll +++ test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -dse -S | grep "call.*initialize" | not grep memtmp +; RUN: opt < %s -basicaa -memcpyopt-mssa -dse -S | grep "call.*initialize" | not grep memtmp ; PR2077 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" Index: 
test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll =================================================================== --- test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll +++ test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy." +; RUN: opt < %s -basicaa -memcpyopt-mssa -S | not grep "call.*memcpy." target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" %a = type { i32 } Index: test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll =================================================================== --- test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll +++ test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s +; RUN: opt < %s -basicaa -memcpyopt-mssa -S | FileCheck %s --check-prefix=MCO-MSSA ; PR10067 ; Make sure the call+copy isn't optimized in such a way that ; %ret ends up with the wrong value. 
@@ -29,6 +30,22 @@ ; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[GEP1]] ; CHECK-NEXT: ret i32 [[RET]] ; +; MCO-MSSA-LABEL: @foo( +; MCO-MSSA-NEXT: [[X:%.*]] = alloca %struct1, align 8 +; MCO-MSSA-NEXT: [[Y:%.*]] = alloca %struct2, align 8 +; MCO-MSSA-NEXT: call void @bar(%struct1* sret [[X]]) #0 +; MCO-MSSA-NEXT: [[GEPN1:%.*]] = getelementptr inbounds %struct2, %struct2* [[Y]], i32 0, i32 0, i32 0 +; MCO-MSSA-NEXT: store i32 0, i32* [[GEPN1]], align 8 +; MCO-MSSA-NEXT: [[GEPN2:%.*]] = getelementptr inbounds %struct2, %struct2* [[Y]], i32 0, i32 0, i32 1 +; MCO-MSSA-NEXT: store i32 0, i32* [[GEPN2]], align 4 +; MCO-MSSA-NEXT: [[BIT1:%.*]] = bitcast %struct1* [[X]] to i64* +; MCO-MSSA-NEXT: [[BIT2:%.*]] = bitcast %struct2* [[Y]] to i64* +; MCO-MSSA-NEXT: [[LOAD:%.*]] = load i64, i64* [[BIT1]], align 8 +; MCO-MSSA-NEXT: store i64 [[LOAD]], i64* [[BIT2]], align 8 +; MCO-MSSA-NEXT: [[GEP1:%.*]] = getelementptr %struct2, %struct2* [[Y]], i32 0, i32 0, i32 0 +; MCO-MSSA-NEXT: [[RET:%.*]] = load i32, i32* [[GEP1]] +; MCO-MSSA-NEXT: ret i32 [[RET]] +; %x = alloca %struct1, align 8 %y = alloca %struct2, align 8 call void @bar(%struct1* sret %x) nounwind Index: test/Transforms/MemCpyOpt/align.ll =================================================================== --- test/Transforms/MemCpyOpt/align.ll +++ test/Transforms/MemCpyOpt/align.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s +; RUN: opt < %s -S -basicaa -memcpyopt-mssa | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind @@ -11,13 +12,22 @@ define void @foo(i32* %p) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: [[A0:%.*]] = getelementptr i32, i32* %p, i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A0]] to i8* +; 
CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 16, i32 4, i1 false) ; CHECK-NEXT: [[A1:%.*]] = getelementptr i32, i32* %p, i64 1 ; CHECK-NEXT: [[A2:%.*]] = getelementptr i32, i32* %p, i64 2 ; CHECK-NEXT: [[A3:%.*]] = getelementptr i32, i32* %p, i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 16, i32 4, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @foo( +; MCO-MSSA-NEXT: [[A0:%.*]] = getelementptr i32, i32* %p, i64 0 +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i32* [[A0]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 16, i32 4, i1 false) +; MCO-MSSA-NEXT: [[A1:%.*]] = getelementptr i32, i32* %p, i64 1 +; MCO-MSSA-NEXT: [[A2:%.*]] = getelementptr i32, i32* %p, i64 2 +; MCO-MSSA-NEXT: [[A3:%.*]] = getelementptr i32, i32* %p, i64 3 +; MCO-MSSA-NEXT: ret void +; %a0 = getelementptr i32, i32* %p, i64 0 store i32 0, i32* %a0, align 4 %a1 = getelementptr i32, i32* %p, i64 1 @@ -41,6 +51,15 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A41]], i8 0, i64 4, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @bar( +; MCO-MSSA-NEXT: [[A4:%.*]] = alloca i32, align 8 +; MCO-MSSA-NEXT: [[A8:%.*]] = alloca i32, align 8 +; MCO-MSSA-NEXT: [[A8_CAST:%.*]] = bitcast i32* [[A8]] to i8* +; MCO-MSSA-NEXT: [[A4_CAST:%.*]] = bitcast i32* [[A4]] to i8* +; MCO-MSSA-NEXT: [[A41:%.*]] = bitcast i32* [[A4]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A41]], i8 0, i64 4, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; %a4 = alloca i32, align 4 %a8 = alloca i32, align 8 %a8.cast = bitcast i32* %a8 to i8* Index: test/Transforms/MemCpyOpt/atomic.ll =================================================================== --- test/Transforms/MemCpyOpt/atomic.ll +++ test/Transforms/MemCpyOpt/atomic.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basicaa -memcpyopt -S < 
%s | FileCheck %s +; RUN: opt -basicaa -memcpyopt-mssa -S < %s | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.0" @@ -22,6 +23,16 @@ ; CHECK-NEXT: call void @otherf(i32* [[GEP2]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: [[X:%.*]] = alloca [101 x i32], align 16 +; MCO-MSSA-NEXT: [[BC:%.*]] = bitcast [101 x i32]* [[X]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[BC]], i8 0, i64 400, i32 16, i1 false) +; MCO-MSSA-NEXT: [[GEP1:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* [[X]], i32 0, i32 100 +; MCO-MSSA-NEXT: store atomic i32 0, i32* [[GEP1]] unordered, align 4 +; MCO-MSSA-NEXT: [[GEP2:%.*]] = getelementptr inbounds [101 x i32], [101 x i32]* [[X]], i32 0, i32 0 +; MCO-MSSA-NEXT: call void @otherf(i32* [[GEP2]]) +; MCO-MSSA-NEXT: ret void +; %x = alloca [101 x i32], align 16 %bc = bitcast [101 x i32]* %x to i8* call void @llvm.memset.p0i8.i64(i8* %bc, i8 0, i64 400, i32 16, i1 false) @@ -42,6 +53,14 @@ ; CHECK-NEXT: call void @otherf(i32* nocapture [[NEW]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test2( +; MCO-MSSA-NEXT: [[OLD:%.*]] = alloca i32 +; MCO-MSSA-NEXT: [[NEW:%.*]] = alloca i32 +; MCO-MSSA-NEXT: call void @otherf(i32* nocapture [[NEW]]) +; MCO-MSSA-NEXT: store atomic i32 0, i32* @x unordered, align 4 +; MCO-MSSA-NEXT: call void @otherf(i32* nocapture [[NEW]]) +; MCO-MSSA-NEXT: ret void +; %old = alloca i32 %new = alloca i32 call void @otherf(i32* nocapture %old) Index: test/Transforms/MemCpyOpt/callslot_aa.ll =================================================================== --- test/Transforms/MemCpyOpt/callslot_aa.ll +++ test/Transforms/MemCpyOpt/callslot_aa.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck 
%s +; RUN: opt < %s -S -basicaa -memcpyopt-mssa | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %T = type { i64, i64 } @@ -11,6 +12,12 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* %src, i64 1, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test( +; MCO-MSSA-NEXT: [[TMP:%.*]] = alloca i8 +; MCO-MSSA-NEXT: [[DST:%.*]] = alloca i8 +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* %src, i64 1, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; %tmp = alloca i8 %dst = alloca i8 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %src, i64 1, i32 8, i1 false), !noalias !2 Index: test/Transforms/MemCpyOpt/callslot_deref.ll =================================================================== --- test/Transforms/MemCpyOpt/callslot_deref.ll +++ test/Transforms/MemCpyOpt/callslot_deref.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s +; RUN: opt < %s -S -basicaa -memcpyopt-mssa | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind @@ -15,6 +16,14 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST12]], i8 0, i64 4096, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @must_remove_memcpy( +; MCO-MSSA-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1 +; MCO-MSSA-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0 +; MCO-MSSA-NEXT: [[DST1:%.*]] = bitcast i8* %dst to [4096 x i8]* +; MCO-MSSA-NEXT: [[DST12:%.*]] = bitcast [4096 x i8]* [[DST1]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[DST12]], i8 0, i64 4096, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; %src = alloca [4096 x i8], align 1 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 
0, i64 0 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false) @@ -32,6 +41,13 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 4096, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @must_not_remove_memcpy( +; MCO-MSSA-NEXT: [[SRC:%.*]] = alloca [4096 x i8], align 1 +; MCO-MSSA-NEXT: [[P:%.*]] = getelementptr inbounds [4096 x i8], [4096 x i8]* [[SRC]], i64 0, i64 0 +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[P]], i8 0, i64 4096, i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 4096, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; %src = alloca [4096 x i8], align 1 %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %src, i64 0, i64 0 call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false) Index: test/Transforms/MemCpyOpt/callslot_throw.ll =================================================================== --- test/Transforms/MemCpyOpt/callslot_throw.ll +++ test/Transforms/MemCpyOpt/callslot_throw.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -memcpyopt < %s | FileCheck %s +; RUN: opt -S -memcpyopt-mssa < %s | FileCheck %s --check-prefix=MCO-MSSA declare void @may_throw(i32* nocapture %x) define void @test1(i32* nocapture noalias dereferenceable(4) %x) { @@ -11,6 +12,14 @@ ; CHECK-NEXT: store i32 [[LOAD]], i32* %x, align 4 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[T:%.*]] = alloca i32, align 4 +; MCO-MSSA-NEXT: call void @may_throw(i32* nonnull [[T]]) +; MCO-MSSA-NEXT: [[LOAD:%.*]] = load i32, i32* [[T]], align 4 +; MCO-MSSA-NEXT: store i32 [[LOAD]], i32* %x, align 4 +; MCO-MSSA-NEXT: ret void +; entry: %t = alloca i32, align 4 call void @may_throw(i32* nonnull %t) @@ -31,6 +40,15 @@ ; CHECK-NEXT: store i32 [[LOAD]], i32* %x, align 4 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test2( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: 
[[T:%.*]] = alloca i32, align 4 +; MCO-MSSA-NEXT: call void @may_throw(i32* nonnull [[T]]) #0 +; MCO-MSSA-NEXT: [[LOAD:%.*]] = load i32, i32* [[T]], align 4 +; MCO-MSSA-NEXT: call void @always_throws() +; MCO-MSSA-NEXT: store i32 [[LOAD]], i32* %x, align 4 +; MCO-MSSA-NEXT: ret void +; entry: %t = alloca i32, align 4 call void @may_throw(i32* nonnull %t) nounwind Index: test/Transforms/MemCpyOpt/capturing-func.ll =================================================================== --- test/Transforms/MemCpyOpt/capturing-func.ll +++ test/Transforms/MemCpyOpt/capturing-func.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s +; RUN: opt < %s -basicaa -memcpyopt-mssa -S | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e" @@ -17,6 +18,14 @@ ; CHECK-NEXT: call void @foo(i8* [[PTR1]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test( +; MCO-MSSA-NEXT: [[PTR1:%.*]] = alloca i8 +; MCO-MSSA-NEXT: [[PTR2:%.*]] = alloca i8 +; MCO-MSSA-NEXT: call void @foo(i8* [[PTR2]]) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR1]], i8* [[PTR2]], i32 1, i32 1, i1 false) +; MCO-MSSA-NEXT: call void @foo(i8* [[PTR1]]) +; MCO-MSSA-NEXT: ret void +; %ptr1 = alloca i8 %ptr2 = alloca i8 call void @foo(i8* %ptr2) Index: test/Transforms/MemCpyOpt/crash.ll =================================================================== --- test/Transforms/MemCpyOpt/crash.ll +++ test/Transforms/MemCpyOpt/crash.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -basicaa -memcpyopt -disable-output +; RUN: opt < %s -basicaa -memcpyopt-mssa -disable-output target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "armv7-eabi" Index: test/Transforms/MemCpyOpt/fca2memcpy.ll =================================================================== --- test/Transforms/MemCpyOpt/fca2memcpy.ll +++ 
test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s +; RUN: opt -memcpyopt-mssa -S < %s | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-i64:64-f80:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" @@ -13,6 +14,12 @@ ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @copy( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast %S* %dst to i8* +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; MCO-MSSA-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void @@ -25,6 +32,12 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @noaliassrc( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast %S* %dst to i8* +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void @@ -37,6 +50,12 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @noaliasdst( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast %S* %dst to i8* +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src store %S %1, %S* %dst ret void @@ -50,6 +69,13 @@ ; CHECK-NEXT: store %S [[TMP1]], %S* %dst ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @destroysrc( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = load %S, %S* %src +; MCO-MSSA-NEXT: [[TMP2:%.*]] = 
bitcast %S* %src to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: store %S [[TMP1]], %S* %dst +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src store %S zeroinitializer, %S* %src store %S %1, %S* %dst @@ -65,6 +91,14 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 16, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @destroynoaliassrc( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast %S* %src to i8* +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast %S* %dst to i8* +; MCO-MSSA-NEXT: [[TMP3:%.*]] = bitcast %S* %src to i8* +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src store %S zeroinitializer, %S* %src store %S %1, %S* %dst @@ -80,6 +114,14 @@ ; CHECK-NEXT: store %S [[TMP1]], %S* %dst ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @copyalias( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = load %S, %S* %src +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast %S* %dst to i8* +; MCO-MSSA-NEXT: [[TMP3:%.*]] = bitcast %S* %src to i8* +; MCO-MSSA-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: store %S [[TMP1]], %S* %dst +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src %2 = load %S, %S* %src store %S %1, %S* %dst @@ -98,6 +140,14 @@ ; CHECK-NEXT: store %S undef, %S* %dst ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @addrproducer( +; MCO-MSSA-NEXT: [[DST2:%.*]] = getelementptr %S, %S* %dst, i64 1 +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast %S* [[DST2]] to i8* +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast %S* %src to i8* +; MCO-MSSA-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP2]], i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: store %S undef, %S* %dst +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* 
%dst %dst2 = getelementptr %S , %S* %dst, i64 1 @@ -114,6 +164,14 @@ ; CHECK-NEXT: store %S [[TMP1]], %S* [[DST2]] ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @aliasaddrproducer( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = load %S, %S* %src +; MCO-MSSA-NEXT: store %S undef, %S* %dst +; MCO-MSSA-NEXT: [[DSTINDEX:%.*]] = load i32, i32* %dstidptr +; MCO-MSSA-NEXT: [[DST2:%.*]] = getelementptr %S, %S* %dst, i32 [[DSTINDEX]] +; MCO-MSSA-NEXT: store %S [[TMP1]], %S* [[DST2]] +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %dst %dstindex = load i32, i32* %dstidptr @@ -133,6 +191,16 @@ ; CHECK-NEXT: store %S undef, %S* %src ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @noaliasaddrproducer( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = load i32, i32* %dstidptr +; MCO-MSSA-NEXT: [[DSTINDEX:%.*]] = or i32 [[TMP1]], 1 +; MCO-MSSA-NEXT: [[DST2:%.*]] = getelementptr %S, %S* %dst, i32 [[DSTINDEX]] +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST2]] to i8* +; MCO-MSSA-NEXT: [[TMP3:%.*]] = bitcast %S* %src to i8* +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP2]], i8* [[TMP3]], i64 16, i32 8, i1 false) +; MCO-MSSA-NEXT: store %S undef, %S* %src +; MCO-MSSA-NEXT: ret void +; %1 = load %S, %S* %src store %S undef, %S* %src %2 = load i32, i32* %dstidptr Index: test/Transforms/MemCpyOpt/form-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/form-memset.ll +++ test/Transforms/MemCpyOpt/form-memset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s +; RUN: opt < %s -memcpyopt-mssa -S | FileCheck %s --check-prefix=MCO-MSSA ; All the stores in this example should be merged into a single memset. 
@@ -11,6 +12,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = alloca [19 x i8] ; CHECK-NEXT: [[TMP:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP]], i8 %c, i64 19, i32 1, i1 false) ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 2 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 3 @@ -29,10 +31,35 @@ ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 16 ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 17 ; CHECK-NEXT: [[TMP73:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 18 -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP]], i8 %c, i64 19, i32 1, i1 false) ; CHECK-NEXT: [[TMP76:%.*]] = call i32 (...) @bar([19 x i8]* [[X]]) #0 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[X:%.*]] = alloca [19 x i8] +; MCO-MSSA-NEXT: [[TMP:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 0 +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP]], i8 %c, i64 19, i32 1, i1 false) +; MCO-MSSA-NEXT: [[TMP5:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 1 +; MCO-MSSA-NEXT: [[TMP9:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 2 +; MCO-MSSA-NEXT: [[TMP13:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 3 +; MCO-MSSA-NEXT: [[TMP17:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 4 +; MCO-MSSA-NEXT: [[TMP21:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 5 +; MCO-MSSA-NEXT: [[TMP25:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 6 +; MCO-MSSA-NEXT: [[TMP29:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 7 +; MCO-MSSA-NEXT: [[TMP33:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 8 +; 
MCO-MSSA-NEXT: [[TMP37:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 9 +; MCO-MSSA-NEXT: [[TMP41:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 10 +; MCO-MSSA-NEXT: [[TMP45:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 11 +; MCO-MSSA-NEXT: [[TMP49:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 12 +; MCO-MSSA-NEXT: [[TMP53:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 13 +; MCO-MSSA-NEXT: [[TMP57:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 14 +; MCO-MSSA-NEXT: [[TMP61:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 15 +; MCO-MSSA-NEXT: [[TMP65:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 16 +; MCO-MSSA-NEXT: [[TMP69:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 17 +; MCO-MSSA-NEXT: [[TMP73:%.*]] = getelementptr [19 x i8], [19 x i8]* [[X]], i32 0, i32 18 +; MCO-MSSA-NEXT: [[TMP76:%.*]] = call i32 (...) @bar([19 x i8]* [[X]]) #0 +; MCO-MSSA-NEXT: ret void +; entry: %x = alloca [19 x i8] ; <[19 x i8]*> [#uses=20] %tmp = getelementptr [19 x i8], [19 x i8]* %x, i32 0, i32 0 ; [#uses=1] @@ -96,8 +123,8 @@ ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 2 ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 1 ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 0 -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 0 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP41]], i8 -1, i64 8, i32 1, i1 false) +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 0 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 1 ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 0 ; CHECK-NEXT: [[TMP60:%.*]] = 
getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 1 @@ -112,10 +139,10 @@ ; CHECK-NEXT: [[TMP127:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 0 ; CHECK-NEXT: [[TMP130:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 1 ; CHECK-NEXT: [[TMP141:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 0 -; CHECK-NEXT: [[TMP144:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 1 -; CHECK-NEXT: [[TMP148:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[TMP141]] to i8* ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 32, i32 8, i1 false) +; CHECK-NEXT: [[TMP144:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 1 +; CHECK-NEXT: [[TMP148:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 0 ; CHECK-NEXT: [[TMP151:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 1 ; CHECK-NEXT: [[TMP162:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 0 ; CHECK-NEXT: [[TMP165:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 1 @@ -130,14 +157,69 @@ ; CHECK-NEXT: [[TMP232:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 0 ; CHECK-NEXT: [[TMP235:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 1 ; CHECK-NEXT: [[TMP246:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP246]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 32, i32 8, i1 false) ; CHECK-NEXT: [[TMP249:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* 
[[LEFT_MVD]], i32 0, i32 0, i32 1 ; CHECK-NEXT: [[UP_MVD252:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0 ; CHECK-NEXT: [[LEFT_MVD253:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP246]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 32, i32 8, i1 false) ; CHECK-NEXT: call void @foo(%struct.MV* [[UP_MVD252]], %struct.MV* [[LEFT_MVD253]], i8* [[TMP41]]) #0 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test2( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[REF_IDX:%.*]] = alloca [8 x i8] +; MCO-MSSA-NEXT: [[LEFT_MVD:%.*]] = alloca [8 x %struct.MV] +; MCO-MSSA-NEXT: [[UP_MVD:%.*]] = alloca [8 x %struct.MV] +; MCO-MSSA-NEXT: [[TMP20:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 7 +; MCO-MSSA-NEXT: [[TMP23:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 6 +; MCO-MSSA-NEXT: [[TMP26:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 5 +; MCO-MSSA-NEXT: [[TMP29:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 4 +; MCO-MSSA-NEXT: [[TMP32:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 3 +; MCO-MSSA-NEXT: [[TMP35:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 2 +; MCO-MSSA-NEXT: [[TMP38:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 1 +; MCO-MSSA-NEXT: [[TMP41:%.*]] = getelementptr [8 x i8], [8 x i8]* [[REF_IDX]], i32 0, i32 0 +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP41]], i8 -1, i64 8, i32 1, i1 false) +; MCO-MSSA-NEXT: [[TMP43:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 0 +; MCO-MSSA-NEXT: [[TMP46:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 7, i32 1 +; MCO-MSSA-NEXT: [[TMP57:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 0 +; MCO-MSSA-NEXT: [[TMP60:%.*]] = 
getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 6, i32 1 +; MCO-MSSA-NEXT: [[TMP71:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 5, i32 0 +; MCO-MSSA-NEXT: [[TMP74:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 5, i32 1 +; MCO-MSSA-NEXT: [[TMP85:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 4, i32 0 +; MCO-MSSA-NEXT: [[TMP88:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 4, i32 1 +; MCO-MSSA-NEXT: [[TMP99:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 3, i32 0 +; MCO-MSSA-NEXT: [[TMP102:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 3, i32 1 +; MCO-MSSA-NEXT: [[TMP113:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 2, i32 0 +; MCO-MSSA-NEXT: [[TMP116:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 2, i32 1 +; MCO-MSSA-NEXT: [[TMP127:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 0 +; MCO-MSSA-NEXT: [[TMP130:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 1, i32 1 +; MCO-MSSA-NEXT: [[TMP141:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 0 +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast i16* [[TMP141]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 32, i32 8, i1 false) +; MCO-MSSA-NEXT: [[TMP144:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0, i32 1 +; MCO-MSSA-NEXT: [[TMP148:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 0 +; MCO-MSSA-NEXT: [[TMP151:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 7, i32 1 +; MCO-MSSA-NEXT: [[TMP162:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 0 +; 
MCO-MSSA-NEXT: [[TMP165:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 6, i32 1 +; MCO-MSSA-NEXT: [[TMP176:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 5, i32 0 +; MCO-MSSA-NEXT: [[TMP179:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 5, i32 1 +; MCO-MSSA-NEXT: [[TMP190:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 4, i32 0 +; MCO-MSSA-NEXT: [[TMP193:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 4, i32 1 +; MCO-MSSA-NEXT: [[TMP204:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 3, i32 0 +; MCO-MSSA-NEXT: [[TMP207:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 3, i32 1 +; MCO-MSSA-NEXT: [[TMP218:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 2, i32 0 +; MCO-MSSA-NEXT: [[TMP221:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 2, i32 1 +; MCO-MSSA-NEXT: [[TMP232:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 0 +; MCO-MSSA-NEXT: [[TMP235:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 1, i32 1 +; MCO-MSSA-NEXT: [[TMP246:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 0 +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i16* [[TMP246]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 32, i32 8, i1 false) +; MCO-MSSA-NEXT: [[TMP249:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0, i32 1 +; MCO-MSSA-NEXT: [[UP_MVD252:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[UP_MVD]], i32 0, i32 0 +; MCO-MSSA-NEXT: [[LEFT_MVD253:%.*]] = getelementptr [8 x %struct.MV], [8 x %struct.MV]* [[LEFT_MVD]], i32 0, i32 0 +; MCO-MSSA-NEXT: call void @foo(%struct.MV* [[UP_MVD252]], 
%struct.MV* [[LEFT_MVD253]], i8* [[TMP41]]) #0 +; MCO-MSSA-NEXT: ret void +; entry: %ref_idx = alloca [8 x i8] ; <[8 x i8]*> [#uses=8] %left_mvd = alloca [8 x %struct.MV] ; <[8 x %struct.MV]*> [#uses=17] @@ -237,12 +319,21 @@ ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %P, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 15, i32 4, i1 false) ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 15, i32 4, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ADD_PTR]] to i8* ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test3( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %P, i64 1 +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 15, i32 4, i1 false) +; MCO-MSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 2 +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; MCO-MSSA-NEXT: ret void +; entry: %arrayidx = getelementptr inbounds i32, i32* %P, i64 1 store i32 0, i32* %arrayidx, align 4 @@ -256,12 +347,20 @@ define void @test4(i32* nocapture %P) nounwind ssp { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* %P to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 15, i32 4, i1 false) ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* %P to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 15, i32 4, i1 false) +; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ADD_PTR]] to i8* ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test4( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast i32* %P to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 15, i32 4, i1 false) +; MCO-MSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 1 +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; MCO-MSSA-NEXT: ret void +; entry: store i32 0, i32* %P, align 4 %add.ptr = getelementptr inbounds i32, i32* %P, i64 1 @@ -283,6 +382,15 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 15, i32 4, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test5( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 2 +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; MCO-MSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %P, i64 1 +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i32* [[ARRAYIDX]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 15, i32 4, i1 false) +; MCO-MSSA-NEXT: ret void +; entry: %add.ptr = getelementptr inbounds i32, i32* %P, i64 2 %0 = bitcast i32* %add.ptr to i8* @@ -297,12 +405,21 @@ ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* %P to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* %P to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 24, i32 1, i1 false) ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[ADD_PTR]] to i8* -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* %P to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 24, i32 1, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ADD_PTR]] to i8* ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test6( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast i32* %P to i8* +; 
MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i32* %P to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 0, i64 24, i32 1, i1 false) +; MCO-MSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 3 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; MCO-MSSA-NEXT: ret void +; entry: %0 = bitcast i32* %P to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 12, i32 1, i1 false) @@ -316,14 +433,23 @@ ; rdar://9892684 define void @test7(i32* nocapture %c) nounwind optsize { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* %c, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* %c, i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* %c, i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* %c, i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* %c to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP5]], i8 -1, i64 20, i32 4, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* %c to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 -1, i64 20, i32 4, i1 false) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* %c, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* %c, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* %c, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* %c, i32 4 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test7( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i32* %c to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 -1, i64 20, i32 4, i1 false) +; MCO-MSSA-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* %c, i32 1 +; MCO-MSSA-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* %c, i32 2 +; MCO-MSSA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* %c, i32 3 +; MCO-MSSA-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* %c, i32 4 +; MCO-MSSA-NEXT: ret void +; store i32 -1, i32* %c, align 4 %1 
= getelementptr inbounds i32, i32* %c, i32 1 store i32 -1, i32* %1, align 4 @@ -346,6 +472,13 @@ ; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[TMP0]], align 16 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test8( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[MEMTMP:%.*]] = alloca %struct.test8, align 16 +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast %struct.test8* [[MEMTMP]] to <4 x i32>* +; MCO-MSSA-NEXT: store <4 x i32> , <4 x i32>* [[TMP0]], align 16 +; MCO-MSSA-NEXT: ret void +; entry: %memtmp = alloca %struct.test8, align 16 %0 = bitcast %struct.test8* %memtmp to <4 x i32>* @@ -360,6 +493,10 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test9( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false) +; MCO-MSSA-NEXT: ret void +; store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1 store i8 -1, i8* getelementptr (i8, i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2 @@ -385,6 +522,10 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test10( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 42, i32 1, i1 false) tail call void @llvm.memset.p0i8.i64(i8* %P, i8 0, i64 23, i32 1, i1 false) ret void @@ -402,6 +543,16 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 1, i64 23, i32 4, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test11( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* %P, i64 3 +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast i32* [[ADD_PTR]] to i8* +; MCO-MSSA-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, i32* %P, i64 0 +; MCO-MSSA-NEXT: [[ARRAYIDX_CAST:%.*]] = bitcast i32* [[ARRAYIDX]] to i96* +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i96* [[ARRAYIDX_CAST]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP1]], i8 1, i64 23, i32 4, i1 false) +; MCO-MSSA-NEXT: ret void +; entry: %add.ptr = getelementptr inbounds i32, i32* %P, i64 3 %0 = bitcast i32* %add.ptr to i8* Index: test/Transforms/MemCpyOpt/invariant.start.ll =================================================================== --- test/Transforms/MemCpyOpt/invariant.start.ll +++ test/Transforms/MemCpyOpt/invariant.start.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; MemCpy optimizations should take place even in presence of invariant.start ; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s +; RUN: opt < %s -basicaa -memcpyopt-mssa -dse -S | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" @@ -27,6 +28,11 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* [[R]], i32 32, i32 16, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %P) +; MCO-MSSA-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %Q, i8* %P, i32 32, i32 16, i1 false) +; MCO-MSSA-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false) @@ -45,6 +51,12 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test2( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; MCO-MSSA-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* 
%dst2, i8 %c, i64 128, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) %i = call {}* @llvm.invariant.start.p0i8(i64 32, i8* %dst1) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 8, i1 false) Index: test/Transforms/MemCpyOpt/lifetime.ll =================================================================== --- test/Transforms/MemCpyOpt/lifetime.ll +++ test/Transforms/MemCpyOpt/lifetime.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -O1 -S | FileCheck %s +; RUN: opt -mco-mssa < %s -O1 -S | FileCheck %s --check-prefix=MCO-MSSA ; performCallSlotOptzn in MemCpy should not exchange the calls to ; @llvm.lifetime.start and @llvm.memcpy. @@ -15,6 +16,12 @@ ; CHECK-NEXT: store i8 0, i8* [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX]], align 1 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @_ZN4CordC2EOS_( +; MCO-MSSA-NEXT: bb: +; MCO-MSSA-NEXT: [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* %arg1, i64 7 +; MCO-MSSA-NEXT: store i8 0, i8* [[TMP_SROA_3_0_ARG1_SROA_RAW_IDX]], align 1 +; MCO-MSSA-NEXT: ret void +; bb: %tmp = alloca [8 x i8], align 8 %tmp5 = bitcast [8 x i8]* %tmp to i8* Index: test/Transforms/MemCpyOpt/loadstore-sret.ll =================================================================== --- test/Transforms/MemCpyOpt/loadstore-sret.ll +++ test/Transforms/MemCpyOpt/loadstore-sret.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S < %s -basicaa -memcpyopt | FileCheck %s +; RUN: opt -S < %s -basicaa -memcpyopt-mssa | FileCheck %s --check-prefix=MCO-MSSA ; target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" @@ -16,6 +17,14 @@ ; CHECK-NEXT: [[TMP_I_I4:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* 
%agg.result, i64 0, i32 0 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @_Z3foov( +; MCO-MSSA-NEXT: _ZNSt8auto_ptrIiED1Ev.exit: +; MCO-MSSA-NEXT: [[TEMP_LVALUE:%.*]] = alloca %"class.std::auto_ptr", align 8 +; MCO-MSSA-NEXT: call void @_Z3barv(%"class.std::auto_ptr"* sret %agg.result) +; MCO-MSSA-NEXT: [[TMP_I_I:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* [[TEMP_LVALUE]], i64 0, i32 0 +; MCO-MSSA-NEXT: [[TMP_I_I4:%.*]] = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0 +; MCO-MSSA-NEXT: ret void +; _ZNSt8auto_ptrIiED1Ev.exit: %temp.lvalue = alloca %"class.std::auto_ptr", align 8 call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue) Index: test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll =================================================================== --- test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll +++ test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basicaa -memcpyopt -instcombine -S < %s | FileCheck %s +; RUN: opt -basicaa -memcpyopt-mssa -instcombine -S < %s | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -11,6 +12,12 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRET1]], i8 0, i64 64, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @foo( +; MCO-MSSA-NEXT: entry-block: +; MCO-MSSA-NEXT: [[SRET1:%.*]] = bitcast [8 x i64]* %sret to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRET1]], i8 0, i64 64, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; entry-block: %a = alloca [8 x i64], align 8 %a.cast = bitcast [8 x i64]* %a to i8* @@ -38,6 +45,20 @@ ; CHECK-NEXT: call void @llvm.lifetime.end(i64 64, i8* [[A_CAST]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @bar( +; MCO-MSSA-NEXT: entry-block: +; MCO-MSSA-NEXT: 
[[A:%.*]] = alloca [8 x i64], align 8 +; MCO-MSSA-NEXT: [[A_CAST:%.*]] = bitcast [8 x i64]* [[A]] to i8* +; MCO-MSSA-NEXT: call void @llvm.lifetime.start(i64 64, i8* [[A_CAST]]) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A_CAST]], i8 0, i64 64, i32 8, i1 false) +; MCO-MSSA-NEXT: [[SRET_CAST:%.*]] = bitcast [8 x i64]* %sret to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[SRET_CAST]], i8 0, i64 64, i32 8, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A_CAST]], i8 42, i64 32, i32 8, i1 false) +; MCO-MSSA-NEXT: [[OUT_CAST:%.*]] = bitcast [8 x i64]* %out to i8* +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[OUT_CAST]], i8* [[A_CAST]], i64 64, i32 8, i1 false) +; MCO-MSSA-NEXT: call void @llvm.lifetime.end(i64 64, i8* [[A_CAST]]) +; MCO-MSSA-NEXT: ret void +; entry-block: %a = alloca [8 x i64], align 8 %a.cast = bitcast [8 x i64]* %a to i8* Index: test/Transforms/MemCpyOpt/memcpy-to-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/memcpy-to-memset.ll +++ test/Transforms/MemCpyOpt/memcpy-to-memset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s +; RUN: opt -memcpyopt-mssa -S < %s | FileCheck %s --check-prefix=MCO-MSSA @cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4 @@ -15,6 +16,14 @@ ; CHECK-NEXT: call void @foo(i32* [[ARRAYDECAY]]) #1 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: [[ARR:%.*]] = alloca [3 x i32], align 4 +; MCO-MSSA-NEXT: [[ARR_I8:%.*]] = bitcast [3 x i32]* [[ARR]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[ARR_I8]], i8 -1, i64 12, i32 4, i1 false) +; MCO-MSSA-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[ARR]], i64 0, i64 0 +; MCO-MSSA-NEXT: call void @foo(i32* [[ARRAYDECAY]]) #1 +; MCO-MSSA-NEXT: ret void +; %arr = alloca [3 x i32], align 4 %arr_i8 = 
bitcast [3 x i32]* %arr to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arr_i8, i8* bitcast ([3 x i32]* @cst to i8*), i64 12, i32 4, i1 false) Index: test/Transforms/MemCpyOpt/memcpy-undef.ll =================================================================== --- test/Transforms/MemCpyOpt/memcpy-undef.ll +++ test/Transforms/MemCpyOpt/memcpy-undef.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s +; RUN: opt < %s -basicaa -memcpyopt-mssa -S | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" @@ -18,6 +19,16 @@ ; CHECK-NEXT: store i32 20, i32* [[TMP4]], align 4 ; CHECK-NEXT: ret i32 undef ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: [[BLETCH_SROA_1:%.*]] = alloca [7 x i8], align 1 +; MCO-MSSA-NEXT: [[TMP1:%.*]] = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 0 +; MCO-MSSA-NEXT: store i8 98, i8* [[TMP1]], align 4 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 1, i64 0 +; MCO-MSSA-NEXT: [[TMP3:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[BLETCH_SROA_1]], i64 0, i64 0 +; MCO-MSSA-NEXT: [[TMP4:%.*]] = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 2 +; MCO-MSSA-NEXT: store i32 20, i32* [[TMP4]], align 4 +; MCO-MSSA-NEXT: ret i32 undef +; %bletch.sroa.1 = alloca [7 x i8], align 1 %1 = getelementptr inbounds %struct.foo, %struct.foo* %foobie, i64 0, i32 0 store i8 98, i8* %1, align 4 @@ -35,6 +46,10 @@ ; CHECK-NEXT: call void @llvm.lifetime.start(i64 8, i8* %in) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test2( +; MCO-MSSA-NEXT: call void @llvm.lifetime.start(i64 8, i8* %in) +; MCO-MSSA-NEXT: ret void +; call void @llvm.lifetime.start(i64 8, i8* %in) call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) ret void @@ -47,6 +62,11 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test3( +; MCO-MSSA-NEXT: call void @llvm.lifetime.start(i64 4, i8* %in) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.lifetime.start(i64 4, i8* %in) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 8, i32 1, i1 false) ret void Index: test/Transforms/MemCpyOpt/memcpy.ll =================================================================== --- test/Transforms/MemCpyOpt/memcpy.ll +++ test/Transforms/MemCpyOpt/memcpy.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -dse -S | FileCheck %s +; RUN: opt < %s -basicaa -memcpyopt-mssa -dse -S | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" @@ -18,6 +19,16 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT21]], i8* [[TMP219]], i32 32, i32 16, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[TMP2:%.*]] = alloca %0 +; MCO-MSSA-NEXT: [[TMP5:%.*]] = fsub x86_fp80 0xK80000000000000000000, %z.1 +; MCO-MSSA-NEXT: call void @ccoshl(%0* sret [[TMP2]], x86_fp80 [[TMP5]], x86_fp80 %z.0) #0 +; MCO-MSSA-NEXT: [[TMP219:%.*]] = bitcast %0* [[TMP2]] to i8* +; MCO-MSSA-NEXT: [[AGG_RESULT21:%.*]] = bitcast %0* %agg.result to i8* +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT21]], i8* [[TMP219]], i32 32, i32 16, i1 false) +; MCO-MSSA-NEXT: ret void +; entry: %tmp2 = alloca %0 %memtmp = alloca %0, align 16 @@ -45,6 +56,10 @@ ; CHECK-NEXT: call void 
@llvm.memmove.p0i8.p0i8.i32(i8* %Q, i8* %P, i32 32, i32 16, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test2( +; MCO-MSSA-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %Q, i8* %P, i32 32, i32 16, i1 false) +; MCO-MSSA-NEXT: ret void +; %memtmp = alloca %0, align 16 %R = bitcast %0* %memtmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false) @@ -64,6 +79,11 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT1]], i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test3( +; MCO-MSSA-NEXT: [[AGG_RESULT1:%.*]] = bitcast %0* %agg.result to i8* +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT1]], i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false) +; MCO-MSSA-NEXT: ret void +; %x.0 = alloca %0 %x.01 = bitcast %0* %x.0 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false) @@ -79,6 +99,10 @@ ; CHECK-NEXT: call void @test4a(i8* byval align 1 %P) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test4( +; MCO-MSSA-NEXT: call void @test4a(i8* byval align 1 %P) +; MCO-MSSA-NEXT: ret void +; %A = alloca %1 %a = bitcast %1* %A to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false) @@ -109,6 +133,16 @@ ; CHECK-NEXT: call void @test5a(%struct.S* byval align 16 [[Y]]) ; CHECK-NEXT: ret i32 0 ; +; MCO-MSSA-LABEL: @test5( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[Y:%.*]] = alloca %struct.S, align 16 +; MCO-MSSA-NEXT: [[TMP:%.*]] = bitcast %struct.S* [[Y]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP]], i8* bitcast (%struct.S* @sS to i8*), i64 32, i32 16, i1 false) +; MCO-MSSA-NEXT: [[A:%.*]] = getelementptr %struct.S, %struct.S* [[Y]], i64 0, i32 1, i64 0 +; MCO-MSSA-NEXT: store i8 4, i8* [[A]] +; MCO-MSSA-NEXT: call void @test5a(%struct.S* byval align 16 [[Y]]) +; MCO-MSSA-NEXT: ret i32 0 +; entry: %y = alloca %struct.S, align 16 %tmp = 
bitcast %struct.S* %y to i8* @@ -124,6 +158,9 @@ ; CHECK-LABEL: @test6( ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test6( +; MCO-MSSA-NEXT: ret void +; call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %P, i64 8, i32 4, i1 false) ret void } @@ -139,6 +176,11 @@ ; CHECK-NEXT: [[CALL:%.*]] = call i32 @g(%struct.p* byval align 8 %q) #0 ; CHECK-NEXT: ret i32 [[CALL]] ; +; MCO-MSSA-LABEL: @test7( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[CALL:%.*]] = call i32 @g(%struct.p* byval align 8 %q) #0 +; MCO-MSSA-NEXT: ret i32 [[CALL]] +; entry: %agg.tmp = alloca %struct.p, align 4 %tmp = bitcast %struct.p* %agg.tmp to i8* @@ -160,6 +202,9 @@ ; CHECK-LABEL: @test8( ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test8( +; MCO-MSSA-NEXT: ret void +; %A = tail call i8* @malloc(i32 10) %B = getelementptr inbounds i8, i8* %A, i64 2 tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @test8.str, i64 0, i64 0), i32 7, i32 1, i1 false) @@ -182,6 +227,13 @@ ; CHECK-NEXT: call void @f2(%struct.big* [[B]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test9_addrspacecast( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[B:%.*]] = alloca %struct.big, align 4 +; MCO-MSSA-NEXT: call void @f1(%struct.big* sret [[B]]) +; MCO-MSSA-NEXT: call void @f2(%struct.big* [[B]]) +; MCO-MSSA-NEXT: ret void +; entry: %b = alloca %struct.big, align 4 %tmp = alloca %struct.big, align 4 @@ -201,6 +253,13 @@ ; CHECK-NEXT: call void @f2(%struct.big* [[B]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test9( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[B:%.*]] = alloca %struct.big, align 4 +; MCO-MSSA-NEXT: call void @f1(%struct.big* sret [[B]]) +; MCO-MSSA-NEXT: call void @f2(%struct.big* [[B]]) +; MCO-MSSA-NEXT: ret void +; entry: %b = alloca %struct.big, align 4 %tmp = alloca %struct.big, align 4 @@ -229,6 +288,15 @@ ; CHECK-NEXT: store i32 [[C]], i32* [[D]] ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test10( +; MCO-MSSA-NEXT: [[A:%.*]] = alloca i32, align 4 +; 
MCO-MSSA-NEXT: store i32 %y, i32* [[A]] +; MCO-MSSA-NEXT: call void @foo(i32* noalias nocapture [[A]]) +; MCO-MSSA-NEXT: [[C:%.*]] = load i32, i32* [[A]] +; MCO-MSSA-NEXT: [[D:%.*]] = bitcast %opaque* %x to i32* +; MCO-MSSA-NEXT: store i32 [[C]], i32* [[D]] +; MCO-MSSA-NEXT: ret void +; %a = alloca i32, align 4 store i32 %y, i32* %a call void @foo(i32* noalias nocapture %a) Index: test/Transforms/MemCpyOpt/memmove.ll =================================================================== --- test/Transforms/MemCpyOpt/memmove.ll +++ test/Transforms/MemCpyOpt/memmove.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | FileCheck %s +; RUN: opt < %s -basicaa -memcpyopt-mssa -S | FileCheck %s --check-prefix=MCO-MSSA ; These memmoves should get optimized to memcpys. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" @@ -16,6 +17,14 @@ ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL3_SUB]], i8* %src, i64 13, i32 1, i1 false) ; CHECK-NEXT: ret i8* [[CALL3_SUB]] ; +; MCO-MSSA-LABEL: @test1( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[MALLOCCALL:%.*]] = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32)) +; MCO-MSSA-NEXT: [[CALL3:%.*]] = bitcast i8* [[MALLOCCALL]] to [13 x i8]* +; MCO-MSSA-NEXT: [[CALL3_SUB:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* [[CALL3]], i64 0, i64 0 +; MCO-MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[CALL3_SUB]], i8* %src, i64 13, i32 1, i1 false) +; MCO-MSSA-NEXT: ret i8* [[CALL3_SUB]] +; entry: %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64), i64 13) to i32)) @@ -34,6 +43,12 @@ ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* [[ADD_PTR]], i64 16, i32 1, i1 false) ; 
CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test2( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* %P, i64 16 +; MCO-MSSA-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* [[ADD_PTR]], i64 16, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; entry: %add.ptr = getelementptr i8, i8* %P, i64 16 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false) @@ -48,6 +63,12 @@ ; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* [[ADD_PTR]], i64 17, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test3( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* %P, i64 16 +; MCO-MSSA-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* [[ADD_PTR]], i64 17, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; entry: %add.ptr = getelementptr i8, i8* %P, i64 16 tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false) Index: test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll +++ test/Transforms/MemCpyOpt/memset-memcpy-redundant-memset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basicaa -memcpyopt -S %s | FileCheck %s +; RUN: opt -basicaa -memcpyopt-mssa -S %s | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -13,6 +14,15 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, %src_size +; MCO-MSSA-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, %src_size +; MCO-MSSA-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; MCO-MSSA-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* %dst, i64 %src_size +; MCO-MSSA-NEXT: call 
void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 %c, i64 [[TMP3]], i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) ret void @@ -29,6 +39,16 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_different_types_i32_i64( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = zext i32 %dst_size to i64 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = icmp ule i64 [[TMP1]], %src_size +; MCO-MSSA-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], %src_size +; MCO-MSSA-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP3]] +; MCO-MSSA-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* %dst, i64 %src_size +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP5]], i8 %c, i64 [[TMP4]], i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) ret void @@ -45,6 +65,16 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_different_types_i128_i32( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = zext i32 %src_size to i128 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = icmp ule i128 %dst_size, [[TMP1]] +; MCO-MSSA-NEXT: [[TMP3:%.*]] = sub i128 %dst_size, [[TMP1]] +; MCO-MSSA-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i128 0, i128 [[TMP3]] +; MCO-MSSA-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* %dst, i128 [[TMP1]] +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i128(i8* [[TMP5]], i8 %c, i128 [[TMP4]], i32 1, i1 false) +; 
MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i128(i8* %dst, i8 %c, i128 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) ret void @@ -61,6 +91,16 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_different_types_i32_i128( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = zext i32 %dst_size to i128 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = icmp ule i128 [[TMP1]], %src_size +; MCO-MSSA-NEXT: [[TMP3:%.*]] = sub i128 [[TMP1]], %src_size +; MCO-MSSA-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i128 0, i128 [[TMP3]] +; MCO-MSSA-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* %dst, i128 %src_size +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i128(i8* [[TMP5]], i8 %c, i128 [[TMP4]], i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst, i8 %c, i32 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i128(i8* %dst, i8* %src, i128 %src_size, i32 1, i1 false) ret void @@ -77,6 +117,16 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_different_types_i64_i32( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = zext i32 %src_size to i64 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = icmp ule i64 %dst_size, [[TMP1]] +; MCO-MSSA-NEXT: [[TMP3:%.*]] = sub i64 %dst_size, [[TMP1]] +; MCO-MSSA-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i64 0, i64 [[TMP3]] +; MCO-MSSA-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* %dst, i64 [[TMP1]] +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP5]], i8 %c, i64 [[TMP4]], i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 
%src_size, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %src_size, i32 1, i1 false) ret void @@ -92,6 +142,15 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_align_same( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, 80 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, 80 +; MCO-MSSA-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; MCO-MSSA-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* %dst, i64 80 +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 0, i64 [[TMP3]], i32 8, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 1, i1 false) ret void @@ -107,6 +166,15 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_align_min( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, 36 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, 36 +; MCO-MSSA-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; MCO-MSSA-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* %dst, i64 36 +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 0, i64 [[TMP3]], i32 4, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 36, i32 1, i1 false) ret void @@ -122,6 +190,15 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* 
%src, i64 80, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_align_memcpy( +; MCO-MSSA-NEXT: [[TMP1:%.*]] = icmp ule i64 %dst_size, 80 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = sub i64 %dst_size, 80 +; MCO-MSSA-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; MCO-MSSA-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* %dst, i64 80 +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 0, i64 [[TMP3]], i32 8, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 80, i32 8, i1 false) ret void @@ -138,6 +215,16 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* %src, i64 %src_size, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_non_i8_dst_type( +; MCO-MSSA-NEXT: [[DST:%.*]] = bitcast i64* [[DST:%.*]]_pi64 to i8* +; MCO-MSSA-NEXT: [[TMP1:%.*]] = icmp ule i64 [[DST]]_size, %src_size +; MCO-MSSA-NEXT: [[TMP2:%.*]] = sub i64 [[DST]]_size, %src_size +; MCO-MSSA-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i64 0, i64 [[TMP2]] +; MCO-MSSA-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[DST]], i64 %src_size +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP4]], i8 %c, i64 [[TMP3]], i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DST]], i8* %src, i64 %src_size, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; %dst = bitcast i64* %dst_pi64 to i8* call void @llvm.memset.p0i8.i64(i8* %dst, i8 %c, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %src_size, i32 1, i1 false) @@ -150,6 +237,11 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_different_dst( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* 
%dst, i8 0, i64 %dst_size, i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %dst_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %src, i64 %src_size, i32 1, i1 false) ret void @@ -164,6 +256,12 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false) ; CHECK-NEXT: ret i8 [[R]] ; +; MCO-MSSA-LABEL: @test_intermediate_read( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false) +; MCO-MSSA-NEXT: [[R:%.*]] = load i8, i8* %a +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false) +; MCO-MSSA-NEXT: ret i8 [[R]] +; call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i32 1, i1 false) %r = load i8, i8* %a call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 24, i32 1, i1 false) @@ -182,6 +280,15 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A0]], i8* %b, i64 8, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_intermediate_write( +; MCO-MSSA-NEXT: [[A:%.*]] = alloca %struct +; MCO-MSSA-NEXT: [[A0:%.*]] = getelementptr %struct, %struct* [[A]], i32 0, i32 0, i32 0 +; MCO-MSSA-NEXT: [[A1:%.*]] = getelementptr %struct, %struct* [[A]], i32 0, i32 1, i32 0 +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[A0]], i8 0, i64 16, i32 1, i1 false) +; MCO-MSSA-NEXT: store i8 1, i8* [[A1]] +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A0]], i8* %b, i64 8, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; %a = alloca %struct %a0 = getelementptr %struct, %struct* %a, i32 0, i32 0, i32 0 %a1 = getelementptr %struct, %struct* %a, i32 0, i32 1, i32 0 Index: test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll +++ 
test/Transforms/MemCpyOpt/memset-memcpy-to-2x-memset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S %s | FileCheck %s +; RUN: opt -memcpyopt-mssa -S %s | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @@ -9,6 +10,11 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 8, i1 false) ret void @@ -20,6 +26,11 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_smaller_memcpy( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false) ret void @@ -31,6 +42,11 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_smaller_memset( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 100, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 
false) ret void @@ -42,6 +58,11 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_align_memset( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 128, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) ret void @@ -53,6 +74,11 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_different_types( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst2, i8 %c, i32 100, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %dst1, i32 100, i32 1, i1 false) ret void @@ -64,6 +90,11 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_different_types_2( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst2, i8 %c, i64 100, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i32(i8* %dst1, i8 %c, i32 128, i32 8, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 100, i32 1, i1 false) ret void @@ -76,6 +107,12 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* [[P]], i64 64, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_different_source_gep( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; 
MCO-MSSA-NEXT: [[P:%.*]] = getelementptr i8, i8* %dst1, i64 64 +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* [[P]], i64 64, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) ; FIXME: We could optimize this as well. %p = getelementptr i8, i8* %dst1, i64 64 @@ -89,6 +126,11 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_variable_size_1( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 %dst1_size, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 128, i32 1, i1 false) ret void @@ -100,6 +142,11 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test_variable_size_2( +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false) +; MCO-MSSA-NEXT: ret void +; call void @llvm.memset.p0i8.i64(i8* %dst1, i8 %c, i64 128, i32 1, i1 false) call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst2, i8* %dst1, i64 %dst2_size, i32 1, i1 false) ret void Index: test/Transforms/MemCpyOpt/nontemporal.ll =================================================================== --- test/Transforms/MemCpyOpt/nontemporal.ll +++ test/Transforms/MemCpyOpt/nontemporal.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s +; RUN: opt < %s -memcpyopt-mssa -S | FileCheck %s --check-prefix=MCO-MSSA target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -25,6 +26,25 @@ ; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR7]], align 16, !nontemporal !0 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @nontemporal_stores_1( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 +; MCO-MSSA-NEXT: [[PTR1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], align 16, !nontemporal !0 +; MCO-MSSA-NEXT: [[PTR2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 2 +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR2]], align 16, !nontemporal !0 +; MCO-MSSA-NEXT: [[PTR3:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 3 +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR3]], align 16, !nontemporal !0 +; MCO-MSSA-NEXT: [[PTR4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 4 +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR4]], align 16, !nontemporal !0 +; MCO-MSSA-NEXT: [[PTR5:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 5 +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR5]], align 16, !nontemporal !0 +; MCO-MSSA-NEXT: [[PTR6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 6 +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR6]], align 16, !nontemporal !0 +; MCO-MSSA-NEXT: [[PTR7:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 7 +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR7]], align 16, !nontemporal !0 +; MCO-MSSA-NEXT: ret void +; entry: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 @@ -52,6 +72,13 @@ ; CHECK-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], 
align 16, !nontemporal !0 ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @nontemporal_stores_2( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 +; MCO-MSSA-NEXT: [[PTR1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 +; MCO-MSSA-NEXT: store <4 x float> zeroinitializer, <4 x float>* [[PTR1]], align 16, !nontemporal !0 +; MCO-MSSA-NEXT: ret void +; entry: store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !0 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %dst, i64 1 Index: test/Transforms/MemCpyOpt/pr29105.ll =================================================================== --- test/Transforms/MemCpyOpt/pr29105.ll +++ test/Transforms/MemCpyOpt/pr29105.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -instcombine -S %s | FileCheck %s +; RUN: opt -memcpyopt-mssa -instcombine -S %s | FileCheck %s --check-prefix=MCO-MSSA %Foo = type { [2048 x i64] } ; Make sure that all mempcy calls are converted to memset calls, or removed. 
@@ -14,6 +15,17 @@ ; CHECK-NEXT: call void @llvm.lifetime.end(i64 16384, i8* [[TMP0]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @baz( +; MCO-MSSA-NEXT: entry-block: +; MCO-MSSA-NEXT: [[TMP2:%.*]] = alloca %Foo, align 8 +; MCO-MSSA-NEXT: [[TMP223:%.*]] = bitcast %Foo* [[TMP2]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP223]], i8 0, i64 16384, i32 8, i1 false) +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast %Foo* [[TMP2]] to i8* +; MCO-MSSA-NEXT: call void @llvm.lifetime.start(i64 16384, i8* [[TMP0]]) +; MCO-MSSA-NEXT: call void @bar(%Foo* noalias nocapture nonnull dereferenceable(16384) [[TMP2]]) +; MCO-MSSA-NEXT: call void @llvm.lifetime.end(i64 16384, i8* [[TMP0]]) +; MCO-MSSA-NEXT: ret void +; entry-block: %x.sroa.0 = alloca [2048 x i64], align 8 %tmp0 = alloca [2048 x i64], align 8 Index: test/Transforms/MemCpyOpt/profitable-memset.ll =================================================================== --- test/Transforms/MemCpyOpt/profitable-memset.ll +++ test/Transforms/MemCpyOpt/profitable-memset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -memcpyopt -S | FileCheck %s +; RUN: opt < %s -memcpyopt-mssa -S | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" @@ -14,6 +15,16 @@ ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 8, i32 2, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @foo( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[TMP0:%.*]] = bitcast i64* %P to i16* +; MCO-MSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 1 +; MCO-MSSA-NEXT: [[TMP1:%.*]] = bitcast i16* [[ARRAYIDX]] to i32* +; MCO-MSSA-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[TMP0]], i64 3 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP0]] to i8* +; MCO-MSSA-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP2]], i8 0, i64 8, i32 2, i1 false) +; MCO-MSSA-NEXT: ret void +; entry: %0 = bitcast i64* %P 
to i16* %arrayidx = getelementptr inbounds i16, i16* %0, i64 1 Index: test/Transforms/MemCpyOpt/smaller.ll =================================================================== --- test/Transforms/MemCpyOpt/smaller.ll +++ test/Transforms/MemCpyOpt/smaller.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -memcpyopt -S < %s | FileCheck %s ; RUN: opt -passes=memcpyopt -S < %s | FileCheck %s +; RUN: opt -memcpyopt-mssa -S < %s | FileCheck %s --check-prefix=MCO-MSSA +; RUN: opt -passes=memcpyopt-mssa -S < %s | FileCheck %s --check-prefix=MCO-MSSA ; rdar://8875553 ; Memcpyopt shouldn't optimize the second memcpy using the first @@ -27,6 +29,16 @@ ; CHECK-NEXT: call void @check(%struct.s* byval [[AGG_TMP]]) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @foo( +; MCO-MSSA-NEXT: entry: +; MCO-MSSA-NEXT: [[AGG_TMP:%.*]] = alloca %struct.s, align 4 +; MCO-MSSA-NEXT: store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4 +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 11, i32 1, i1 false) +; MCO-MSSA-NEXT: [[TMP:%.*]] = getelementptr inbounds %struct.s, %struct.s* [[AGG_TMP]], i32 0, i32 0, i32 0 +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP]], i8* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 0, i32 0), i32 16, i32 4, i1 false) +; MCO-MSSA-NEXT: call void @check(%struct.s* byval [[AGG_TMP]]) +; MCO-MSSA-NEXT: ret void +; entry: %agg.tmp = alloca %struct.s, align 4 store i32 99, i32* getelementptr inbounds (%struct.s, %struct.s* @cell, i32 0, i32 1), align 4 Index: test/Transforms/MemCpyOpt/sret.ll =================================================================== --- test/Transforms/MemCpyOpt/sret.ll +++ test/Transforms/MemCpyOpt/sret.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py ; RUN: opt < %s -basicaa -memcpyopt -S | not grep "call.*memcpy" +; RUN: opt < %s -basicaa -memcpyopt-mssa -S | not grep "call.*memcpy" target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i686-apple-darwin9" Index: test/Transforms/Util/combine-alias-scope-metadata.ll =================================================================== --- test/Transforms/Util/combine-alias-scope-metadata.ll +++ test/Transforms/Util/combine-alias-scope-metadata.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s +; RUN: opt < %s -S -basicaa -memcpyopt-mssa | FileCheck %s --check-prefix=MCO-MSSA target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @test(i8* noalias dereferenceable(1) %in, i8* noalias dereferenceable(1) %out) { @@ -9,6 +10,12 @@ ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false) ; CHECK-NEXT: ret void ; +; MCO-MSSA-LABEL: @test( +; MCO-MSSA-NEXT: [[TMP:%.*]] = alloca i8 +; MCO-MSSA-NEXT: [[TMP2:%.*]] = alloca i8 +; MCO-MSSA-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 1, i32 8, i1 false) +; MCO-MSSA-NEXT: ret void +; %tmp = alloca i8 %tmp2 = alloca i8 call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %in, i64 1, i32 8, i1 false), !alias.scope !4