Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp
===================================================================
--- lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -106,7 +106,7 @@
   typedef MapVector<Value *, StoreList> StoreListMap;
   StoreListMap StoreRefsForMemset;
   StoreListMap StoreRefsForMemsetPattern;
-  StoreList StoreRefsForMemcpy;
+  StoreListMap StoreRefsForMemcpy;
   bool HasMemset;
   bool HasMemsetPattern;
   bool HasMemcpy;
@@ -140,7 +140,13 @@
                                SmallPtrSetImpl<Instruction *> &Stores,
                                const SCEVAddRecExpr *Ev, const SCEV *BECount,
                                bool NegStride, bool IsLoopMemset = false);
-  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
+  bool processLoopStoreOfLoopLoad(SmallVectorImpl<StoreInst *> &SL,
+                                  const SCEV *BECount);
+  bool processLoopStoreOfLoopLoadMain(
+      Value *DestPtr, unsigned StoreSize, StoreInst *TheStore,
+      LoadInst *TheLoad, SmallPtrSetImpl<Instruction *> &Stores,
+      const SCEVAddRecExpr *Ev, Value *LoadPtr, const SCEVAddRecExpr *LoadEv,
+      const SCEV *BECount, bool NegStride);
   bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
                                  bool IsLoopMemset = false);
@@ -423,10 +429,6 @@
   if (HasMemcpy) {
     // Check to see if the stride matches the size of the store.  If so, then we
     // know that every byte is touched in the loop.
-    APInt Stride = getStoreStride(StoreEv);
-    unsigned StoreSize = getStoreSizeInBytes(SI, DL);
-    if (StoreSize != Stride && StoreSize != -Stride)
-      return LegalStoreKind::None;
 
     // The store must be feeding a non-volatile load.
     LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
@@ -484,9 +486,11 @@
       StoreRefsForMemsetPattern[Ptr].push_back(SI);
   } break;
   case LegalStoreKind::Memcpy:
-  case LegalStoreKind::UnorderedAtomicMemcpy:
-    StoreRefsForMemcpy.push_back(SI);
-    break;
+  case LegalStoreKind::UnorderedAtomicMemcpy: {
+    // Find the base pointer.
+    Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+    StoreRefsForMemcpy[Ptr].push_back(SI);
+  } break;
   default:
     assert(false && "unhandled return value");
     break;
@@ -522,7 +526,7 @@
 
   // Optimize the store into a memcpy, if it feeds an similarly strided load.
   for (auto &SI : StoreRefsForMemcpy)
-    MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
+    MadeChange |= processLoopStoreOfLoopLoad(SI.second, BECount);
 
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
     Instruction *Inst = &*I++;
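Grouping the memcpy candidates by their GetUnderlyingObject base (hunk above) is what lets the rewritten processLoopStoreOfLoopLoad (hunk below) see all stores into the same underlying object at once. A hypothetical C input for the multi-store idiom the rewrite targets (names illustrative, not part of the patch):

    struct foo { int a, b; };
    void copy(struct foo *A, const struct foo *B, int n) {
      // Each store alone covers only half of the 8-byte stride, so the old
      // per-store stride check would reject it; together the two consecutive
      // stores touch every byte of A[i].
      for (int i = 0; i < n; ++i) {
        A[i].a = B[i].a; // head of the consecutive chain
        A[i].b = B[i].b; // tail of the consecutive chain
      }
    }
    // Expected result: the loop collapses to a single
    // memcpy(A, B, n * sizeof(struct foo)) emitted in the loop preheader.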
@@ -893,28 +897,167 @@
   return true;
 }
 
-/// If the stored value is a strided load in the same loop with the same stride
-/// this may be transformable into a memcpy.  This kicks in for stuff like
+/// If there are one or more stored values, all of which are strided loads
+/// in the same loop with the same stride, then this may be transformed into
+/// a memcpy.  This kicks in for stuff like
 ///   for (i) A[i] = B[i];
+///   for (i)
+///   {
+///     A[i].a = B[i].a;
+///     A[i].b = B[i].b;
+///   }
-bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
-                                                    const SCEV *BECount) {
-  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
+    SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount) {
+  // Try to find consecutive stores and loads that can be transformed
+  // into memcpy.
+  SetVector<StoreInst *> Heads, Tails;
+  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
 
-  Value *StorePtr = SI->getPointerOperand();
-  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
-  APInt Stride = getStoreStride(StoreEv);
-  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
-  bool NegStride = StoreSize == -Stride;
+  // Do a quadratic search on all of the given stores and find
+  // all of the pairs of stores that follow each other.
+  SmallVector<unsigned, 16> IndexQueue;
+  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
+    assert(SL[i]->isUnordered() &&
+           "Expected only non-volatile non-ordered stores.");
 
-  // The store must be feeding a non-volatile load.
-  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
-  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
+    Value *FirstStorePtr = SL[i]->getPointerOperand();
+    const SCEVAddRecExpr *FirstStoreEv =
+        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
+    APInt FirstStride = getStoreStride(FirstStoreEv);
 
-  // See if the pointer expression is an AddRec like {base,+,1} on the current
-  // loop, which indicates a strided load.  If we have something else, it's a
-  // random load we can't handle.
-  const SCEVAddRecExpr *LoadEv =
-      cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
+    unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
+
+    LoadInst *FirstStoreLoad = dyn_cast<LoadInst>(SL[i]->getValueOperand());
+    assert(FirstStoreLoad->isUnordered() &&
+           "Expected only non-volatile non-ordered loads.");
+    Value *FirstLoadPtr =
+        GetUnderlyingObject(FirstStoreLoad->getPointerOperand(), *DL);
+
+    const SCEVAddRecExpr *FirstLoadEv = dyn_cast<SCEVAddRecExpr>(
+        SE->getSCEV(FirstStoreLoad->getPointerOperand()));
+
+    // See if we can optimize just this store in isolation.
+    if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
+      Heads.insert(SL[i]);
+      continue;
+    }
+
+    IndexQueue.clear();
+    // If a store has multiple consecutive store candidates, search the Stores
+    // array according to the sequence: from i+1 to e, then from i-1 to 0.
+    // This is because pairing with the immediately succeeding or preceding
+    // candidate usually creates the best chance to find a memcpy opportunity.
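+    // For example, with i == 2 and e == 6 the visiting order below is
+    // IndexQueue == {3, 4, 5, 1, 0}.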
+    unsigned j = 0;
+    for (j = i + 1; j < e; ++j)
+      IndexQueue.push_back(j);
+    for (j = i; j > 0; --j)
+      IndexQueue.push_back(j - 1);
+
+    for (auto &k : IndexQueue) {
+      assert(SL[k]->isUnordered() &&
+             "Expected only non-volatile non-ordered stores.");
+      Value *SecondStorePtr = SL[k]->getPointerOperand();
+      const SCEVAddRecExpr *SecondStoreEv =
+          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
+
+      APInt SecondStride = getStoreStride(SecondStoreEv);
+      if (FirstStride != SecondStride)
+        continue;
+
+      LoadInst *SecondStoreLoad = dyn_cast<LoadInst>(SL[k]->getValueOperand());
+      assert(SecondStoreLoad->isUnordered() &&
+             "Expected only non-volatile non-ordered loads.");
+      Value *SecondLoadPtr =
+          GetUnderlyingObject(SecondStoreLoad->getPointerOperand(), *DL);
+
+      if (FirstLoadPtr != SecondLoadPtr)
+        continue;
+
+      const SCEVAddRecExpr *SecondLoadEv = dyn_cast<SCEVAddRecExpr>(
+          SE->getSCEV(SecondStoreLoad->getPointerOperand()));
+
+      // The stride of both loads should be equal.
+      if (FirstLoadEv->getOperand(1) != SecondLoadEv->getOperand(1))
+        continue;
+
+      // Both stores and loads should be consecutive.
+      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
+        if (isConsecutiveAccess(FirstStoreLoad, SecondStoreLoad, *DL, *SE,
+                                false)) {
+          Tails.insert(SL[k]);
+          Heads.insert(SL[i]);
+          ConsecutiveChain[SL[i]] = SL[k];
+          break;
+        }
+      }
+    }
+  }
+
+  SmallPtrSet<StoreInst *, 16> TransformedStores;
+  bool Changed = false;
+  // For stores that start but don't end a link in the chain:
+  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
+       it != e; ++it) {
+    if (Tails.count(*it))
+      continue;
+
+    // We found a store instr that starts a chain. Now follow the chain and
+    // try to transform it.
+    SmallPtrSet<Instruction *, 8> AdjacentStores;
+    StoreInst *I = *it;
+
+    StoreInst *HeadStore = I;
+    unsigned StoreSize = 0;
+
+    // Collect the chain into a list.
+    while (Tails.count(I) || Heads.count(I)) {
+      if (TransformedStores.count(I))
+        break;
+      AdjacentStores.insert(I);
+
+      StoreSize += getStoreSizeInBytes(I, DL);
+      // Move to the next value in the chain.
+      I = ConsecutiveChain[I];
+    }
+
+    Value *StorePtr = HeadStore->getPointerOperand();
+    const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+    APInt Stride = getStoreStride(StoreEv);
+
+    LoadInst *StoreLoadInst = dyn_cast<LoadInst>(HeadStore->getValueOperand());
+    Value *LoadPtr =
+        GetUnderlyingObject(StoreLoadInst->getPointerOperand(), *DL);
+    const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(
+        SE->getSCEV(StoreLoadInst->getPointerOperand()));
+
+    // Check to see if the stride matches the size of the stores.  If so, then
+    // we know that every byte is touched in the loop.
+    if (StoreSize != Stride && StoreSize != -Stride)
+      continue;
+
+    bool NegStride = StoreSize == -Stride;
+    if (processLoopStoreOfLoopLoadMain(StorePtr, StoreSize, HeadStore,
+                                       StoreLoadInst, AdjacentStores, StoreEv,
+                                       LoadPtr, LoadEv, BECount, NegStride)) {
+      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
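+// At this point every discovered chain, e.g. "A[i].a = B[i].a; A[i].b =
+// B[i].b;", has been collapsed to its head store: AdjacentStores holds all
+// stores of the chain, StoreSize is their combined width (8 bytes for two
+// i32 members), and the head store and load describe the merged strided
+// access that is handed to the helper below.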
+/// processLoopStoreOfLoopLoadMain - We see strided stores and loads.  If we
+/// can transform this into a memcpy in the loop preheader, do so.
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoadMain(
+    Value *DestPtr, unsigned StoreSize, StoreInst *TheStore,
+    LoadInst *TheLoad, SmallPtrSetImpl<Instruction *> &Stores,
+    const SCEVAddRecExpr *Ev, Value *LoadPtr, const SCEVAddRecExpr *LoadEv,
+    const SCEV *BECount, bool NegStride) {
 
   // The trip count of the loop and the base pointer of the addrec SCEV is
   // guaranteed to be loop invariant, which means that it should dominate the
@@ -923,14 +1066,18 @@
   IRBuilder<> Builder(Preheader->getTerminator());
   SCEVExpander Expander(*SE, *DL, "loop-idiom");
 
-  const SCEV *StrStart = StoreEv->getStart();
-  unsigned StrAS = SI->getPointerAddressSpace();
+  const SCEV *StrStart = Ev->getStart();
+  unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
   Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
 
   // Handle negative strided loops.
   if (NegStride)
     StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
 
+  // TODO: ideally we should still be able to generate a memcpy if the SCEV
+  // expander is taught to generate the dependencies at the latest point.
+  if (!isSafeToExpand(StrStart, *SE))
+    return false;
+
   // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
   // this into a memcpy in the loop preheader now if we want.  However, this
   // would be unsafe to do if there is anything else in the loop that may read
@@ -940,8 +1087,6 @@
   Value *StoreBasePtr = Expander.expandCodeFor(
       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
 
-  SmallPtrSet<Instruction *, 1> Stores;
-  Stores.insert(SI);
   if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
                             StoreSize, *AA, Stores)) {
     Expander.clear();
@@ -951,12 +1096,17 @@
   const SCEV *LdStart = LoadEv->getStart();
-  unsigned LdAS = LI->getPointerAddressSpace();
+  unsigned LdAS = LoadPtr->getType()->getPointerAddressSpace();
 
   // Handle negative strided loops.
   if (NegStride)
     LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
 
+  // TODO: ideally we should still be able to generate a memcpy if the SCEV
+  // expander is taught to generate the dependencies at the latest point.
+  if (!isSafeToExpand(LdStart, *SE))
+    return false;
+
   // For a memcpy, we have to make sure that the input array is not being
   // mutated by the loop.
   Value *LoadBasePtr = Expander.expandCodeFor(
@@ -990,12 +1140,12 @@
   Value *NumBytes =
       Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
 
-  unsigned Align = std::min(SI->getAlignment(), LI->getAlignment());
+  unsigned Align = std::min(TheStore->getAlignment(), TheLoad->getAlignment());
   CallInst *NewCall = nullptr;
   // Check whether to generate an unordered atomic memcpy:
   //  If the load or store are atomic, then they must neccessarily be unordered
   //  by previous checks.
-  if (!SI->isAtomic() && !LI->isAtomic())
+  if (!TheStore->isAtomic() && !TheLoad->isAtomic())
     NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align);
   else {
     // We cannot allow unaligned ops for unordered load/store, so reject
     // anything where the alignment isn't at least the element size.
@@ -1017,19 +1167,20 @@
     // atomic loads/stores are *required* by the spec to have an alignment
     // but non-atomic loads/stores may not.
NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(), - SI->getAlignment())); + TheStore->getAlignment())); NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(), - LI->getAlignment())); + TheLoad->getAlignment())); } - NewCall->setDebugLoc(SI->getDebugLoc()); + NewCall->setDebugLoc(TheStore->getDebugLoc()); DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" - << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" - << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); + << " from load ptr=" << *LoadEv << " at: " << *TheLoad << "\n" + << " from store ptr=" << *Ev << " at: " << *TheStore << "\n"); // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. - deleteDeadInstruction(SI); + for (auto *I : Stores) + deleteDeadInstruction(I); ++NumMemCpy; return true; } Index: test/Transforms/LoopIdiom/memcpy_struct_pattern.ll =================================================================== --- /dev/null +++ test/Transforms/LoopIdiom/memcpy_struct_pattern.ll @@ -0,0 +1,423 @@ +; RUN: opt -globals-aa -loop-idiom < %s -S | FileCheck %s + +; ModuleID = '' +source_filename = "../Newmemcpytest/test17.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.foo = type { i32, i32 } +%struct.foo1 = type { i32 } +%struct.foo3 = type { i32, i32, i32 } + +@bar3.g = private unnamed_addr constant [14 x %struct.foo] [%struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }, %struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }], align 16 +@bar4.g = private unnamed_addr constant [14 x %struct.foo1] [%struct.foo1 { i32 2 }, %struct.foo1 { i32 4 }, %struct.foo1 { i32 5 }, %struct.foo1 { i32 6 }, %struct.foo1 { i32 9 }, %struct.foo1 { i32 5 }, %struct.foo1 { i32 1 }, %struct.foo1 { i32 2 }, %struct.foo1 { i32 3 }, %struct.foo1 { i32 3 }, %struct.foo1 { i32 5 }, %struct.foo1 { i32 7 }, %struct.foo1 { i32 8 }, %struct.foo1 { i32 19 }], align 16 +@bar5.g = private unnamed_addr constant [14 x %struct.foo] [%struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }, %struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }], align 16 +@bar6.g = private unnamed_addr constant [14 x %struct.foo] [%struct.foo { i32 12, i32 3 }, %struct.foo { i32 3, i32 54 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }, %struct.foo { i32 2, i32 53 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 74, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }], align 16 +@bar7.g = private unnamed_addr constant [14 x %struct.foo] [%struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo 
{ i32 1, i32 2 }, %struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }], align 16 +@bar8.g = private unnamed_addr constant [14 x %struct.foo] [%struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }, %struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }], align 16 +@bar9.g = private unnamed_addr constant [14 x %struct.foo3] [%struct.foo3 { i32 2, i32 3, i32 4 }, %struct.foo3 { i32 3, i32 4, i32 1 }, %struct.foo3 { i32 1, i32 8, i32 34 }, %struct.foo3 { i32 3, i32 2, i32 21 }, %struct.foo3 { i32 4, i32 5, i32 78 }, %struct.foo3 { i32 4, i32 5, i32 123 }, %struct.foo3 { i32 1, i32 2, i32 3 }, %struct.foo3 { i32 2, i32 3, i32 34 }, %struct.foo3 { i32 3, i32 4, i32 34 }, %struct.foo3 { i32 1, i32 8, i32 53 }, %struct.foo3 { i32 3, i32 2, i32 87 }, %struct.foo3 { i32 4, i32 5, i32 43 }, %struct.foo3 { i32 4, i32 5, i32 34 }, %struct.foo3 { i32 1, i32 2, i32 45 }], align 16 +@bar10.g = private unnamed_addr constant [14 x %struct.foo3] [%struct.foo3 { i32 2, i32 3, i32 4 }, %struct.foo3 { i32 3, i32 4, i32 1 }, %struct.foo3 { i32 1, i32 8, i32 34 }, %struct.foo3 { i32 3, i32 2, i32 21 }, %struct.foo3 { i32 4, i32 5, i32 78 }, %struct.foo3 { i32 4, i32 5, i32 123 }, %struct.foo3 { i32 1, i32 2, i32 3 }, %struct.foo3 { i32 2, i32 3, i32 34 }, %struct.foo3 { i32 3, i32 4, i32 34 }, %struct.foo3 { i32 1, i32 8, i32 53 }, %struct.foo3 { i32 3, i32 2, i32 87 }, %struct.foo3 { i32 4, i32 5, i32 43 }, %struct.foo3 { i32 4, i32 5, i32 34 }, %struct.foo3 { i32 1, i32 2, i32 45 }], align 16 +@bar11.g = private unnamed_addr constant [14 x %struct.foo3] [%struct.foo3 { i32 2, i32 3, i32 4 }, %struct.foo3 { i32 3, i32 4, i32 1 }, %struct.foo3 { i32 1, i32 8, i32 34 }, %struct.foo3 { i32 3, i32 2, i32 21 }, %struct.foo3 { i32 4, i32 5, i32 78 }, %struct.foo3 { i32 4, i32 5, i32 123 }, %struct.foo3 { i32 1, i32 2, i32 3 }, %struct.foo3 { i32 2, i32 3, i32 34 }, %struct.foo3 { i32 3, i32 4, i32 34 }, %struct.foo3 { i32 1, i32 8, i32 53 }, %struct.foo3 { i32 3, i32 2, i32 87 }, %struct.foo3 { i32 4, i32 5, i32 43 }, %struct.foo3 { i32 4, i32 5, i32 34 }, %struct.foo3 { i32 1, i32 2, i32 45 }], align 16 +@bar12.g = private unnamed_addr constant [14 x %struct.foo3] [%struct.foo3 { i32 2, i32 3, i32 4 }, %struct.foo3 { i32 3, i32 4, i32 1 }, %struct.foo3 { i32 1, i32 8, i32 34 }, %struct.foo3 { i32 3, i32 2, i32 21 }, %struct.foo3 { i32 4, i32 5, i32 78 }, %struct.foo3 { i32 4, i32 5, i32 123 }, %struct.foo3 { i32 1, i32 2, i32 3 }, %struct.foo3 { i32 2, i32 3, i32 34 }, %struct.foo3 { i32 3, i32 4, i32 34 }, %struct.foo3 { i32 1, i32 8, i32 53 }, %struct.foo3 { i32 3, i32 2, i32 87 }, %struct.foo3 { i32 4, i32 5, i32 43 }, %struct.foo3 { i32 4, i32 5, i32 34 }, %struct.foo3 { i32 1, i32 2, i32 45 }], align 16 +@bar13.g = private unnamed_addr constant [14 x %struct.foo] [%struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }, %struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 
}, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }], align 16 +@bar14.g = private unnamed_addr constant [14 x %struct.foo1] [%struct.foo1 { i32 2 }, %struct.foo1 { i32 4 }, %struct.foo1 { i32 5 }, %struct.foo1 { i32 6 }, %struct.foo1 { i32 9 }, %struct.foo1 { i32 5 }, %struct.foo1 { i32 1 }, %struct.foo1 { i32 2 }, %struct.foo1 { i32 3 }, %struct.foo1 { i32 3 }, %struct.foo1 { i32 5 }, %struct.foo1 { i32 7 }, %struct.foo1 { i32 8 }, %struct.foo1 { i32 19 }], align 16 +@bar15.g = private unnamed_addr constant [14 x %struct.foo] [%struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }, %struct.foo { i32 2, i32 3 }, %struct.foo { i32 3, i32 4 }, %struct.foo { i32 1, i32 8 }, %struct.foo { i32 3, i32 2 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 4, i32 5 }, %struct.foo { i32 1, i32 2 }], align 16 + +; Function Attrs: nounwind uwtable +define void @bar1(%struct.foo* nocapture %f) local_unnamed_addr #0 { +entry: + %g = alloca [14 x %struct.foo], align 16 + %0 = bitcast [14 x %struct.foo]* %g to i8* + call void @llvm.lifetime.start.p0i8(i64 112, i8* nonnull %0) #3 + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 13, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* %g, i64 0, i64 %indvars.iv, i32 0 + %1 = load i32, i32* %a, align 8, !tbaa !1 + %a3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 %1, i32* %a3, align 4, !tbaa !1 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + call void @llvm.lifetime.end.p0i8(i64 112, i8* nonnull %0) #3 + ret void +; CHECK-LABEL: @bar1( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 + +; Function Attrs: nounwind uwtable +define void @bar2(%struct.foo1* nocapture %f) local_unnamed_addr #0 { +entry: + %g = alloca [14 x %struct.foo1], align 16 + %0 = bitcast [14 x %struct.foo1]* %g to i8* + call void @llvm.lifetime.start.p0i8(i64 56, i8* nonnull %0) #3 + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 13, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo1], [14 x %struct.foo1]* %g, i64 0, i64 %indvars.iv, i32 0 + %1 = load i32, i32* %a, align 4, !tbaa !6 + %a3 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0 + store i32 %1, i32* %a3, align 4, !tbaa !6 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + call void @llvm.lifetime.end.p0i8(i64 56, i8* nonnull %0) #3 + ret void +; CHECK-LABEL: @bar2( +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NOT: store +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar3(%struct.foo* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x 
%struct.foo], [14 x %struct.foo]* @bar3.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 8, !tbaa !1 + %a3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a3, align 4, !tbaa !1 + %b = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar3.g, i64 0, i64 %indvars.iv, i32 1 + %1 = load i32, i32* %b, align 4, !tbaa !8 + %b8 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 %1, i32* %b8, align 4, !tbaa !8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 14 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar3( +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NOT: store +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #1 + +; Function Attrs: norecurse nounwind uwtable +define void @bar4(%struct.foo1* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo1], [14 x %struct.foo1]* @bar4.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 4, !tbaa !6 + %a3 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a3, align 4, !tbaa !6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 14 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar4( +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NOT: store +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar5(%struct.foo* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar5.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 8, !tbaa !1 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 %0, i32* %b, align 4, !tbaa !8 + %b5 = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar5.g, i64 0, i64 %indvars.iv, i32 1 + %1 = load i32, i32* %b5, align 4, !tbaa !8 + %a8 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 %1, i32* %a8, align 4, !tbaa !1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 14 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar5( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar6(%struct.foo* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 13, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar6.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 8, !tbaa !1 + %a3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a3, align 4, !tbaa !1 + %b = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar6.g, i64 0, i64 %indvars.iv, i32 1 + %1 = load i32, i32* 
%b, align 4, !tbaa !8 + %b8 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 %1, i32* %b8, align 4, !tbaa !8 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar6( +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NOT: store +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar7(%struct.foo* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar7.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 8, !tbaa !1 + %a3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a3, align 4, !tbaa !1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 14 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar7( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar8(%struct.foo* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar8.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 8, !tbaa !1 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 %0, i32* %b, align 4, !tbaa !8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 14 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar8( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar9(%struct.foo3* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 13, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar9.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 4, !tbaa !9 + %a3 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a3, align 4, !tbaa !9 + %b = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar9.g, i64 0, i64 %indvars.iv, i32 1 + %1 = load i32, i32* %b, align 4, !tbaa !11 + %b8 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 1 + store i32 %1, i32* %b8, align 4, !tbaa !11 + %c = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar9.g, i64 0, i64 %indvars.iv, i32 2 + %2 = load i32, i32* %c, align 4, !tbaa !12 + %c13 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 2 + store i32 %2, i32* %c13, align 4, !tbaa !12 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar9( +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 +; CHECK-NOT: store +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar10(%struct.foo3* nocapture %f) local_unnamed_addr #2 { +entry: 
+ br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 13, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar10.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 4, !tbaa !9 + %a3 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a3, align 4, !tbaa !9 + %b = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar10.g, i64 0, i64 %indvars.iv, i32 1 + %1 = load i32, i32* %b, align 4, !tbaa !11 + %b8 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 1 + store i32 %1, i32* %b8, align 4, !tbaa !11 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar10( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar11(%struct.foo3* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 13, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar11.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 4, !tbaa !9 + %a3 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a3, align 4, !tbaa !9 + %c = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar11.g, i64 0, i64 %indvars.iv, i32 2 + %1 = load i32, i32* %c, align 4, !tbaa !12 + %c8 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 2 + store i32 %1, i32* %c8, align 4, !tbaa !12 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar11( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar12(%struct.foo3* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 13, %entry ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar12.g, i64 0, i64 %indvars.iv, i32 0 + %0 = load i32, i32* %a, align 4, !tbaa !9 + %a3 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a3, align 4, !tbaa !9 + %b = getelementptr inbounds [14 x %struct.foo3], [14 x %struct.foo3]* @bar12.g, i64 0, i64 %indvars.iv, i32 1 + %1 = load i32, i32* %b, align 4, !tbaa !11 + %b8 = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 1 + store i32 %1, i32* %b8, align 4, !tbaa !11 + %2 = load i32, i32* %a, align 4, !tbaa !9 + %c = getelementptr inbounds %struct.foo3, %struct.foo3* %f, i64 %indvars.iv, i32 2 + store i32 %2, i32* %c, align 4, !tbaa !12 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar12( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar13(%struct.foo* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ 
%indvars.iv.next, %for.body ] + %b = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar13.g, i64 0, i64 %indvars.iv, i32 1 + %0 = load i32, i32* %b, align 4, !tbaa !8 + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 %0, i32* %a, align 4, !tbaa !1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 14 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar13( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar14(%struct.foo1* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = sub nuw nsw i64 13, %indvars.iv + %a = getelementptr inbounds [14 x %struct.foo1], [14 x %struct.foo1]* @bar14.g, i64 0, i64 %0, i32 0 + %1 = load i32, i32* %a, align 4, !tbaa !6 + %a3 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0 + store i32 %1, i32* %a3, align 4, !tbaa !6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 14 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar14( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +; Function Attrs: norecurse nounwind uwtable +define void @bar15(%struct.foo* nocapture %f) local_unnamed_addr #2 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = sub nuw nsw i64 13, %indvars.iv + %a = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar15.g, i64 0, i64 %0, i32 0 + %1 = load i32, i32* %a, align 8, !tbaa !1 + %a4 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %0, i32 0 + store i32 %1, i32* %a4, align 4, !tbaa !1 + %b = getelementptr inbounds [14 x %struct.foo], [14 x %struct.foo]* @bar15.g, i64 0, i64 %indvars.iv, i32 1 + %2 = load i32, i32* %b, align 4, !tbaa !8 + %b10 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %0, i32 1 + store i32 %2, i32* %b10, align 4, !tbaa !8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 14 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +; CHECK-LABEL: @bar15( +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} +!1 = !{!2, !3, i64 0} +!2 = !{!"foo", !3, i64 0, !3, i64 4} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !3, i64 0} +!7 = !{!"foo1", !3, i64 0} +!8 = !{!2, !3, i64 4} +!9 = !{!10, !3, i64 0} +!10 = !{!"foo3", !3, i64 0, !3, i64 4, !3, i64 8} +!11 = !{!10, !3, i64 4} +!12 = !{!10, !3, i64 8}