Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp
===================================================================
--- lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -106,7 +106,7 @@
   typedef MapVector<Value *, StoreList> StoreListMap;
   StoreListMap StoreRefsForMemset;
   StoreListMap StoreRefsForMemsetPattern;
-  StoreList StoreRefsForMemcpy;
+  StoreListMap StoreRefsForMemcpy;
   bool HasMemset;
   bool HasMemsetPattern;
   bool HasMemcpy;
@@ -131,7 +131,7 @@
   void collectStores(BasicBlock *BB);
   LegalStoreKind isLegalStore(StoreInst *SI);
   bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
-                         bool ForMemset);
+                         LegalStoreKind MemIdiom);
   bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
 
   bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
@@ -140,7 +140,12 @@
                                SmallPtrSetImpl<Instruction *> &Stores,
                                const SCEVAddRecExpr *Ev, const SCEV *BECount,
                                bool NegStride, bool IsLoopMemset = false);
-  bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
+  bool processLoopStoreOfLoopLoad(Value *DestPtr, unsigned StoreSize,
+                                  StoreInst *TheStore, LoadInst *TheLoad,
+                                  SmallPtrSetImpl<Instruction *> &Stores,
+                                  const SCEVAddRecExpr *Ev, Value *LoadPtr,
+                                  const SCEVAddRecExpr *LoadEv,
+                                  const SCEV *BECount, bool NegStride);
   bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
                                  bool IsLoopMemset = false);
@@ -421,13 +426,6 @@
 
   // Otherwise, see if the store can be turned into a memcpy.
   if (HasMemcpy) {
-    // Check to see if the stride matches the size of the store. If so, then we
-    // know that every byte is touched in the loop.
-    APInt Stride = getStoreStride(StoreEv);
-    unsigned StoreSize = getStoreSizeInBytes(SI, DL);
-    if (StoreSize != Stride && StoreSize != -Stride)
-      return LegalStoreKind::None;
-
     // The store must be feeding a non-volatile load.
     LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
 
@@ -484,9 +482,11 @@
     StoreRefsForMemsetPattern[Ptr].push_back(SI);
   } break;
   case LegalStoreKind::Memcpy:
-  case LegalStoreKind::UnorderedAtomicMemcpy:
-    StoreRefsForMemcpy.push_back(SI);
-    break;
+  case LegalStoreKind::UnorderedAtomicMemcpy: {
+    // Find the base pointer.
+    Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+    StoreRefsForMemcpy[Ptr].push_back(SI);
+  } break;
   default:
     assert(false && "unhandled return value");
     break;
@@ -515,14 +515,19 @@
   // optimized into a memset (memset_pattern). The latter most commonly happens
   // with structs and handunrolled loops.
   for (auto &SL : StoreRefsForMemset)
-    MadeChange |= processLoopStores(SL.second, BECount, true);
+    MadeChange |=
+        processLoopStores(SL.second, BECount, LegalStoreKind::Memset);
 
   for (auto &SL : StoreRefsForMemsetPattern)
-    MadeChange |= processLoopStores(SL.second, BECount, false);
+    MadeChange |=
+        processLoopStores(SL.second, BECount, LegalStoreKind::MemsetPattern);
 
-  // Optimize the store into a memcpy, if it feeds an similarly strided load.
+  // Look for a single store with a corresponding load or a set of stores with
+  // a common base (the corresponding loads of those stores should also have a
+  // common base), which can be optimized into a memcpy.
   for (auto &SI : StoreRefsForMemcpy)
-    MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
+    MadeChange |=
+        processLoopStores(SI.second, BECount, LegalStoreKind::Memcpy);
 
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
     Instruction *Inst = &*I++;
@@ -544,10 +549,10 @@
   return MadeChange;
 }
 
-/// processLoopStores - See if this store(s) can be promoted to a memset.
+/// processLoopStores - See if the given store(s) can be promoted to a memset
+/// or memcpy.
 bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
                                            const SCEV *BECount,
-                                           bool ForMemset) {
+                                           LegalStoreKind MemIdiom) {
   // Try to find consecutive stores that can be transformed into memsets.
   SetVector<StoreInst *> Heads, Tails;
   SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
@@ -556,7 +561,11 @@
   // all of the pairs of stores that follow each other.
   SmallVector<unsigned, 16> IndexQueue;
   for (unsigned i = 0, e = SL.size(); i < e; ++i) {
-    assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
+    if (MemIdiom == LegalStoreKind::Memcpy)
+      assert(SL[i]->isUnordered() &&
+             "Expected only non-volatile non-ordered stores.");
+    else
+      assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
 
     Value *FirstStoredVal = SL[i]->getValueOperand();
     Value *FirstStorePtr = SL[i]->getPointerOperand();
@@ -565,6 +574,16 @@
     APInt FirstStride = getStoreStride(FirstStoreEv);
     unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
 
+    LoadInst *FirstStoreLoad;
+    Value *FirstLoadPtr;
+    if (MemIdiom == LegalStoreKind::Memcpy) {
+      FirstStoreLoad = cast<LoadInst>(SL[i]->getValueOperand());
+      assert(FirstStoreLoad->isUnordered() &&
+             "Expected only non-volatile non-ordered loads.");
+      FirstLoadPtr =
+          GetUnderlyingObject(FirstStoreLoad->getPointerOperand(), *DL);
+    }
+
     // See if we can optimize just this store in isolation.
     if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) {
       Heads.insert(SL[i]);
@@ -574,13 +593,16 @@
     Value *FirstSplatValue = nullptr;
     Constant *FirstPatternValue = nullptr;
 
-    if (ForMemset)
-      FirstSplatValue = isBytewiseValue(FirstStoredVal);
-    else
-      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
+    if (MemIdiom == LegalStoreKind::Memset ||
+        MemIdiom == LegalStoreKind::MemsetPattern) {
+      if (MemIdiom == LegalStoreKind::Memset)
+        FirstSplatValue = isBytewiseValue(FirstStoredVal);
+      else
+        FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
 
-    assert((FirstSplatValue || FirstPatternValue) &&
-           "Expected either splat value or pattern value.");
+      assert((FirstSplatValue || FirstPatternValue) &&
+             "Expected either splat value or pattern value.");
+    }
 
     IndexQueue.clear();
     // If a store has multiple consecutive store candidates, search Stores
@@ -594,7 +616,11 @@
       IndexQueue.push_back(j - 1);
 
     for (auto &k : IndexQueue) {
-      assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
+      if (MemIdiom == LegalStoreKind::Memcpy)
+        assert(SL[k]->isUnordered() &&
+               "Expected only non-volatile non-ordered stores.");
+      else
+        assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
 
       Value *SecondStorePtr = SL[k]->getPointerOperand();
       const SCEVAddRecExpr *SecondStoreEv =
          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
@@ -606,28 +632,50 @@
       Value *SecondStoredVal = SL[k]->getValueOperand();
       Value *SecondSplatValue = nullptr;
       Constant *SecondPatternValue = nullptr;
+      LoadInst *SecondStoreLoad;
 
-      if (ForMemset)
-        SecondSplatValue = isBytewiseValue(SecondStoredVal);
-      else
-        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
+      if (MemIdiom == LegalStoreKind::Memset ||
+          MemIdiom == LegalStoreKind::MemsetPattern) {
+        if (MemIdiom == LegalStoreKind::Memset)
+          SecondSplatValue = isBytewiseValue(SecondStoredVal);
+        else
+          SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
 
-      assert((SecondSplatValue || SecondPatternValue) &&
-             "Expected either splat value or pattern value.");
+        assert((SecondSplatValue || SecondPatternValue) &&
+               "Expected either splat value or pattern value.");
+      } else if (MemIdiom == LegalStoreKind::Memcpy) {
+        SecondStoreLoad = cast<LoadInst>(SL[k]->getValueOperand());
+        assert(SecondStoreLoad->isUnordered() &&
+               "Expected only non-volatile non-ordered loads.");
+        Value *SecondLoadPtr =
+            GetUnderlyingObject(SecondStoreLoad->getPointerOperand(), *DL);
+
+        if (FirstLoadPtr != SecondLoadPtr)
+          continue;
+
+        if (SL[i]->isAtomic() || SL[k]->isAtomic() ||
+            FirstStoreLoad->isAtomic() || SecondStoreLoad->isAtomic())
+          return false;
+      }
 
-      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
-        if (ForMemset) {
-          if (FirstSplatValue != SecondSplatValue)
-            continue;
-        } else {
-          if (FirstPatternValue != SecondPatternValue)
-            continue;
+      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
+        if (MemIdiom == LegalStoreKind::Memset) {
+          if (FirstSplatValue != SecondSplatValue)
+            continue;
+        } else if (MemIdiom == LegalStoreKind::MemsetPattern) {
+          if (FirstPatternValue != SecondPatternValue)
+            continue;
+        } else if (MemIdiom == LegalStoreKind::Memcpy) {
+          if (!isConsecutiveAccess(FirstStoreLoad, SecondStoreLoad, *DL, *SE,
+                                   false))
+            continue;
+        }
+        Tails.insert(SL[k]);
+        Heads.insert(SL[i]);
+        ConsecutiveChain[SL[i]] = SL[k];
+        break;
       }
-        Tails.insert(SL[k]);
-        Heads.insert(SL[i]);
-        ConsecutiveChain[SL[i]] = SL[k];
-        break;
-      }
     }
   }
@@ -666,18 +714,36 @@
     const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
     APInt Stride = getStoreStride(StoreEv);
 
+    LoadInst *StoreLoadInst;
+    Value *LoadPtr;
+    const SCEVAddRecExpr *LoadEv;
+    if (MemIdiom == LegalStoreKind::Memcpy) {
+      StoreLoadInst = cast<LoadInst>(HeadStore->getValueOperand());
+      LoadPtr = GetUnderlyingObject(StoreLoadInst->getPointerOperand(), *DL);
+      LoadEv =
+          cast<SCEVAddRecExpr>(SE->getSCEV(StoreLoadInst->getPointerOperand()));
+    }
+
     // Check to see if the stride matches the size of the stores. If so, then
     // we know that every byte is touched in the loop.
     if (StoreSize != Stride && StoreSize != -Stride)
      continue;
 
     bool NegStride = StoreSize == -Stride;
-
-    if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
-                                StoredVal, HeadStore, AdjacentStores, StoreEv,
-                                BECount, NegStride)) {
-      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
-      Changed = true;
+    if (MemIdiom == LegalStoreKind::Memset ||
+        MemIdiom == LegalStoreKind::MemsetPattern) {
+      if (processLoopStridedStore(StorePtr, StoreSize,
+                                  HeadStore->getAlignment(), StoredVal,
+                                  HeadStore, AdjacentStores, StoreEv, BECount,
+                                  NegStride)) {
+        TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
+        Changed = true;
+      }
+    } else {
+      if (processLoopStoreOfLoopLoad(StorePtr, StoreSize, HeadStore,
+                                     StoreLoadInst, AdjacentStores, StoreEv,
+                                     LoadPtr, LoadEv, BECount, NegStride)) {
+        TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
+        Changed = true;
+      }
     }
   }
@@ -893,29 +959,21 @@
   return true;
 }
 
-/// If the stored value is a strided load in the same loop with the same stride
-/// this may be transformable into a memcpy. This kicks in for stuff like
+/// If there are one or more stored values that are strided loads in the same
+/// loop with the same stride (the stores being consecutive and having a
+/// common base, and likewise the corresponding loads), then this may be
+/// transformed into a memcpy. This kicks in for stuff like
 /// for (i) A[i] = B[i];
-bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
-                                                    const SCEV *BECount) {
-  assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores.");
-
-  Value *StorePtr = SI->getPointerOperand();
-  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
-  APInt Stride = getStoreStride(StoreEv);
-  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
-  bool NegStride = StoreSize == -Stride;
-
-  // The store must be feeding a non-volatile load.
-  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());
-  assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads.");
-
-  // See if the pointer expression is an AddRec like {base,+,1} on the current
-  // loop, which indicates a strided load. If we have something else, it's a
-  // random load we can't handle.
-  const SCEVAddRecExpr *LoadEv =
-      cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
-
+/// as well as
+/// for (i) {
+///   A[i].a = B[i].a;
+///   A[i].b = B[i].b;
+/// }
+bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
+    Value *DestPtr, unsigned StoreSize, StoreInst *SI, LoadInst *LI,
+    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *StoreEv,
+    Value *LoadPtr, const SCEVAddRecExpr *LoadEv, const SCEV *BECount,
+    bool NegStride) {
   // The trip count of the loop and the base pointer of the addrec SCEV is
   // guaranteed to be loop invariant, which means that it should dominate the
   // header. This allows us to insert code for it in the preheader.
@@ -924,13 +982,18 @@
   SCEVExpander Expander(*SE, *DL, "loop-idiom");
 
   const SCEV *StrStart = StoreEv->getStart();
-  unsigned StrAS = SI->getPointerAddressSpace();
+  unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
   Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS);
 
   // Handle negative strided loops.
   if (NegStride)
     StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE);
 
+  // TODO: Ideally we should still be able to generate memcpy if SCEV expander
+  // is taught to generate the dependencies at the latest point.
+  if (!isSafeToExpand(StrStart, *SE))
+    return false;
+
   // Okay, we have a strided store "p[i]" of a loaded value. We can turn
   // this into a memcpy in the loop preheader now if we want. However, this
   // would be unsafe to do if there is anything else in the loop that may read
@@ -940,8 +1003,6 @@
   Value *StoreBasePtr = Expander.expandCodeFor(
       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
 
-  SmallPtrSet<Instruction *, 1> Stores;
-  Stores.insert(SI);
   if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
                             StoreSize, *AA, Stores)) {
     Expander.clear();
@@ -951,12 +1012,18 @@
   }
 
   const SCEV *LdStart = LoadEv->getStart();
-  unsigned LdAS = LI->getPointerAddressSpace();
+  unsigned LdAS = LoadPtr->getType()->getPointerAddressSpace();
 
   // Handle negative strided loops.
   if (NegStride)
     LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE);
 
+  // TODO: Ideally we should still be able to generate memcpy if SCEV expander
+  // is taught to generate the dependencies at the latest point.
+  if (!isSafeToExpand(LdStart, *SE))
+    return false;
+
   // For a memcpy, we have to make sure that the input array is not being
   // mutated by the loop.
   Value *LoadBasePtr = Expander.expandCodeFor(
@@ -1029,7 +1096,8 @@
 
   // Okay, the memcpy has been formed. Zap the original store and anything that
   // feeds into it.
-  deleteDeadInstruction(SI);
+  for (auto *I : Stores)
+    deleteDeadInstruction(I);
 
   ++NumMemCpy;
   return true;
 }
Index: test/Transforms/LoopIdiom/memcpy_struct_pattern.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopIdiom/memcpy_struct_pattern.ll
@@ -0,0 +1,288 @@
+; RUN: opt -loop-idiom -loop-deletion < %s -S | FileCheck %s
+
+; ModuleID = '<stdin>'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+%struct.foo = type { i32, i32 }
+%struct.foo1 = type { i32, i32, i32 }
+
+; Function Attrs: norecurse nounwind uwtable
+define void @bar1(%struct.foo* noalias nocapture %f, %struct.foo* noalias nocapture readonly %g, i32 %n) local_unnamed_addr #0 {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %a = getelementptr inbounds %struct.foo, %struct.foo* %g, i64 %indvars.iv, i32 0
+  %0 = load i32, i32* %a, align 4, !tbaa !2
+  %a3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+  store i32 %0, i32* %a3, align 4, !tbaa !2
+  %b = getelementptr inbounds %struct.foo, %struct.foo* %g, i64 %indvars.iv, i32 1
+  %1 = load i32, i32* %b, align 4, !tbaa !7
+  %b8 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+  store i32 %1, i32* %b8, align 4, !tbaa !7
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+; CHECK-LABEL: @bar1(
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NOT: store
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @bar2(%struct.foo* noalias nocapture %f, %struct.foo* noalias nocapture readonly %g, i32 %n) local_unnamed_addr #0 {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %a = getelementptr inbounds %struct.foo, %struct.foo* %g, i64 %indvars.iv, i32 0
+  %0 = load i32, i32* %a, align 4, !tbaa !2
+  %a3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+  store i32 %0, i32* %a3, align 4, !tbaa !2
+  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+  store i32 %0, i32* %b, align 4, !tbaa !7
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+; CHECK-LABEL: @bar2(
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @bar3(%struct.foo* noalias nocapture %f, %struct.foo* noalias nocapture readonly %g, i32 %n) local_unnamed_addr #0 {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %b = getelementptr inbounds %struct.foo, %struct.foo* %g, i64 %indvars.iv, i32 1
+  %0 = load i32, i32* %b, align 4, !tbaa !7
+  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+  store i32 %0, i32* %a, align 4, !tbaa !2
+  %a5 = getelementptr inbounds %struct.foo, %struct.foo* %g, i64 %indvars.iv, i32 0
+  %1 = load i32, i32* %a5, align 4, !tbaa !2
+  %b8 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+  store i32 %1, i32* %b8, align 4, !tbaa !7
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+; CHECK-LABEL: @bar3(
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @bar4(%struct.foo* noalias nocapture %f, %struct.foo* noalias nocapture readonly %g, i32 %n) local_unnamed_addr #0 {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %a = getelementptr inbounds %struct.foo, %struct.foo* %g, i64 %indvars.iv, i32 0
+  %0 = load i32, i32* %a, align 4, !tbaa !2
+  %a3 = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+  store i32 %0, i32* %a3, align 4, !tbaa !2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+; CHECK-LABEL: @bar4(
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @bar5(i32* noalias nocapture %f, i32* noalias nocapture readonly %g, i32 %n) local_unnamed_addr #0 {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %g, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
+  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+  store i32 %0, i32* %arrayidx2, align 4, !tbaa !8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+; CHECK-LABEL: @bar5(
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NOT: store
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @bar6(%struct.foo1* noalias nocapture %f, %struct.foo1* noalias nocapture readonly %g, i32 %n) local_unnamed_addr #0 {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %g, i64 %indvars.iv, i32 0
+  %0 = load i32, i32* %a, align 4, !tbaa !9
+  %a3 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+  store i32 %0, i32* %a3, align 4, !tbaa !9
+  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %g, i64 %indvars.iv, i32 1
+  %1 = load i32, i32* %b, align 4, !tbaa !11
+  %b8 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
+  store i32 %1, i32* %b8, align 4, !tbaa !11
+  %c = getelementptr inbounds %struct.foo1, %struct.foo1* %g, i64 %indvars.iv, i32 2
+  %2 = load i32, i32* %c, align 4, !tbaa !12
+  %c13 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 2
+  store i32 %2, i32* %c13, align 4, !tbaa !12
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+; CHECK-LABEL: @bar6(
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK-NOT: store
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @bar7(%struct.foo1* noalias nocapture %f, %struct.foo1* noalias nocapture readonly %g, i32 %n) local_unnamed_addr #0 {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %g, i64 %indvars.iv, i32 0
+  %0 = load i32, i32* %a, align 4, !tbaa !9
+  %a3 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+  store i32 %0, i32* %a3, align 4, !tbaa !9
+  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %g, i64 %indvars.iv, i32 1
+  %1 = load i32, i32* %b, align 4, !tbaa !11
+  %b8 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
+  store i32 %1, i32* %b8, align 4, !tbaa !11
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+; CHECK-LABEL: @bar7(
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+}
+
+; Function Attrs: norecurse nounwind uwtable
+define void @bar8(%struct.foo1* noalias nocapture %f, %struct.foo1* noalias nocapture readonly %g, i32 %n) local_unnamed_addr #0 {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %g, i64 %indvars.iv, i32 0
+  %0 = load i32, i32* %a, align 4, !tbaa !9
+  %a3 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+  store i32 %0, i32* %a3, align 4, !tbaa !9
+  %c = getelementptr inbounds %struct.foo1, %struct.foo1* %g, i64 %indvars.iv, i32 2
+  %1 = load i32, i32* %c, align 4, !tbaa !12
+  %c8 = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 2
+  store i32 %1, i32* %c8, align 4, !tbaa !12
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %wide.trip.count = zext i32 %n to i64
+  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+; CHECK-LABEL: @bar8(
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+}
+
+attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="slm" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (https://github.com/llvm-mirror/clang.git 6cfb5bf41823be28bca09fe72dd3d4b83f4e1be8) (https://github.com/llvm-mirror/llvm.git 5ed4492ece68252fe8309cf11ea7e7105eabdeaf)"}
+!2 = !{!3, !4, i64 0}
+!3 = !{!"foo", !4, i64 0, !4, i64 4}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!3, !4, i64 4}
+!8 = !{!4, !4, i64 0}
+!9 = !{!10, !4, i64 0}
+!10 = !{!"foo1", !4, i64 0, !4, i64 4, !4, i64 8}
+!11 = !{!10, !4, i64 4}
+!12 = !{!10, !4, i64 8}
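
For reference, a C-level sketch of the loop idiom the new tests exercise (a
hand-written reconstruction, not part of the patch; the names mirror @bar1
above but are illustrative):

  struct foo { int a, b; };

  void bar1(struct foo *restrict f, const struct foo *restrict g, int n) {
    for (int i = 0; i < n; ++i) {
      f[i].a = g[i].a; /* consecutive stores with a common base (f)... */
      f[i].b = g[i].b; /* ...fed by consecutive loads with a common base (g) */
    }
    /* With this change, -loop-idiom can emit a single memcpy of
       n * sizeof(struct foo) bytes instead of the per-field load/store
       pairs, as checked by the CHECK lines in @bar1 and @bar6. */
  }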