diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -567,7 +567,13 @@
                          AAQueryInfo &AAQI);
   ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc,
                            AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc,
+                           const std::optional<MemoryLocation> &LoadLocation,
+                           AAQueryInfo &AAQI);
+  ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc,
+                           AAQueryInfo &AAQI);
   ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc,
+                           const std::optional<MemoryLocation> &StoreLocation,
                            AAQueryInfo &AAQI);
   ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc,
                            AAQueryInfo &AAQI);
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -463,9 +463,10 @@
 // Helper method implementation
 //===----------------------------------------------------------------------===//
 
-ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
-                                    const MemoryLocation &Loc,
-                                    AAQueryInfo &AAQI) {
+ModRefInfo
+AAResults::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc,
+                         const std::optional<MemoryLocation> &LoadLocation,
+                         AAQueryInfo &AAQI) {
   // Be conservative in the face of atomic.
   if (isStrongerThan(L->getOrdering(), AtomicOrdering::Unordered))
     return ModRefInfo::ModRef;
@@ -473,23 +474,31 @@
   // If the load address doesn't alias the given address, it doesn't read
   // or write the specified memory.
   if (Loc.Ptr) {
-    AliasResult AR = alias(MemoryLocation::get(L), Loc, AAQI, L);
+    AliasResult AR =
+        alias(LoadLocation.value_or(MemoryLocation::get(L)), Loc, AAQI, L);
     if (AR == AliasResult::NoAlias)
       return ModRefInfo::NoModRef;
   }
   // Otherwise, a load just reads.
   return ModRefInfo::Ref;
 }
-
-ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
+ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
                                     const MemoryLocation &Loc,
                                     AAQueryInfo &AAQI) {
+  return getModRefInfo(L, Loc, std::nullopt, AAQI);
+}
+
+ModRefInfo
+AAResults::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc,
+                         const std::optional<MemoryLocation> &StoreLocation,
+                         AAQueryInfo &AAQI) {
   // Be conservative in the face of atomic.
   if (isStrongerThan(S->getOrdering(), AtomicOrdering::Unordered))
     return ModRefInfo::ModRef;
 
   if (Loc.Ptr) {
-    AliasResult AR = alias(MemoryLocation::get(S), Loc, AAQI, S);
+    AliasResult AR =
+        alias(StoreLocation.value_or(MemoryLocation::get(S)), Loc, AAQI, S);
     // If the store address cannot alias the pointer in question, then the
     // specified memory cannot be modified by the store.
     if (AR == AliasResult::NoAlias)
@@ -507,6 +516,12 @@
   return ModRefInfo::Mod;
 }
 
+ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
+                                    const MemoryLocation &Loc,
+                                    AAQueryInfo &AAQI) {
+  return getModRefInfo(S, Loc, std::nullopt, AAQI);
+}
+
 ModRefInfo AAResults::getModRefInfo(const FenceInst *S,
                                     const MemoryLocation &Loc,
                                     AAQueryInfo &AAQI) {
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -232,6 +232,14 @@
                        const SCEV *BECount);
   bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
                                  bool IsLoopMemset = false);
+  std::optional<MemoryLocation>
+  calculateStoreLoadMemoryLocation(Value *InstrPtr, const SCEV *BECount,
+                                   SCEVExpander &Expander);
+  bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+                             const SCEV *BECount, const SCEV *StoreSizeSCEV,
+                             AliasAnalysis &AA,
+                             SmallPtrSetImpl<Instruction *> &IgnoredInsts,
+                             SCEVExpander &Expander);
 
   /// @}
   /// \name Noncountable Loop Idiom Handling
@@ -358,7 +366,7 @@
   return MadeChange;
 }
 
-static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
+static APInt getInstrStride(const SCEVAddRecExpr *StoreEv) {
   const SCEVConstant *ConstStride = cast<SCEVConstant>(StoreEv->getOperand(1));
   return ConstStride->getAPInt();
 }
@@ -480,7 +488,7 @@
   if (HasMemcpy && !DisableLIRP::Memcpy) {
     // Check to see if the stride matches the size of the store.  If so, then we
     // know that every byte is touched in the loop.
-    APInt Stride = getStoreStride(StoreEv);
+    APInt Stride = getInstrStride(StoreEv);
     unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
     if (StoreSize != Stride && StoreSize != -Stride)
       return LegalStoreKind::None;
@@ -606,7 +614,7 @@
     Value *FirstStorePtr = SL[i]->getPointerOperand();
     const SCEVAddRecExpr *FirstStoreEv =
         cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
-    APInt FirstStride = getStoreStride(FirstStoreEv);
+    APInt FirstStride = getInstrStride(FirstStoreEv);
     unsigned FirstStoreSize =
         DL->getTypeStoreSize(SL[i]->getValueOperand()->getType());
     // See if we can optimize just this store in isolation.
@@ -642,7 +650,7 @@
       Value *SecondStorePtr = SL[k]->getPointerOperand();
       const SCEVAddRecExpr *SecondStoreEv =
           cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
-      APInt SecondStride = getStoreStride(SecondStoreEv);
+      APInt SecondStride = getInstrStride(SecondStoreEv);
 
       if (FirstStride != SecondStride)
         continue;
@@ -709,7 +717,7 @@
   Value *StoredVal = HeadStore->getValueOperand();
   Value *StorePtr = HeadStore->getPointerOperand();
   const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
-  APInt Stride = getStoreStride(StoreEv);
+  APInt Stride = getInstrStride(StoreEv);
 
   // Check to see if the stride matches the size of the stores.  If so, then
   // we know that every byte is touched in the loop.
@@ -937,11 +945,10 @@
 /// mayLoopAccessLocation - Return true if the specified loop might access the
 /// specified pointer location, which is a loop-strided access.  The 'Access'
 /// argument specifies what the verboten forms of access are (read or write).
-static bool
-mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
-                      const SCEV *BECount, const SCEV *StoreSizeSCEV,
-                      AliasAnalysis &AA,
-                      SmallPtrSetImpl<Instruction *> &IgnoredInsts) {
+bool LoopIdiomRecognize::mayLoopAccessLocation(
+    Value *Ptr, ModRefInfo Access, Loop *L, const SCEV *BECount,
+    const SCEV *StoreSizeSCEV, AliasAnalysis &AA,
+    SmallPtrSetImpl<Instruction *> &IgnoredInsts, SCEVExpander &Expander) {
   // Get the location that may be stored across the loop.  Since the access is
   // strided positively through memory, we say that the modified location starts
   // at the pointer and has infinite size.
@@ -955,17 +962,33 @@
     AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
                                        ConstSize->getValue()->getZExtValue());
 
-  // TODO: For this to be really effective, we have to dive into the pointer
-  // operand in the store.  Store to &A[i] of 100 will always return may alias
-  // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
-  // which will then no-alias a store to &A[100].
   MemoryLocation StoreLoc(Ptr, AccessSize);
 
   for (BasicBlock *B : L->blocks())
-    for (Instruction &I : *B)
-      if (!IgnoredInsts.contains(&I) &&
-          isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access))
+    for (Instruction &I : *B) {
+      if (IgnoredInsts.contains(&I))
+        continue;
+      ModRefInfo RefInfo = AA.getModRefInfo(&I, StoreLoc);
+
+      // In the case of store/load instructions, try to calculate a more
+      // precise memory location.
+      StoreInst *SI = dyn_cast<StoreInst>(&I);
+      LoadInst *LI = dyn_cast<LoadInst>(&I);
+      if (SI || LI) {
+        Value *InstPtr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
+        auto MemLocation =
+            calculateStoreLoadMemoryLocation(InstPtr, BECount, Expander);
+        if (MemLocation) {
+          SimpleAAQueryInfo AAQIP(AA);
+          if (SI)
+            RefInfo = AA.getModRefInfo(SI, StoreLoc, MemLocation, AAQIP);
+          else
+            RefInfo = AA.getModRefInfo(LI, StoreLoc, MemLocation, AAQIP);
+        }
+      }
+      if (isModOrRefSet(RefInfo & Access))
        return true;
+    }
   return false;
 }
@@ -1027,6 +1050,51 @@
                          SCEV::FlagNUW);
 }
 
+/// Calculate the memory location used by a store/load instruction in cases
+/// where this is possible.
+std::optional<MemoryLocation>
+LoopIdiomRecognize::calculateStoreLoadMemoryLocation(Value *InstrPtr,
+                                                     const SCEV *BECount,
+                                                     SCEVExpander &Expander) {
+  const SCEVAddRecExpr *StoreEv =
+      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(InstrPtr));
+  // See if the pointer expression is an AddRec like {base,+,1} on the current
+  // loop, which indicates a strided instruction whose precise memory location
+  // can be calculated.
+  if (StoreEv && StoreEv->getLoop() == CurLoop && StoreEv->isAffine() &&
+      isa<SCEVConstant>(StoreEv->getOperand(1))) {
+    APInt Stride = getInstrStride(StoreEv);
+    uint64_t ConstSize = std::abs(Stride.getSExtValue());
+
+    // If the loop iterates a fixed number of times, we can compute the exact
+    // access size.
+    const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
+    if (BECst && ConstSize) {
+      LocationSize AccessSize = LocationSize::precise(
+          (BECst->getValue()->getZExtValue() + 1) * ConstSize);
+
+      BasicBlock *Preheader = CurLoop->getLoopPreheader();
+      IRBuilder<> Builder(Preheader->getTerminator());
+      unsigned DestAS = InstrPtr->getType()->getPointerAddressSpace();
+      Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
+      unsigned InstAS = InstrPtr->getType()->getPointerAddressSpace();
+      Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(InstAS));
+      const SCEV *Start = StoreEv->getStart();
+      const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, ConstSize);
+
+      // Calculate the right start in case of a negative stride.
+      if (Stride.isNegative())
+        Start =
+            getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
+      // Added instructions will be removed by later phases if they are unused.
+      Value *BasePtr = Expander.expandCodeFor(Start, DestInt8PtrTy,
+                                              Preheader->getTerminator());
+      return MemoryLocation(BasePtr, AccessSize);
+    }
+  }
+  return std::nullopt;
+}
+
 /// processLoopStridedStore - We see a strided store of some value.  If we can
 /// transform this into a memset or memset_pattern in the loop preheader, do so.
 bool LoopIdiomRecognize::processLoopStridedStore(
@@ -1085,7 +1153,7 @@
   Changed = true;
 
   if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
-                            StoreSizeSCEV, *AA, Stores))
+                            StoreSizeSCEV, *AA, Stores, Expander))
     return Changed;
 
   if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
@@ -1280,7 +1348,7 @@
   unsigned StrAS = DestPtr->getType()->getPointerAddressSpace();
   Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
 
-  APInt Stride = getStoreStride(StoreEv);
+  APInt Stride = getInstrStride(StoreEv);
   const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
 
   // TODO: Deal with non-constant size; Currently expect constant store size
@@ -1320,7 +1388,7 @@
   bool LoopAccessStore =
       mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
-                            StoreSizeSCEV, *AA, IgnoredInsts);
+                            StoreSizeSCEV, *AA, IgnoredInsts, Expander);
   if (LoopAccessStore) {
     // For memmove case it's not enough to guarantee that loop doesn't access
     // TheStore and TheLoad. Additionally we need to make sure that TheStore is
@@ -1329,7 +1397,8 @@
       return Changed;
     IgnoredInsts.insert(TheLoad);
     if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
-                              BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
+                              BECount, StoreSizeSCEV, *AA, IgnoredInsts,
+                              Expander)) {
       ORE.emit([&]() {
         return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
                                         TheStore)
@@ -1362,7 +1431,7 @@
   if (IsMemCpy && !Verifier.IsSameObject)
     IgnoredInsts.erase(TheStore);
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
-                            StoreSizeSCEV, *AA, IgnoredInsts)) {
+                            StoreSizeSCEV, *AA, IgnoredInsts, Expander)) {
     ORE.emit([&]() {
       return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
              << ore::NV("Inst", InstRemark) << " in "
diff --git a/llvm/test/Transforms/LoopIdiom/memset-location-alias.ll b/llvm/test/Transforms/LoopIdiom/memset-location-alias.ll
--- a/llvm/test/Transforms/LoopIdiom/memset-location-alias.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-location-alias.ll
@@ -10,6 +10,16 @@
 ; CHECK-NEXT:    store i32 0, ptr [[QUEUE_START]], align 4
 ; CHECK-NEXT:    [[QUEUE_END:%.*]] = getelementptr inbounds [[STRUCT_CONNECTION_DATA]], ptr [[CONN]], i64 0, i32 7
 ; CHECK-NEXT:    store i32 0, ptr [[QUEUE_END]], align 4
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[CONN]], i64 1684
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[CONN]], i64 84
+; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[CONN]], i64 3284
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[CONN]], i64 4884
+; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[CONN]], i64 6484
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP2]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP3]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[CONN]], i64 84
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP4]], i8 0, i64 1516, i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[POS:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -17,13 +27,9 @@
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [400 x i32], ptr [[CONN]], i64 0, i64 [[IDXPROM]]
 ; CHECK-NEXT:    store i32 100000, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 1
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 2
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 3
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 4
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX8]], align 4
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[POS]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[POS]], 399
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
@@ -65,6 +71,14 @@
 ; CHECK-NEXT:    store i32 0, ptr [[QUEUE_START]], align 4
 ; CHECK-NEXT:    [[QUEUE_END:%.*]] = getelementptr inbounds [[STRUCT_CONNECTION_DATA]], ptr [[CONN]], i64 0, i32 7
 ; CHECK-NEXT:    store i32 0, ptr [[QUEUE_END]], align 4
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[CONN]], i64 3284
+; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[CONN]], i64 204
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[CONN]], i64 1684
+; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[CONN]], i64 4884
+; CHECK-NEXT:    [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[CONN]], i64 6484
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP4]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP5]], i8 0, i64 1516, i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[POS:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -77,11 +91,8 @@
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[DELTAS21]], i64 0, i32 1
 ; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[DELTAS21]], i64 0, i32 2
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[DELTAS21]], i64 0, i32 3
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[DELTAS21]], i64 0, i32 4
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX8]], align 4
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[POS]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[POS]], 399
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
@@ -126,6 +137,16 @@
 ; CHECK-NEXT:    store i32 0, ptr [[QUEUE_START]], align 4
 ; CHECK-NEXT:    [[QUEUE_END:%.*]] = getelementptr inbounds [[STRUCT_CONNECTION_DATA]], ptr [[CONN]], i64 0, i32 7
 ; CHECK-NEXT:    store i32 0, ptr [[QUEUE_END]], align 4
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[CONN]], i64 1684
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[CONN]], i64 84
+; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[CONN]], i64 3284
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[CONN]], i64 4884
+; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[CONN]], i64 6484
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP2]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP3]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[CONN]], i64 84
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP4]], i8 0, i64 1516, i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[POS:%.*]] = phi i32 [ 399, [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[FOR_BODY]] ]
@@ -133,13 +154,9 @@
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [400 x i32], ptr [[CONN]], i64 0, i64 [[IDXPROM]]
 ; CHECK-NEXT:    store i32 100000, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 1
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 2
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 3
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 4
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX8]], align 4
 ; CHECK-NEXT:    [[DEC]] = add nsw i32 [[POS]], -1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[POS]], 21
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
@@ -181,6 +198,16 @@
 ; CHECK-NEXT:    store i32 0, ptr [[QUEUE_START]], align 4
 ; CHECK-NEXT:    [[QUEUE_END:%.*]] = getelementptr inbounds [[STRUCT_CONNECTION_DATA]], ptr [[CONN]], i64 0, i32 7
 ; CHECK-NEXT:    store i32 0, ptr [[QUEUE_END]], align 4
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[CONN]], i64 1684
+; CHECK-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[CONN]], i64 84
+; CHECK-NEXT:    [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[CONN]], i64 3284
+; CHECK-NEXT:    [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[CONN]], i64 4884
+; CHECK-NEXT:    [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[CONN]], i64 6484
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP2]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP3]], i8 0, i64 1516, i1 false)
+; CHECK-NEXT:    [[UGLYGEP5:%.*]] = getelementptr i8, ptr [[CONN]], i64 84
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[UGLYGEP4]], i8 0, i64 1516, i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[POS:%.*]] = phi i32 [ 21, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
@@ -189,13 +216,9 @@
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr [[QUEUE_END]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 1
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX3]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 2
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 3
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX7]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr [[STRUCT_CONNECTION_DATA]], ptr [[ARRAYIDX]], i64 0, i32 4
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX9]], align 4
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[POS]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[POS]], 399
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
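
Usage sketch (not part of the patch): the new `std::optional<MemoryLocation>` parameter lets a caller substitute a widened, loop-spanning location for the instruction's own single-element location when the alias query runs; with `std::nullopt` the overloads behave exactly like the pre-existing entry points. A minimal sketch of a caller follows. The helper name `queryWithWidenedLocation` is hypothetical; only `AAResults::getModRefInfo` and `SimpleAAQueryInfo` come from the patch itself.

```cpp
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/Instructions.h"
#include <optional>

using namespace llvm;

// Hypothetical helper: query mod/ref info for a store against Loc, overriding
// the store's own single-element location with a precomputed loop-spanning
// one when the caller was able to derive it (e.g. the base pointer plus the
// total number of bytes written across all iterations).
static ModRefInfo
queryWithWidenedLocation(AAResults &AA, const StoreInst *SI,
                         const MemoryLocation &Loc,
                         const std::optional<MemoryLocation> &WidenedLoc) {
  SimpleAAQueryInfo AAQI(AA);
  // Inside the overload, value_or falls back to MemoryLocation::get(SI)
  // when WidenedLoc is std::nullopt.
  return AA.getModRefInfo(SI, Loc, WidenedLoc, AAQI);
}
```

For instance, for a loop that stores to `&A[i]` for `i = 0..99` with an `i32` element type, the widened location is `(A, 400 bytes)`, which can be proven no-alias with an access starting at `&A[100]`; this is the case described in the TODO comment that the patch removes from mayLoopAccessLocation.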