diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -40,6 +40,7 @@ class BasicBlock; class BatchAAResults; class LoadInst; +enum class ModRefInfo : uint8_t; class raw_ostream; class StoreInst; class VAArgInst; @@ -293,7 +294,8 @@ /// set return the appropriate AliasResult. Otherwise return NoAlias. AliasResult aliasesPointer(const Value *Ptr, LocationSize Size, const AAMDNodes &AAInfo, BatchAAResults &AA) const; - bool aliasesUnknownInst(const Instruction *Inst, BatchAAResults &AA) const; + ModRefInfo aliasesUnknownInst(const Instruction *Inst, + BatchAAResults &AA) const; }; inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) { diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -211,7 +211,7 @@ PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC, const TargetLibraryInfo *, TargetTransformInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, - bool AllowSpeculation); + bool AllowSpeculation, bool HasReadsOutsideSet); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -225,29 +225,34 @@ return AliasResult::NoAlias; } -bool AliasSet::aliasesUnknownInst(const Instruction *Inst, - BatchAAResults &AA) const { +ModRefInfo AliasSet::aliasesUnknownInst(const Instruction *Inst, + BatchAAResults &AA) const { if (AliasAny) - return true; + return ModRefInfo::ModRef; if (!Inst->mayReadOrWriteMemory()) - return false; + return ModRefInfo::NoModRef; for (Instruction *UnknownInst : UnknownInsts) { const auto *C1 = dyn_cast(UnknownInst); const auto *C2 = dyn_cast(Inst); if (!C1 || !C2 || isModOrRefSet(AA.getModRefInfo(C1, C2)) || - isModOrRefSet(AA.getModRefInfo(C2, C1))) - return true; + isModOrRefSet(AA.getModRefInfo(C2, C1))) { + // TODO: Could be more precise, but not really useful right now. + return ModRefInfo::ModRef; + } } - for (iterator I = begin(), E = end(); I != E; ++I) - if (isModOrRefSet(AA.getModRefInfo( - Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())))) - return true; + ModRefInfo MR = ModRefInfo::NoModRef; + for (iterator I = begin(), E = end(); I != E; ++I) { + MR |= AA.getModRefInfo( + Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())); + if (isModAndRefSet(MR)) + return MR; + } - return false; + return MR; } void AliasSetTracker::clear() { @@ -297,7 +302,7 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { AliasSet *FoundSet = nullptr; for (AliasSet &AS : llvm::make_early_inc_range(*this)) { - if (AS.Forward || !AS.aliasesUnknownInst(Inst, AA)) + if (AS.Forward || !isModOrRefSet(AS.aliasesUnknownInst(Inst, AA))) continue; if (!FoundSet) { // If this is the first alias set ptr can go into, remember it. diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -179,7 +179,9 @@ static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, function_ref Fn); -static SmallVector, 0> +using PointersAndHasReadsOutsideSet = + std::pair, bool>; +static SmallVector collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L); namespace { @@ -489,12 +491,12 @@ bool LocalPromoted; do { LocalPromoted = false; - for (const SmallSetVector &PointerMustAliases : + for (auto [PointerMustAliases, HasReadsOutsideSet] : collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE, - LicmAllowSpeculation); + LicmAllowSpeculation, HasReadsOutsideSet); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -1953,7 +1955,8 @@ LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { + OptimizationRemarkEmitter *ORE, bool AllowSpeculation, + bool HasReadsOutsideSet) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -2028,7 +2031,12 @@ const DataLayout &MDL = Preheader->getModule()->getDataLayout(); - if (SafetyInfo->anyBlockMayThrow()) { + // If there are reads outside the promoted set, then promoting stores is + // definitely not safe. + if (HasReadsOutsideSet) + StoreSafety = StoreUnsafe; + + if (StoreSafety == StoreSafetyUnknown && SafetyInfo->anyBlockMayThrow()) { // If a loop can throw, we have to insert a store along each unwind edge. // That said, we can't actually make the unwind edge explicit. Therefore, // we have to prove that the store is dead along the unwind edge. We do @@ -2253,7 +2261,9 @@ Fn(MUD->getMemoryInst()); } -static SmallVector, 0> +// The bool indicates whether there might be reads outside the set, in which +// case only loads may be promoted. +static SmallVector collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { BatchAAResults BatchAA(*AA); AliasSetTracker AST(BatchAA); @@ -2276,10 +2286,10 @@ }); // We're only interested in must-alias sets that contain a mod. - SmallVector Sets; + SmallVector, 8> Sets; for (AliasSet &AS : AST) if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias()) - Sets.push_back(&AS); + Sets.push_back({&AS, false}); if (Sets.empty()) return {}; // Nothing to promote... @@ -2289,17 +2299,28 @@ if (AttemptingPromotion.contains(I)) return; - llvm::erase_if(Sets, [&](const AliasSet *AS) { - return AS->aliasesUnknownInst(I, BatchAA); + llvm::erase_if(Sets, [&](PointerIntPair &Pair) { + ModRefInfo MR = Pair.getPointer()->aliasesUnknownInst(I, BatchAA); + // Cannot promote if there are writes outside the set. + if (isModSet(MR)) + return true; + if (isRefSet(MR)) { + // Remember reads outside the set. + Pair.setInt(true); + // If this is a mod-only set and there are reads outside the set, + // we will not be able to promote, so bail out early. + return !Pair.getPointer()->isRef(); + } + return false; }); }); - SmallVector, 0> Result; - for (const AliasSet *Set : Sets) { + SmallVector, bool>, 0> Result; + for (auto [Set, HasReadsOutsideSet] : Sets) { SmallSetVector PointerMustAliases; for (const auto &ASI : *Set) PointerMustAliases.insert(ASI.getValue()); - Result.push_back(std::move(PointerMustAliases)); + Result.emplace_back(std::move(PointerMustAliases), HasReadsOutsideSet); } return Result; diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -1329,7 +1329,7 @@ // can't reroll. if (RootInst->mayReadFromMemory()) { for (auto &K : AST) { - if (K.aliasesUnknownInst(RootInst, BatchAA)) { + if (isModOrRefSet(K.aliasesUnknownInst(RootInst, BatchAA))) { LLVM_DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << " vs. " << *RootInst << " (depends on future store)\n"); diff --git a/llvm/test/Transforms/LICM/guards.ll b/llvm/test/Transforms/LICM/guards.ll --- a/llvm/test/Transforms/LICM/guards.ll +++ b/llvm/test/Transforms/LICM/guards.ll @@ -27,7 +27,7 @@ br label %loop } -; Can't hoist over a side effect +; Can't hoist over a side effect, but can still promote and fold the load. define void @test2(i1 %cond, ptr %ptr) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: @@ -36,8 +36,7 @@ ; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND:%.*]]) [ "deopt"(i32 0) ] -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]] +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], 0 ; CHECK-NEXT: br label [[LOOP]] ; diff --git a/llvm/test/Transforms/LICM/invariant.start.ll b/llvm/test/Transforms/LICM/invariant.start.ll --- a/llvm/test/Transforms/LICM/invariant.start.ll +++ b/llvm/test/Transforms/LICM/invariant.start.ll @@ -87,8 +87,7 @@ ; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ] ; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = call ptr @llvm.invariant.start.p0(i64 4, ptr [[PTR]]) -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: [[X_INC]] = add i32 [[X]], [[VAL]] +; CHECK-NEXT: [[X_INC]] = add i32 [[X]], 0 ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/LICM/promote-unknown-load.ll b/llvm/test/Transforms/LICM/promote-unknown-load.ll --- a/llvm/test/Transforms/LICM/promote-unknown-load.ll +++ b/llvm/test/Transforms/LICM/promote-unknown-load.ll @@ -1,22 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=licm < %s | FileCheck %s -; FIXME: The %val.ptr load might alias the %pos.ptr load/stores, but it's still +; The %val.ptr load might alias the %pos.ptr load/stores, but it's still ; fine to promote the load as long as the store is retained. define i32 @test(ptr %ary, i64 %len) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[POS_PTR:%.*]] = getelementptr i8, ptr [[ARY:%.*]], i64 32 +; CHECK-NEXT: [[POS_PTR_PROMOTED:%.*]] = load i64, ptr [[POS_PTR]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[POS:%.*]] = load i64, ptr [[POS_PTR]], align 4 -; CHECK-NEXT: [[POS_NEXT:%.*]] = add i64 [[POS]], 1 +; CHECK-NEXT: [[POS_NEXT1:%.*]] = phi i64 [ [[POS_PTR_PROMOTED]], [[ENTRY:%.*]] ], [ [[POS_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[POS_NEXT]] = add i64 [[POS_NEXT1]], 1 ; CHECK-NEXT: store i64 [[POS_NEXT]], ptr [[POS_PTR]], align 4 -; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr i32, ptr [[ARY]], i64 [[POS]] +; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr i32, ptr [[ARY]], i64 [[POS_NEXT1]] ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[VAL_PTR]], align 4 ; CHECK-NEXT: [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[VAL]] -; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ult i64 [[POS]], [[LEN:%.*]] +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ult i64 [[POS_NEXT1]], [[LEN:%.*]] ; CHECK-NEXT: br i1 [[EXIT_COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -333,7 +333,6 @@ } ; Multiple variant stores to the same uniform address -; We do not vectorize such loops currently. ; for(; i < itr; i++) { ; for(; j < itr; j++) { ; var1[i] = var2[j] + var1[i]; @@ -347,28 +346,80 @@ ; CHECK-NEXT: [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP20]], label [[FOR_END10:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]] ; CHECK: for.cond1.preheader.preheader: +; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[VAR2:%.*]], i64 4 ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; CHECK: for.cond1.preheader: ; CHECK-NEXT: [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ] ; CHECK-NEXT: [[J_022:%.*]] = phi i32 [ [[J_1_LCSSA:%.*]], [[FOR_INC8]] ], [ 0, [[FOR_COND1_PREHEADER_PREHEADER]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV23]], 2 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[VAR1:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[VAR1]], i64 [[TMP1]] ; CHECK-NEXT: [[CMP218:%.*]] = icmp ult i32 [[J_022]], [[ITR]] ; CHECK-NEXT: br i1 [[CMP218]], label [[FOR_BODY3_LR_PH:%.*]], label [[FOR_INC8]] ; CHECK: for.body3.lr.ph: -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]] -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[J_022]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1]], i64 [[INDVARS_IV23]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[J_022]] to i64 +; CHECK-NEXT: [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[J_022]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], [[ITR]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 3 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[UGLYGEP2:%.*]] = getelementptr i8, ptr [[VAR2]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = xor i32 [[J_022]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[ITR]] +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP2]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP11]], 2 +; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[UGLYGEP3]], i64 [[TMP12]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[UGLYGEP]], [[UGLYGEP4]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP6]], -4 +; CHECK-NEXT: [[IND_END:%.*]] = add nuw nsw i64 [[N_VEC]], [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> , i32 [[ARRAYIDX5_PROMOTED]], i64 0 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP13]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[INDEX]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4, !alias.scope !22 +; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP16]] = add <4 x i32> [[TMP15]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP16]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]]) +; CHECK-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[TMP2]], [[FOR_BODY3_LR_PH]] ], [ [[TMP2]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[ARRAYIDX5_PROMOTED]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[ADD]], 1 -; CHECK-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY3]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +; CHECK-NEXT: [[TMP21]] = add nsw i32 [[ADD]], 1 +; CHECK-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[ITR]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[FOR_BODY3]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK: for.inc8.loopexit.loopexit: +; CHECK-NEXT: br label [[FOR_INC8_LOOPEXIT]] ; CHECK: for.inc8.loopexit: ; CHECK-NEXT: br label [[FOR_INC8]] ; CHECK: for.inc8: @@ -440,21 +491,22 @@ ; CHECK: for.body3.lr.ph: ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[VAR1:%.*]], i64 [[INDVARS_IV23]] ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[J_022]] to i64 +; CHECK-NEXT: [[ARRAYIDX5_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[ARRAYIDX5_PROMOTED]], [[FOR_BODY3_LR_PH]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY3_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR2:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[ADD]], 42 ; CHECK-NEXT: br i1 [[TMP3]], label [[COND_STORE:%.*]], label [[LATCH]] ; CHECK: cond_store: ; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[ADD]], 1 ; CHECK-NEXT: br label [[LATCH]] ; CHECK: latch: -; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ [[TMP4]], [[COND_STORE]] ], [ [[ADD]], [[FOR_BODY3]] ] -; CHECK-NEXT: store i32 [[STOREMERGE]], ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[TMP5]] = phi i32 [ [[TMP4]], [[COND_STORE]] ], [ [[ADD]], [[FOR_BODY3]] ] +; CHECK-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[ITR]] diff --git a/llvm/unittests/Analysis/AliasSetTrackerTest.cpp b/llvm/unittests/Analysis/AliasSetTrackerTest.cpp --- a/llvm/unittests/Analysis/AliasSetTrackerTest.cpp +++ b/llvm/unittests/Analysis/AliasSetTrackerTest.cpp @@ -83,7 +83,7 @@ for (AliasSet &AS : AST) { if (!Inst.mayReadOrWriteMemory()) continue; - if (!AS.aliasesUnknownInst(&Inst, BatchAA)) + if (!isModOrRefSet(AS.aliasesUnknownInst(&Inst, BatchAA))) continue; ASSERT_NE(FoundAS, true); FoundAS = true;