diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -1214,6 +1214,8 @@
   BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); }
 
+  bool performedPhiTranslation() const { return PerformedPhiTranslation; }
+
 private:
   void fillInCurrentPair() {
     CurrentPair.first = *DefIterator;
     if (WalkingPhi && Location.Ptr) {
@@ -1226,6 +1228,7 @@
                                         false)) {
         if (Translator.getAddr() != Location.Ptr) {
           CurrentPair.second = Location.getWithNewPtr(Translator.getAddr());
+          PerformedPhiTranslation = true;
           return;
         }
       } else {
@@ -1242,6 +1245,7 @@
   MemoryAccess *OriginalAccess = nullptr;
   bool WalkingPhi = false;
   DominatorTree *DT = nullptr;
+  bool PerformedPhiTranslation = false;
 };
 
 inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair,
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -522,6 +522,7 @@
   // Phi optimization bookkeeping
   SmallVector<DefPath, 32> Paths;
   DenseSet<ConstMemoryAccessPair> VisitedPhis;
+  bool PerformedPhiTranslation = false;
 
   /// Find the nearest def or phi that `From` can legally be optimized to.
   const MemoryAccess *getWalkTarget(const MemoryPhi *From) const {
@@ -596,12 +597,13 @@
 
   void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
                    ListIndex PriorNode) {
-    auto UpwardDefs = make_range(
-        upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT), upward_defs_end());
+    auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT);
+    auto UpwardDefs = make_range(UpwardDefsBegin, upward_defs_end());
     for (const MemoryAccessPair &P : UpwardDefs) {
       PausedSearches.push_back(Paths.size());
       Paths.emplace_back(P.second, P.first, PriorNode);
     }
+    PerformedPhiTranslation |= UpwardDefsBegin.performedPhiTranslation();
   }
 
   /// Represents a search that terminated after finding a clobber. This clobber
@@ -651,8 +653,16 @@
       //   - We still cache things for A, so C only needs to walk up a bit.
       // If this behavior becomes problematic, we can fix without a ton of extra
       // work.
-      if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
+      if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) {
+        if (PerformedPhiTranslation) {
+          // If visiting this path performed Phi translation, don't continue,
+          // since it may not be correct to merge results from two paths if one
+          // relies on the phi translation.
+          TerminatedPath Term{Node.Last, PathIndex};
+          return Term;
+        }
         continue;
+      }
 
       const MemoryAccess *SkipStopWhere = nullptr;
       if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) {
@@ -765,7 +775,7 @@
   /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
   OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
                              const MemoryLocation &Loc) {
-    assert(Paths.empty() && VisitedPhis.empty() &&
+    assert(Paths.empty() && VisitedPhis.empty() && !PerformedPhiTranslation &&
           "Reset the optimization state.");
 
     Paths.emplace_back(Loc, Start, Phi, None);
@@ -921,6 +931,7 @@
   void resetPhiOptznState() {
     Paths.clear();
     VisitedPhis.clear();
+    PerformedPhiTranslation = false;
   }
 
 public:
diff --git a/llvm/test/Analysis/MemorySSA/phi-translation.ll b/llvm/test/Analysis/MemorySSA/phi-translation.ll
--- a/llvm/test/Analysis/MemorySSA/phi-translation.ll
+++ b/llvm/test/Analysis/MemorySSA/phi-translation.ll
@@ -287,3 +287,85 @@
 
   ret void
 }
+
+
+@c = local_unnamed_addr global [2 x i16] zeroinitializer, align 2
+
+define i32 @dont_merge_noalias_simple(i32* noalias %ptr) {
+; CHECK-LABEL: define i32 @dont_merge_noalias_simple
+; CHECK-LABEL: entry:
+; CHECK:       ; 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT:    store i16 1, i16* %s1.ptr, align 2
+
+; CHECK-LABEL: %for.body
+; CHECK:       ; MemoryUse(4) MayAlias
+; CHECK-NEXT:    %lv = load i16, i16* %arrayidx, align 2
+
+entry:
+  %s1.ptr = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 0
+  store i16 1, i16* %s1.ptr, align 2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %storemerge2 = phi i32 [ 1, %entry ], [ %dec, %for.body ]
+  %idxprom1 = zext i32 %storemerge2 to i64
+  %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 %idxprom1
+  %lv = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %lv to i32
+  store i32 %conv, i32* %ptr, align 4
+  %dec = add nsw i32 %storemerge2, -1
+  %cmp = icmp sgt i32 %storemerge2, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %s2.ptr = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 0
+  store i16 0, i16* %s2.ptr, align 2
+  ret i32 0
+}
+
+
+define i32 @dont_merge_noalias_complex(i32* noalias %ptr, i32* noalias %another) {
+; CHECK-LABEL: define i32 @dont_merge_noalias_complex
+; CHECK-LABEL: entry:
+; CHECK:       ; 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT:    store i16 1, i16* %s1.ptr, align 2
+
+; CHECK-LABEL: %for.body
+; CHECK:       ; MemoryUse(7) MayAlias
+; CHECK-NEXT:    %lv = load i16, i16* %arrayidx, align 2
+
+entry:
+  %s1.ptr = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 0
+  store i16 1, i16* %s1.ptr, align 2
+  br label %for.body
+
+for.body:                                         ; preds = %merge.body, %entry
+  %storemerge2 = phi i32 [ 1, %entry ], [ %dec, %merge.body ]
+  %idxprom1 = zext i32 %storemerge2 to i64
+  %arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 %idxprom1
+  %lv = load i16, i16* %arrayidx, align 2
+  %conv = sext i16 %lv to i32
+  store i32 %conv, i32* %ptr, align 4
+  %dec = add nsw i32 %storemerge2, -1
+
+  %cmpif = icmp sgt i32 %storemerge2, 1
+  br i1 %cmpif, label %if.body, label %else.body
+
+if.body:
+  store i32 %conv, i32* %another, align 4
+  br label %merge.body
+
+else.body:
+  store i32 %conv, i32* %another, align 4
+  br label %merge.body
+
+merge.body:
+  %cmp = icmp sgt i32 %storemerge2, 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %merge.body
+  %s2.ptr = getelementptr inbounds [2 x i16], [2 x i16]* @c, i64 0, i64 0
+  store i16 0, i16* %s2.ptr, align 2
+  ret i32 0
+}
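
Summary of the change: the clobber walker caches visited (MemoryPhi, MemoryLocation) pairs in VisitedPhis so that searches converging on the same phi can reuse each other's results. Once upward_defs_iterator rewrites the query location through PHI translation, two paths arriving at the same phi may be asking about different underlying pointers, so reusing a cached result can be wrong. The patch therefore records whether any translation happened and, on a VisitedPhis hit with PerformedPhiTranslation set, conservatively terminates the path at the phi instead of merging. The new tests pin this down: in both loops, the load %lv must stay a MayAlias MemoryUse of the loop's MemoryPhi rather than being optimized to a more distant def.

Below is a minimal sketch, not part of the patch, of how a client could consume the new performedPhiTranslation() accessor. collectUpwardDefs is a hypothetical helper, and it assumes the upward_defs_begin overload taking a DominatorTree reference, as used in ClobberWalker::addSearches above.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Dominators.h"

using namespace llvm;

// Hypothetical helper (illustration only): collect the (defining access,
// location) pairs visible upward from a MemoryPhi and report whether any
// step rewrote the location via PHI translation.
static bool collectUpwardDefs(MemoryPhi *Phi, const MemoryLocation &Loc,
                              DominatorTree &DT,
                              SmallVectorImpl<MemoryAccessPair> &Out) {
  bool SawTranslation = false;
  for (auto I = upward_defs_begin({Phi, Loc}, DT), E = upward_defs_end();
       I != E; ++I) {
    Out.push_back(*I); // the location may differ from Loc after translation
    // The flag is sticky once set, so OR-ing it in per step is conservative.
    SawTranslation |= I.performedPhiTranslation();
  }
  return SawTranslation; // if true, don't merge cached results across paths
}

Note that the sketch queries the flag on the iterator object it actually increments: upward_defs_iterator has value semantics, so flag updates made while advancing a copy (such as the one make_range() hands to a range-based for loop) are not visible through the original begin iterator.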