Skip to content

Commit faa857d

Browse files
committedOct 21, 2016
[SCEV] Memoize visitMulExpr results in SCEVRewriteVisitor.
Summary: When SCEVRewriteVisitor traverses the SCEV DAG, it may visit the same SCEV multiple times if this SCEV is referenced by multiple other SCEVs. This has exponential time complexity in the worst case. Memoizing the results will avoid re-visiting the same SCEV. Add a map to save the results, and override the visit function of SCEVVisitor. Now SCEVRewriteVisitor only visit each SCEV once and thus returns the same result for the same input SCEV. This patch fixes PR18606, PR18607. Reviewers: Sanjoy Das, Mehdi Amini, Michael Zolotukhin Differential Revision: https://reviews.llvm.org/D25810 llvm-svn: 284868
1 parent 2f9d8d0 commit faa857d

File tree

2 files changed

+87
-1
lines changed

2 files changed

+87
-1
lines changed
 

‎llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h

+20-1
Original file line numberDiff line numberDiff line change
@@ -537,14 +537,33 @@ namespace llvm {
537537
T.visitAll(Root);
538538
}
539539

540-
/// Recursively visits a SCEV expression and re-writes it.
540+
/// This visitor recursively visits a SCEV expression and re-writes it.
541+
/// The result from each visit is cached, so it will return the same
542+
/// SCEV for the same input.
541543
template<typename SC>
542544
class SCEVRewriteVisitor : public SCEVVisitor<SC, const SCEV *> {
543545
protected:
544546
ScalarEvolution &SE;
547+
// Memoize the result of each visit so that we only compute once for
548+
// the same input SCEV. This is to avoid redundant computations when
549+
// a SCEV is referenced by multiple SCEVs. Without memoization, this
550+
// visit algorithm would have exponential time complexity in the worst
551+
// case, causing the compiler to hang on certain tests.
552+
DenseMap<const SCEV *, const SCEV *> RewriteResults;
553+
545554
public:
546555
SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {}
547556

557+
const SCEV *visit(const SCEV *S) {
558+
auto It = RewriteResults.find(S);
559+
if (It != RewriteResults.end())
560+
return It->second;
561+
auto *Result = SCEVVisitor<SC, const SCEV *>::visit(S);
562+
assert(RewriteResults.insert({S, Result}).second &&
563+
"Should insert a new entry");
564+
return Result;
565+
}
566+
548567
const SCEV *visitConstant(const SCEVConstant *Constant) {
549568
return Constant;
550569
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; RUN: opt -S -indvars < %s | FileCheck %s
2+
3+
; CHECK: @main
4+
; CHECK: %mul.lcssa5 = phi i32 [ %a.promoted4, %entry ], [ %mul.30, %for.body3 ]
5+
; CEHCK: %mul = mul nsw i32 %mul.lcssa5, %mul.lcssa5
6+
; CHECK: %mul.30 = mul nsw i32 %mul.29, %mul.29
7+
8+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
9+
target triple = "x86_64-unknown-linux-gnu"
10+
11+
@a = local_unnamed_addr global i32 0, align 4
12+
@b = local_unnamed_addr global i32 0, align 4
13+
14+
; Function Attrs: norecurse nounwind uwtable
15+
define i32 @main() local_unnamed_addr {
16+
entry:
17+
%a.promoted4 = load i32, i32* @a, align 4
18+
br label %for.cond1.preheader
19+
20+
for.cond1.preheader: ; preds = %entry, %for.body3
21+
%mul.lcssa5 = phi i32 [ %a.promoted4, %entry ], [ %mul.30, %for.body3 ]
22+
%i.03 = phi i32 [ 0, %entry ], [ %inc5, %for.body3 ]
23+
br label %for.body3
24+
25+
for.body3: ; preds = %for.cond1.preheader
26+
%mul = mul nsw i32 %mul.lcssa5, %mul.lcssa5
27+
%mul.1 = mul nsw i32 %mul, %mul
28+
%mul.2 = mul nsw i32 %mul.1, %mul.1
29+
%mul.3 = mul nsw i32 %mul.2, %mul.2
30+
%mul.4 = mul nsw i32 %mul.3, %mul.3
31+
%mul.5 = mul nsw i32 %mul.4, %mul.4
32+
%mul.6 = mul nsw i32 %mul.5, %mul.5
33+
%mul.7 = mul nsw i32 %mul.6, %mul.6
34+
%mul.8 = mul nsw i32 %mul.7, %mul.7
35+
%mul.9 = mul nsw i32 %mul.8, %mul.8
36+
%mul.10 = mul nsw i32 %mul.9, %mul.9
37+
%mul.11 = mul nsw i32 %mul.10, %mul.10
38+
%mul.12 = mul nsw i32 %mul.11, %mul.11
39+
%mul.13 = mul nsw i32 %mul.12, %mul.12
40+
%mul.14 = mul nsw i32 %mul.13, %mul.13
41+
%mul.15 = mul nsw i32 %mul.14, %mul.14
42+
%mul.16 = mul nsw i32 %mul.15, %mul.15
43+
%mul.17 = mul nsw i32 %mul.16, %mul.16
44+
%mul.18 = mul nsw i32 %mul.17, %mul.17
45+
%mul.19 = mul nsw i32 %mul.18, %mul.18
46+
%mul.20 = mul nsw i32 %mul.19, %mul.19
47+
%mul.21 = mul nsw i32 %mul.20, %mul.20
48+
%mul.22 = mul nsw i32 %mul.21, %mul.21
49+
%mul.23 = mul nsw i32 %mul.22, %mul.22
50+
%mul.24 = mul nsw i32 %mul.23, %mul.23
51+
%mul.25 = mul nsw i32 %mul.24, %mul.24
52+
%mul.26 = mul nsw i32 %mul.25, %mul.25
53+
%mul.27 = mul nsw i32 %mul.26, %mul.26
54+
%mul.28 = mul nsw i32 %mul.27, %mul.27
55+
%mul.29 = mul nsw i32 %mul.28, %mul.28
56+
%mul.30 = mul nsw i32 %mul.29, %mul.29
57+
%inc5 = add nuw nsw i32 %i.03, 1
58+
%exitcond = icmp ne i32 %inc5, 10
59+
br i1 %exitcond, label %for.cond1.preheader, label %for.end6
60+
61+
for.end6: ; preds = %for.body3
62+
%mul.lcssa.lcssa = phi i32 [ %mul.30, %for.body3 ]
63+
%inc.lcssa.lcssa = phi i32 [ 31, %for.body3 ]
64+
store i32 %mul.lcssa.lcssa, i32* @a, align 4
65+
store i32 %inc.lcssa.lcssa, i32* @b, align 4
66+
ret i32 0
67+
}

0 commit comments

Comments
 (0)
Please sign in to comment.