Skip to content

Commit 52578f9

Browse files
committedNov 9, 2018
[CallSiteSplitting] Only record conditions up to the IDom(call site).
We can stop recording conditions once we reached the immediate dominator for the block containing the call site. Conditions in predecessors of the that node will be the same for all paths to the call site and splitting is not beneficial. This patch makes CallSiteSplitting dependent on the DT anlysis. because the immediate dominators seem to be the easiest way of finding the node to stop at. I had to update some exiting tests, because they were checking for conditions that were true/false on all paths to the call site. Those should now be handled by instcombine/ipsccp. Reviewers: davide, junbuml Reviewed By: junbuml Differential Revision: https://reviews.llvm.org/D44627 llvm-svn: 346483
1 parent e6b727e commit 52578f9

File tree

4 files changed

+74
-25
lines changed

4 files changed

+74
-25
lines changed
 

‎llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp

+38-15
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,14 @@ static void recordCondition(CallSite CS, BasicBlock *From, BasicBlock *To,
149149

150150
/// Record ICmp conditions relevant to any argument in CS following Pred's
151151
/// single predecessors. If there are conflicting conditions along a path, like
152-
/// x == 1 and x == 0, the first condition will be used.
152+
/// x == 1 and x == 0, the first condition will be used. We stop once we reach
153+
/// an edge to StopAt.
153154
static void recordConditions(CallSite CS, BasicBlock *Pred,
154-
ConditionsTy &Conditions) {
155-
recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
155+
ConditionsTy &Conditions, BasicBlock *StopAt) {
156156
BasicBlock *From = Pred;
157157
BasicBlock *To = Pred;
158158
SmallPtrSet<BasicBlock *, 4> Visited;
159-
while (!Visited.count(From->getSinglePredecessor()) &&
159+
while (To != StopAt && !Visited.count(From->getSinglePredecessor()) &&
160160
(From = From->getSinglePredecessor())) {
161161
recordCondition(CS, From, To, Conditions);
162162
Visited.insert(From);
@@ -302,7 +302,7 @@ static void copyMustTailReturn(BasicBlock *SplitBB, Instruction *CI,
302302
static void splitCallSite(
303303
CallSite CS,
304304
const SmallVectorImpl<std::pair<BasicBlock *, ConditionsTy>> &Preds,
305-
DominatorTree *DT) {
305+
DominatorTree &DT) {
306306
Instruction *Instr = CS.getInstruction();
307307
BasicBlock *TailBB = Instr->getParent();
308308
bool IsMustTailCall = CS.isMustTailCall();
@@ -327,7 +327,7 @@ static void splitCallSite(
327327
BasicBlock *PredBB = Preds[i].first;
328328
BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
329329
TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i],
330-
DT);
330+
&DT);
331331
assert(SplitBlock && "Unexpected new basic block split.");
332332

333333
Instruction *NewCI =
@@ -438,7 +438,7 @@ static bool isPredicatedOnPHI(CallSite CS) {
438438
return false;
439439
}
440440

441-
static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) {
441+
static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree &DT) {
442442
if (!isPredicatedOnPHI(CS))
443443
return false;
444444

@@ -449,15 +449,25 @@ static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) {
449449
return true;
450450
}
451451

452-
static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) {
452+
static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree &DT) {
453453
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
454454
if (Preds[0] == Preds[1])
455455
return false;
456456

457+
// We can stop recording conditions once we reached the immediate dominator
458+
// for the block containing the call site. Conditions in predecessors of the
459+
// that node will be the same for all paths to the call site and splitting
460+
// is not beneficial.
461+
auto *CSDTNode = DT.getNode(CS.getInstruction()->getParent());
462+
BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
463+
457464
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
458465
for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
459466
ConditionsTy Conditions;
460-
recordConditions(CS, Pred, Conditions);
467+
// Record condition on edge BB(CS) <- Pred
468+
recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
469+
// Record conditions followng Pred's single predecessors.
470+
recordConditions(CS, Pred, Conditions, StopAt);
461471
PredsCS.push_back({Pred, Conditions});
462472
}
463473

@@ -466,20 +476,32 @@ static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) {
466476
}))
467477
return false;
468478

479+
// Record common conditions starting from StopAt. Those conditions hold for
480+
// all paths to CS. Adding them gives the inliner a better chance at inlining
481+
// CS.
482+
ConditionsTy CommonConditions;
483+
if (StopAt)
484+
recordConditions(CS, StopAt, CommonConditions, nullptr);
485+
if (!CommonConditions.empty())
486+
for (auto &Pred : PredsCS) {
487+
Pred.second.insert(Pred.second.end(), CommonConditions.begin(),
488+
CommonConditions.end());
489+
}
490+
469491
splitCallSite(CS, PredsCS, DT);
470492
return true;
471493
}
472494

473495
static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
474-
DominatorTree *DT) {
496+
DominatorTree &DT) {
475497
if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
476498
return false;
477499
return tryToSplitOnPredicatedArgument(CS, DT) ||
478500
tryToSplitOnPHIPredicatedArgument(CS, DT);
479501
}
480502

481503
static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
482-
TargetTransformInfo &TTI, DominatorTree *DT) {
504+
TargetTransformInfo &TTI, DominatorTree &DT) {
483505
bool Changed = false;
484506
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
485507
BasicBlock &BB = *BI++;
@@ -524,6 +546,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
524546
void getAnalysisUsage(AnalysisUsage &AU) const override {
525547
AU.addRequired<TargetLibraryInfoWrapperPass>();
526548
AU.addRequired<TargetTransformInfoWrapperPass>();
549+
AU.addRequired<DominatorTreeWrapperPass>();
527550
AU.addPreserved<DominatorTreeWrapperPass>();
528551
FunctionPass::getAnalysisUsage(AU);
529552
}
@@ -534,9 +557,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
534557

535558
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
536559
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
537-
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
538-
return doCallSiteSplitting(F, TLI, TTI,
539-
DTWP ? &DTWP->getDomTree() : nullptr);
560+
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
561+
return doCallSiteSplitting(F, TLI, TTI, DT);
540562
}
541563
};
542564
} // namespace
@@ -546,6 +568,7 @@ INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
546568
"Call-site splitting", false, false)
547569
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
548570
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
571+
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
549572
INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
550573
"Call-site splitting", false, false)
551574
FunctionPass *llvm::createCallSiteSplittingPass() {
@@ -556,7 +579,7 @@ PreservedAnalyses CallSiteSplittingPass::run(Function &F,
556579
FunctionAnalysisManager &AM) {
557580
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
558581
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
559-
auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
582+
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
560583

561584
if (!doCallSiteSplitting(F, TLI, TTI, DT))
562585
return PreservedAnalyses::all();

‎llvm/test/Other/new-pm-lto-defaults.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@
3838
; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass on foo
3939
; CHECK-O2-NEXT: Running analysis: TargetLibraryAnalysis on foo
4040
; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
41+
; CHECK-O2-NEXT: Running analysis: DominatorTreeAnalysis on foo
4142
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
4243
; CHECK-O2-NEXT: PGOIndirectCallPromotion
4344
; CHECK-O2-NEXT: Running analysis: ProfileSummaryAnalysis
4445
; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
4546
; CHECK-O2-NEXT: Running pass: IPSCCPPass
46-
; CHECK-O2-DAG: Running analysis: AssumptionAnalysis on foo
47-
; CHECK-O2-DAG: Running analysis: DominatorTreeAnalysis on foo
47+
; CHECK-O2-NEXT: Running analysis: AssumptionAnalysis on foo
4848
; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass
4949
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass>
5050
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC

‎llvm/test/Other/opt-O3-pipeline.ll

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
; CHECK-NEXT: Force set function attributes
2929
; CHECK-NEXT: Infer set function attributes
3030
; CHECK-NEXT: FunctionPass Manager
31+
; CHECK-NEXT: Dominator Tree Construction
3132
; CHECK-NEXT: Call-site splitting
3233
; CHECK-NEXT: Interprocedural Sparse Conditional Constant Propagation
3334
; CHECK-NEXT: Unnamed pass: implement Pass::getPassName()

‎llvm/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll

+33-8
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,14 @@ End:
123123
;CHECK-LABEL: Header2.split:
124124
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
125125
;CHECK-LABEL: TBB.split:
126-
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
126+
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
127127
;CHECK-LABEL: Tail
128128
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
129129
;CHECK: ret i32 %[[MERGED]]
130130
define i32 @test_ne_eq_ne(i32* %a, i32 %v, i32 %p) {
131131
Header:
132132
%tobool1 = icmp ne i32* %a, null
133-
br i1 %tobool1, label %Header2, label %End
133+
br i1 %tobool1, label %Header2, label %TBB
134134

135135
Header2:
136136
%tobool2 = icmp eq i32 %p, 10
@@ -178,14 +178,14 @@ End:
178178
;CHECK-LABEL: Header2.split:
179179
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
180180
;CHECK-LABEL: TBB.split:
181-
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
181+
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
182182
;CHECK-LABEL: Tail
183183
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
184184
;CHECK: ret i32 %[[MERGED]]
185185
define i32 @test_ne_ne_ne_constrain_same_pointer_arg(i32* %a, i32 %v, i32 %p, i32* %a2, i32* %a3) {
186186
Header:
187187
%tobool1 = icmp ne i32* %a, null
188-
br i1 %tobool1, label %Header2, label %End
188+
br i1 %tobool1, label %Header2, label %TBB
189189

190190
Header2:
191191
%tobool2 = icmp ne i32* %a, %a2
@@ -235,14 +235,14 @@ End:
235235
;CHECK-LABEL: Header2.split:
236236
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
237237
;CHECK-LABEL: TBB.split:
238-
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 %p)
238+
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 %p)
239239
;CHECK-LABEL: Tail
240240
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
241241
;CHECK: ret i32 %[[MERGED]]
242242
define i32 @test_eq_eq_eq_untaken(i32* %a, i32 %v, i32 %p) {
243243
Header:
244244
%tobool1 = icmp eq i32* %a, null
245-
br i1 %tobool1, label %End, label %Header2
245+
br i1 %tobool1, label %TBB, label %Header2
246246

247247
Header2:
248248
%tobool2 = icmp eq i32 %p, 10
@@ -290,14 +290,14 @@ End:
290290
;CHECK-LABEL: Header2.split:
291291
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 10)
292292
;CHECK-LABEL: TBB.split:
293-
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p)
293+
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
294294
;CHECK-LABEL: Tail
295295
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
296296
;CHECK: ret i32 %[[MERGED]]
297297
define i32 @test_ne_eq_ne_untaken(i32* %a, i32 %v, i32 %p) {
298298
Header:
299299
%tobool1 = icmp ne i32* %a, null
300-
br i1 %tobool1, label %End, label %Header2
300+
br i1 %tobool1, label %TBB, label %Header2
301301

302302
Header2:
303303
%tobool2 = icmp eq i32 %p, 10
@@ -489,6 +489,31 @@ End:
489489
ret i32 %v
490490
}
491491

492+
;CHECK-LABEL: @test_cond_no_effect
493+
;CHECK-NOT: Header.split:
494+
;CHECK-NOT: TBB.split:
495+
;CHECK-LABEL: Tail:
496+
;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 0)
497+
;CHECK: ret i32 %r
498+
define i32 @test_cond_no_effect(i32* %a, i32 %v) {
499+
Entry:
500+
%tobool1 = icmp eq i32* %a, null
501+
br i1 %tobool1, label %Header, label %End
502+
503+
Header:
504+
br i1 undef, label %Tail, label %TBB
505+
506+
TBB:
507+
br i1 undef, label %Tail, label %End
508+
509+
Tail:
510+
%r = call i32 @callee(i32* %a, i32 %v, i32 0)
511+
ret i32 %r
512+
513+
End:
514+
ret i32 %v
515+
}
516+
492517
;CHECK-LABEL: @test_unreachable
493518
;CHECK-LABEL: Header.split:
494519
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 10)

0 commit comments

Comments
 (0)
Please sign in to comment.