Index: lib/Transforms/Scalar/CallSiteSplitting.cpp =================================================================== --- lib/Transforms/Scalar/CallSiteSplitting.cpp +++ lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -145,15 +145,16 @@ /// Record ICmp conditions relevant to any argument in CS following Pred's /// single successors. If there are conflicting conditions along a path, like -/// x == 1 and x == 0, the first condition will be used. +/// x == 1 and x == 0, the first condition will be used. We stop once we reach +/// an edge to StopAt. static void recordConditions(CallSite CS, BasicBlock *Pred, - ConditionsTy &Conditions) { + ConditionsTy &Conditions, BasicBlock *StopAt) { recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions); BasicBlock *From = Pred; BasicBlock *To = Pred; SmallPtrSet Visited; while (!Visited.count(From->getSinglePredecessor()) && - (From = From->getSinglePredecessor())) { + (From = From->getSinglePredecessor()) && To != StopAt) { recordCondition(CS, From, To, Conditions); Visited.insert(From); To = From; @@ -293,7 +294,7 @@ static void splitCallSite( CallSite CS, const SmallVectorImpl> &Preds, - DominatorTree *DT) { + DominatorTree &DT) { Instruction *Instr = CS.getInstruction(); BasicBlock *TailBB = Instr->getParent(); bool IsMustTailCall = CS.isMustTailCall(); @@ -315,7 +316,8 @@ for (unsigned i = 0; i < Preds.size(); i++) { BasicBlock *PredBB = Preds[i].first; BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween( - TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i], DT); + TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i], + &DT); assert(SplitBlock && "Unexpected new basic block split."); Instruction *NewCI = @@ -425,7 +427,7 @@ return false; } -static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree *DT) { +static bool tryToSplitOnPHIPredicatedArgument(CallSite CS, DominatorTree &DT) { if (!isPredicatedOnPHI(CS)) return false; @@ -436,15 +438,22 @@ 
return true; } -static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree *DT) { +static bool tryToSplitOnPredicatedArgument(CallSite CS, DominatorTree &DT) { auto Preds = getTwoPredecessors(CS.getInstruction()->getParent()); if (Preds[0] == Preds[1]) return false; + // We can stop recording conditions once we reach the immediate dominator + // for the block containing the call site. Conditions in predecessors of + // that node will be the same for all paths to the call site and splitting + // is not beneficial. + auto *CSDTNode = DT.getNode(CS.getInstruction()->getParent()); + BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr; + SmallVector, 2> PredsCS; for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) { ConditionsTy Conditions; - recordConditions(CS, Pred, Conditions); + recordConditions(CS, Pred, Conditions, StopAt); PredsCS.push_back({Pred, Conditions}); } @@ -458,16 +467,16 @@ return true; } -static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI, DominatorTree *DT) { - if (!CS.arg_size() || !canSplitCallSite(CS)) +static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI, + DominatorTree &DT) { + if (!CS.arg_size() || !canSplitCallSite(CS, TTI)) return false; return tryToSplitOnPredicatedArgument(CS, DT) || tryToSplitOnPHIPredicatedArgument(CS, DT); } static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI, - TargetTransformInfo &TTI, - DominatorTree *DT) { + TargetTransformInfo &TTI, DominatorTree &DT) { bool Changed = false; for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) { BasicBlock &BB = *BI++; @@ -512,6 +521,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); } @@ -522,8 +532,8 @@ auto &TLI = getAnalysis().getTLI(); auto &TTI = getAnalysis().getTTI(F); - auto *DTWP = getAnalysisIfAvailable(); - return doCallSiteSplitting(F, TLI, 
TTI, DTWP ? &DTWP->getDomTree() : nullptr); + auto &DT = getAnalysis().getDomTree(); + return doCallSiteSplitting(F, TLI, TTI, DT); } }; } // namespace @@ -533,6 +543,7 @@ "Call-site splitting", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting", "Call-site splitting", false, false) FunctionPass *llvm::createCallSiteSplittingPass() { @@ -543,7 +554,7 @@ FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); auto &TTI = AM.getResult(F); - auto *DT = AM.getCachedResult(F); + auto &DT = AM.getResult(F); if (!doCallSiteSplitting(F, TLI, TTI, DT)) return PreservedAnalyses::all(); Index: test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll =================================================================== --- test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll +++ test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll @@ -39,14 +39,14 @@ ;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 10) ;CHECK-LABEL: TBB.split: -;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 %p) +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 %p) ;CHECK-LABEL: Tail ;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq_eq(i32* %a, i32 %v, i32 %p) { Header: %tobool1 = icmp eq i32* %a, null - br i1 %tobool1, label %Header2, label %End + br i1 %tobool1, label %Header2, label %TBB Header2: %tobool2 = icmp eq i32 %p, 10 @@ -123,14 +123,14 @@ ;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10) ;CHECK-LABEL: TBB.split: -;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p) ;CHECK-LABEL: Tail 
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_eq_ne(i32* %a, i32 %v, i32 %p) { Header: %tobool1 = icmp ne i32* %a, null - br i1 %tobool1, label %Header2, label %End + br i1 %tobool1, label %Header2, label %TBB Header2: %tobool2 = icmp eq i32 %p, 10 @@ -178,14 +178,14 @@ ;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) ;CHECK-LABEL: TBB.split: -;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p) ;CHECK-LABEL: Tail ;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_ne_ne_constrain_same_pointer_arg(i32* %a, i32 %v, i32 %p, i32* %a2, i32* %a3) { Header: %tobool1 = icmp ne i32* %a, null - br i1 %tobool1, label %Header2, label %End + br i1 %tobool1, label %Header2, label %TBB Header2: %tobool2 = icmp ne i32* %a, %a2 @@ -235,14 +235,14 @@ ;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10) ;CHECK-LABEL: TBB.split: -;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 %p) +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 %p) ;CHECK-LABEL: Tail ;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq_eq_untaken(i32* %a, i32 %v, i32 %p) { Header: %tobool1 = icmp eq i32* %a, null - br i1 %tobool1, label %End, label %Header2 + br i1 %tobool1, label %TBB, label %Header2 Header2: %tobool2 = icmp eq i32 %p, 10 @@ -290,14 +290,14 @@ ;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 10) ;CHECK-LABEL: TBB.split: -;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p) +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p) ;CHECK-LABEL: 
Tail ;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_eq_ne_untaken(i32* %a, i32 %v, i32 %p) { Header: %tobool1 = icmp ne i32* %a, null - br i1 %tobool1, label %End, label %Header2 + br i1 %tobool1, label %TBB, label %Header2 Header2: %tobool2 = icmp eq i32 %p, 10 @@ -489,6 +489,31 @@ ret i32 %v } +;CHECK-LABEL: @test_cond_no_effect +;CHECK-NOT: Header.split: +;CHECK-NOT: TBB.split: +;CHECK-LABEL: Tail: +;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 0) +;CHECK: ret i32 %r +define i32 @test_cond_no_effect(i32* %a, i32 %v) { +Entry: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %Header, label %End + +Header: + br i1 undef, label %Tail, label %TBB + +TBB: + br i1 undef, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 0) + ret i32 %r + +End: + ret i32 %v +} + ;CHECK-LABEL: @test_unreachable ;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 10)