Index: llvm/trunk/lib/Transforms/Scalar/CallSiteSplitting.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ llvm/trunk/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -59,11 +59,13 @@ #include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -73,6 +75,15 @@ STATISTIC(NumCallSiteSplit, "Number of call-site split"); +/// Only allow instructions before a call, if their CodeSize cost is below +/// DuplicationThreshold. Those instructions need to be duplicated in all +/// split blocks. +static cl::opt + DuplicationThreshold("callsite-splitting-duplication-threshold", cl::Hidden, + cl::desc("Only allow instructions before a call, if " + "their cost is below DuplicationThreshold"), + cl::init(5)); + static void addNonNullAttribute(CallSite CS, Value *Op) { unsigned ArgNo = 0; for (auto &I : CS.args()) { @@ -168,20 +179,26 @@ return Preds; } -static bool canSplitCallSite(CallSite CS) { +static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) { // FIXME: As of now we handle only CallInst. InvokeInst could be handled // without too much effort. Instruction *Instr = CS.getInstruction(); if (!isa(Instr)) return false; - // Allow splitting a call-site only when there is no instruction before the - // call-site in the basic block. Based on this constraint, we only clone the - // call instruction, and we do not move a call-site across any other - // instruction. 
BasicBlock *CallSiteBB = Instr->getParent(); - if (Instr != CallSiteBB->getFirstNonPHIOrDbg()) - return false; + // Allow splitting a call-site only when the CodeSize cost of the + // instructions before the call is less than DuplicationThreshold. The + // instructions before the call will be duplicated in the split blocks and + // corresponding uses will be updated. + unsigned Cost = 0; + for (auto &InstBeforeCall : + llvm::make_range(CallSiteBB->begin(), Instr->getIterator())) { + Cost += TTI.getInstructionCost(&InstBeforeCall, + TargetTransformInfo::TCK_CodeSize); + if (Cost >= DuplicationThreshold) + return false; + } // Need 2 predecessors and cannot split an edge from an IndirectBrInst. SmallVector Preds(predecessors(CallSiteBB)); @@ -246,16 +263,22 @@ CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call"); DEBUG(dbgs() << "split call-site : " << *Instr << " into \n"); - for (const auto &P : Preds) { - BasicBlock *PredBB = P.first; - BasicBlock *SplitBlock = - SplitBlockPredecessors(TailBB, PredBB, ".predBB.split"); + + assert(Preds.size() == 2 && "The ValueToValueMaps array has size 2."); + // ValueToValueMapTy is neither copy nor moveable, so we use a simple array + // here. + ValueToValueMapTy ValueToValueMaps[2]; + for (unsigned i = 0; i < Preds.size(); i++) { + new (&ValueToValueMaps[i]) ValueToValueMapTy; + BasicBlock *PredBB = Preds[i].first; + BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween( + TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i]); assert(SplitBlock && "Unexpected new basic block split."); - Instruction *NewCI = Instr->clone(); + Instruction *NewCI = + &*std::prev(SplitBlock->getTerminator()->getIterator()); CallSite NewCS(NewCI); - addConditions(NewCS, P.second); - NewCI->insertBefore(&*SplitBlock->getFirstInsertionPt()); + addConditions(NewCS, Preds[i].second); // Handle PHIs used as arguments in the call-site. 
for (PHINode &PN : TailBB->phis()) { @@ -273,13 +296,40 @@ CallPN->addIncoming(NewCI, SplitBlock); } + auto OriginalBegin = TailBB->begin(); // Replace users of the original call with a PHI mering call-sites split. if (CallPN) { - CallPN->insertBefore(TailBB->getFirstNonPHI()); + CallPN->insertBefore(&*OriginalBegin); Instr->replaceAllUsesWith(CallPN); } - Instr->eraseFromParent(); + // Remove instructions moved to split blocks from TailBB, from the duplicated + // call instruction to the beginning of the basic block. If an instruction + // has any uses, add a new PHI node to combine the values coming from the + // split blocks. The new PHI nodes are placed before the first original + // instruction, so we do not end up deleting them. By using reverse-order, we + // do not introduce unnecessary PHI nodes for def-use chains from the call + // instruction to the beginning of the block. + auto I = Instr->getReverseIterator(); + while (I != TailBB->rend()) { + Instruction *CurrentI = &*I++; + if (!CurrentI->use_empty()) { + // If an existing PHI has users after the call, there is no need to create + // a new one. + if (isa(CurrentI)) + continue; + PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size()); + for (auto &Mapping : ValueToValueMaps) + NewPN->addIncoming(Mapping[CurrentI], + cast(Mapping[CurrentI])->getParent()); + NewPN->insertBefore(&*TailBB->begin()); + CurrentI->replaceAllUsesWith(NewPN); + } + CurrentI->eraseFromParent(); + // We are done once we handled the first original instruction in TailBB. 
+ if (CurrentI == &*OriginalBegin) + break; + } NumCallSiteSplit++; } @@ -344,14 +394,15 @@ return true; } -static bool tryToSplitCallSite(CallSite CS) { - if (!CS.arg_size() || !canSplitCallSite(CS)) +static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI) { + if (!CS.arg_size() || !canSplitCallSite(CS, TTI)) return false; return tryToSplitOnPredicatedArgument(CS) || tryToSplitOnPHIPredicatedArgument(CS); } -static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) { +static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI, + TargetTransformInfo &TTI) { bool Changed = false; for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) { BasicBlock &BB = *BI++; @@ -364,7 +415,7 @@ Function *Callee = CS.getCalledFunction(); if (!Callee || Callee->isDeclaration()) continue; - Changed |= tryToSplitCallSite(CS); + Changed |= tryToSplitCallSite(CS, TTI); } } return Changed; @@ -379,6 +430,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); FunctionPass::getAnalysisUsage(AU); } @@ -387,7 +439,8 @@ return false; auto &TLI = getAnalysis().getTLI(); - return doCallSiteSplitting(F, TLI); + auto &TTI = getAnalysis().getTTI(F); + return doCallSiteSplitting(F, TLI, TTI); } }; } // namespace @@ -396,6 +449,7 @@ INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting", "Call-site splitting", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting", "Call-site splitting", false, false) FunctionPass *llvm::createCallSiteSplittingPass() { @@ -405,8 +459,9 @@ PreservedAnalyses CallSiteSplittingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult(F); + auto &TTI = AM.getResult(F); - if (!doCallSiteSplitting(F, TLI)) + if (!doCallSiteSplitting(F, TLI, TTI)) return PreservedAnalyses::all(); PreservedAnalyses PA; 
return PA; Index: llvm/trunk/test/Transforms/CallSiteSplitting/callsite-instructions-before-call.ll =================================================================== --- llvm/trunk/test/Transforms/CallSiteSplitting/callsite-instructions-before-call.ll +++ llvm/trunk/test/Transforms/CallSiteSplitting/callsite-instructions-before-call.ll @@ -0,0 +1,253 @@ +; RUN: opt -S -callsite-splitting < %s | FileCheck --check-prefix=CHECK %s +; RUN: opt -S -callsite-splitting -callsite-splitting-duplication-threshold=0 < %s | FileCheck --check-prefix=NODUP %s + +; Instructions before a call that will be pushed to its predecessors +; with uses after the callsite, must be patched up as PHI nodes in +; the join block. +define i32* @test_split_branch_phi(i32* %ptrarg, i32 %i) { +Header: + %tobool = icmp ne i32* %ptrarg, null + br i1 %tobool, label %TBB, label %CallSite + +TBB: ; preds = %Header + %arrayidx = getelementptr inbounds i32, i32* %ptrarg, i64 42 + %0 = load i32, i32* %arrayidx, align 4 + %tobool1 = icmp ne i32 %0, 0 + br i1 %tobool1, label %CallSite, label %End + +CallSite: ; preds = %TBB, %Header + %somepointer = getelementptr i32, i32* %ptrarg, i64 18 + call void @bar(i32* %ptrarg, i32 %i) + br label %End + +End: ; preds = %CallSite, %TBB + %somepointerphi = phi i32* [ %somepointer, %CallSite ], [ null, %TBB ] + ret i32* %somepointerphi +} +; NODUP-LABEL: test_split_branch_phi +; NODUP-NOT: split +; CHECK-LABEL: Header.split +; CHECK: %[[V1:somepointer[0-9]+]] = getelementptr i32, i32* %ptrarg, i64 18 +; CHECK: call void @bar(i32* null, i32 %i) +; CHECK: br label %CallSite +; CHECK-LABEL: TBB.split: +; CHECK: %[[V2:somepointer[0-9]+]] = getelementptr i32, i32* %ptrarg, i64 18 +; CHECK: call void @bar(i32* nonnull %ptrarg, i32 %i) +; CHECK: br label %CallSite +; CHECK: CallSite: +; CHECK: phi i32* [ %[[V1]], %Header.split ], [ %[[V2]], %TBB.split ] + + +define void @split_branch_no_extra_phi(i32* %ptrarg, i32 %i) { +Header: + %tobool = icmp ne i32* %ptrarg, null + br 
i1 %tobool, label %TBB, label %CallSite + +TBB: ; preds = %Header + %arrayidx = getelementptr inbounds i32, i32* %ptrarg, i64 42 + %0 = load i32, i32* %arrayidx, align 4 + %tobool1 = icmp ne i32 %0, 0 + br i1 %tobool1, label %CallSite, label %End + +CallSite: ; preds = %TBB, %Header + %i.add = add i32 %i, 99 + call void @bar(i32* %ptrarg, i32 %i.add) + br label %End + +End: ; preds = %CallSite, %TBB + ret void +} +; NODUP-LABEL: split_branch_no_extra_phi +; NODUP-NOT: split +; CHECK-LABEL: split_branch_no_extra_phi +; CHECK-LABEL: Header.split +; CHECK: %[[V1:.+]] = add i32 %i, 99 +; CHECK: call void @bar(i32* null, i32 %[[V1]]) +; CHECK: br label %CallSite +; CHECK-LABEL: TBB.split: +; CHECK: %[[V2:.+]] = add i32 %i, 99 +; CHECK: call void @bar(i32* nonnull %ptrarg, i32 %[[V2]]) +; CHECK: br label %CallSite +; CHECK: CallSite: +; CHECK-NOT: phi + + +; In this test case, the codesize cost of the instructions before the call to +; bar() is equal to the default DuplicationThreshold of 5, because calls are +; more expensive. +define void @test_no_split_threshold(i32* %ptrarg, i32 %i) { +Header: + %tobool = icmp ne i32* %ptrarg, null + br i1 %tobool, label %TBB, label %CallSite + +TBB: ; preds = %Header + %arrayidx = getelementptr inbounds i32, i32* %ptrarg, i64 42 + %0 = load i32, i32* %arrayidx, align 4 + %tobool1 = icmp ne i32 %0, 0 + br i1 %tobool1, label %CallSite, label %End + +CallSite: ; preds = %TBB, %Header + %i2 = add i32 %i, 10 + call void @bari(i32 %i2) + call void @bari(i32 %i2) + call void @bar(i32* %ptrarg, i32 %i2) + br label %End + +End: ; preds = %CallSite, %TBB + ret void +} +; NODUP-LABEL: test_no_split_threshold +; NODUP-NOT: split +; CHECK-LABEL: test_no_split_threshold +; CHECK-NOT: split +; CHECK-LABEL: CallSite: +; CHECK: call void @bar(i32* %ptrarg, i32 %i2) + +; In this test case, the phi node %l in CallSite should be removed, as after +; moving the call to the split blocks we can use the values directly. 
+define void @test_remove_unused_phi(i32* %ptrarg, i32 %i) { +Header: + %l1 = load i32, i32* undef, align 16 + %tobool = icmp ne i32* %ptrarg, null + br i1 %tobool, label %TBB, label %CallSite + +TBB: ; preds = %Header + %arrayidx = getelementptr inbounds i32, i32* %ptrarg, i64 42 + %0 = load i32, i32* %arrayidx, align 4 + %l2 = load i32, i32* undef, align 16 + %tobool1 = icmp ne i32 %0, 0 + br i1 %tobool1, label %CallSite, label %End + +CallSite: ; preds = %TBB, %Header + %l = phi i32 [ %l1, %Header ], [ %l2, %TBB ] + call void @bar(i32* %ptrarg, i32 %l) + br label %End + +End: ; preds = %CallSite, %TBB + ret void +} +; NODUP-LABEL: test_remove_unused_phi +; NODUP-NOT: split +; CHECK-LABEL: test_remove_unused_phi +; CHECK-LABEL: Header.split +; CHECK: call void @bar(i32* null, i32 %l1) +; CHECK: br label %CallSite +; CHECK-LABEL: TBB.split: +; CHECK: call void @bar(i32* nonnull %ptrarg, i32 %l2) +; CHECK: br label %CallSite +; CHECK-LABEL: CallSite: +; CHECK-NOT: phi + +; In this test case, we need to insert a new PHI node in TailBB to combine +; the loads we moved to the predecessors. 
+define void @test_add_new_phi(i32* %ptrarg, i32 %i) { +Header: + %tobool = icmp ne i32* %ptrarg, null + br i1 %tobool, label %TBB, label %CallSite + +TBB: + br i1 undef, label %CallSite, label %End + +CallSite: + %arrayidx112 = getelementptr inbounds i32, i32* undef, i64 1 + %0 = load i32, i32* %arrayidx112, align 4 + call void @bar(i32* %ptrarg, i32 %i) + %sub = sub nsw i32 %0, undef + br label %End + +End: ; preds = %CallSite, %TBB + ret void +} +; NODUP-LABEL: test_add_new_phi +; NODUP-NOT: split +; CHECK-LABEL: test_add_new_phi +; CHECK-LABEL: Header.split +; CHECK: %[[V1:.+]] = load i32, i32* +; CHECK: call void @bar(i32* null, i32 %i) +; CHECK: br label %CallSite +; CHECK-LABEL: TBB.split: +; CHECK: %[[V2:.+]] = load i32, i32* +; CHECK: call void @bar(i32* nonnull %ptrarg, i32 %i) +; CHECK: br label %CallSite +; CHECK-LABEL: CallSite: +; CHECK-NEXT: %[[V3:.+]] = phi i32 [ %[[V1]], %Header.split ], [ %[[V2]], %TBB.split ] +; CHECK: %sub = sub nsw i32 %[[V3]], undef + +define i32 @test_firstnophi(i32* %a, i32 %v) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %Tail, label %TBB + +TBB: + %cmp = icmp eq i32 %v, 1 + br i1 %cmp, label %Tail, label %End + +Tail: + %p = phi i32[1,%Header], [2, %TBB] + store i32 %v, i32* %a + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} +; NODUP-LABEL: @test_firstnophi +; NODUP-NOT: split: +; CHECK-LABEL: @test_firstnophi +; CHECK-LABEL: Header.split: +; CHECK-NEXT: store i32 %v, i32* %a +; CHECK-NEXT: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 1) +; CHECK-NEXT: br label %Tail +; CHECK-LABEL: TBB.split: +; CHECK-NEXT: store i32 %v, i32* %a +; CHECK-NEXT: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 2) +; CHECK-NEXT: br label %Tail +; CHECK-LABEL: Tail: +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @callee(i32* %a, i32 %v, i32 %p) { + ret i32 0 +} + +define 
void @test_no_remove_used_phi(i32* %ptrarg, i32 %i) { +Header: + %l1 = load i32, i32* undef, align 16 + %tobool = icmp ne i32* %ptrarg, null + br i1 %tobool, label %TBB, label %CallSite + +TBB: ; preds = %Header + %arrayidx = getelementptr inbounds i32, i32* %ptrarg, i64 42 + %0 = load i32, i32* %arrayidx, align 4 + %l2 = load i32, i32* undef, align 16 + %tobool1 = icmp ne i32 %0, 0 + br i1 %tobool1, label %CallSite, label %End + +CallSite: ; preds = %TBB, %Header + %l = phi i32 [ %l1, %Header ], [ %l2, %TBB ] + call void @bar(i32* %ptrarg, i32 %l) + call void @bari(i32 %l) + br label %End + +End: ; preds = %CallSite, %TBB + ret void +} +; NODUP-LABEL: @test_no_remove_used_phi +; NODUP-NOT: split +; CHECK-LABEL: @test_no_remove_used_phi +; CHECK-LABEL: Header.split: +; CHECK: call void @bar(i32* null, i32 %l1) +; CHECK-NEXT: br label %CallSite +; CHECK-LABEL: TBB.split: +; CHECK: call void @bar(i32* nonnull %ptrarg, i32 %l2) +; CHECK-NEXT: br label %CallSite +; CHECK-LABEL: CallSite: +; CHECK-NEXT: %l = phi i32 [ %l1, %Header.split ], [ %l2, %TBB.split ] +; CHECK: call void @bari(i32 %l) + +define void @bar(i32*, i32) { + ret void +} + +define void @bari(i32) { + ret void +} Index: llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll =================================================================== --- llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll +++ llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll @@ -3,15 +3,15 @@ ; CHECK-LABEL: @test_simple ; CHECK-LABEL: Header: -; CHECK-NEXT: br i1 undef, label %Tail.predBB.split -; CHECK-LABEL: TBB: -; CHECK: br i1 %cmp, label %Tail.predBB.split1 -; CHECK-LABEL: Tail.predBB.split: +; CHECK-NEXT: br i1 undef, label %Header.split +; CHECK-LABEL: Header.split: ; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p) -; CHECK-LABEL: Tail.predBB.split1: +; CHECK-LABEL: TBB: +; CHECK: br i1 %cmp, label %TBB.split +; CHECK-LABEL: TBB.split: ; CHECK: 
%[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p) ; CHECK-LABEL: Tail -; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ; CHECK: ret i32 %[[MERGED]] define i32 @test_simple(i32* %a, i32 %v, i32 %p) { Header: @@ -31,15 +31,15 @@ ; CHECK-LABEL: @test_eq_eq_eq_untaken ; CHECK-LABEL: Header: -; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB.split -; CHECK-LABEL: TBB2: -; CHECK: br i1 %cmp2, label %Tail.predBB.split1, label %End -; CHECK-LABEL: Tail.predBB.split: +; CHECK: br i1 %tobool1, label %TBB1, label %Header.split +; CHECK-LABEL: Header.split: ; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) -; CHECK-LABEL: Tail.predBB.split1: +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %TBB2.split, label %End +; CHECK-LABEL: TBB2.split: ; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 99) ; CHECK-LABEL: Tail -; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB2.split ] ; CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq_eq_untaken2(i32* %a, i32 %v, i32 %p) { Header: @@ -64,15 +64,15 @@ ; CHECK-LABEL: @test_eq_ne_eq_untaken ; CHECK-LABEL: Header: -; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB.split -; CHECK-LABEL: TBB2: -; CHECK: br i1 %cmp2, label %Tail.predBB.split1, label %End -; CHECK-LABEL: Tail.predBB.split: +; CHECK: br i1 %tobool1, label %TBB1, label %Header.split +; CHECK-LABEL: Header.split: ; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) -; CHECK-LABEL: Tail.predBB.split1: +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %TBB2.split, label %End +; CHECK-LABEL: TBB2.split: ; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 99) ; CHECK-LABEL: Tail -; CHECK: 
%[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB2.split ] ; CHECK: ret i32 %[[MERGED]] define i32 @test_eq_ne_eq_untaken(i32* %a, i32 %v, i32 %p) { Header: @@ -97,17 +97,17 @@ ; CHECK-LABEL: @test_header_header2_tbb ; CHECK: Header2: -; CHECK:br i1 %tobool2, label %Tail.predBB.split, label %TBB1 -; CHECK-LABEL: TBB2: -; CHECK: br i1 %cmp2, label %Tail.predBB.split1, label %End -; CHECK-LABEL: Tail.predBB.split: +; CHECK:br i1 %tobool2, label %Header2.split, label %TBB1 +; CHECK-LABEL: Header2.split: ; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10) -; CHECK-LABEL: Tail.predBB.split1: +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %TBB2.split, label %End +; CHECK-LABEL: TBB2.split: ; NOTE: CallSiteSplitting cannot infer that %a is null here, as it currently ; only supports recording conditions along a single predecessor path. ; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 99) ; CHECK-LABEL: Tail -; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB2.split ] ; CHECK: ret i32 %[[MERGED]] define i32 @test_header_header2_tbb(i32* %a, i32 %v, i32 %p) { Header: Index: llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split-debug.ll =================================================================== --- llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split-debug.ll +++ llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split-debug.ll @@ -48,10 +48,9 @@ ; CallSiteBB. 
; CHECK-LABEL: @foo -; CHECK-LABEL: CallsiteBB.predBB.split: -; CHECK: [[TMP1:%[0-9]+]] = call i16 @bar(i16 1, i16 5) -; CHECK-LABEL: CallsiteBB.predBB.split1: -; CHECK: [[TMP2:%[0-9]+]] = call i16 @bar(i16 0, i16 5) +; CHECK-LABEL: bb1.split: +; CHECK: [[TMP1:%[0-9]+]] = call i16 @bar(i16 0, i16 5) +; CHECK-LABEL: bb2.split: +; CHECK: [[TMP2:%[0-9]+]] = call i16 @bar(i16 1, i16 5) ; CHECK-LABEL: CallsiteBB -; CHECK: %phi.call = phi i16 [ [[TMP1]], %CallsiteBB.predBB.split ], [ [[TMP2]], %CallsiteBB.predBB.split1 - +; CHECK: %phi.call = phi i16 [ [[TMP2]], %bb2.split ], [ [[TMP1]], %bb1.split Index: llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll =================================================================== --- llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll +++ llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split-or-phi.ll @@ -5,13 +5,17 @@ target triple = "aarch64-linaro-linux-gnueabi" ;CHECK-LABEL: @test_eq_eq -;CHECK-LABEL: Tail.predBB.split: + +;CHECK-LABEL: Header: +;CHECK: br i1 %tobool1, label %Header.split, label %TBB +;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 1) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB: +;CHECK: br i1 %cmp, label %TBB.split, label %End +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 1, %Tail.predBB.split ], [ 2, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq(i32* %a, i32 %v) { Header: @@ -32,12 +36,12 @@ } ;CHECK-LABEL: @test_eq_eq_eq -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 10) -;CHECK-LABEL: Tail.predBB.split1: 
+;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 %p) ;CHECK-LABEL: Tail -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq_eq(i32* %a, i32 %v, i32 %p) { Header: @@ -61,12 +65,12 @@ } ;CHECK-LABEL: @test_eq_eq_eq_constrain_same_i32_arg -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 222, i32 %p) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 333, i32 %p) ;CHECK-LABEL: Tail -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq_eq_constrain_same_i32_arg(i32* %a, i32 %v, i32 %p) { Header: @@ -90,13 +94,12 @@ } ;CHECK-LABEL: @test_ne_eq -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 1) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 1, %Tail.predBB.split ], [ 2, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_eq(i32* %a, i32 %v) { Header: @@ -117,12 +120,12 @@ } ;CHECK-LABEL: @test_ne_eq_ne -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: 
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) ;CHECK-LABEL: Tail -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_eq_ne(i32* %a, i32 %v, i32 %p) { Header: @@ -146,13 +149,12 @@ } ;CHECK-LABEL: @test_ne_ne -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 1) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 1, %Tail.predBB.split ], [ 2, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_ne(i32* %a, i32 %v) { Header: @@ -173,12 +175,12 @@ } ;CHECK-LABEL: @test_ne_ne_ne_constrain_same_pointer_arg -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) ;CHECK-LABEL: Tail -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_ne_ne_constrain_same_pointer_arg(i32* %a, i32 %v, i32 %p, i32* %a2, i32* %a3) { Header: @@ -204,13 +206,12 @@ ;CHECK-LABEL: @test_eq_eq_untaken -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 1) -;CHECK-LABEL: Tail.predBB.split1: 
+;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 1, %Tail.predBB.split ], [ 2, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq_untaken(i32* %a, i32 %v) { Header: @@ -231,12 +232,12 @@ } ;CHECK-LABEL: @test_eq_eq_eq_untaken -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 %p) ;CHECK-LABEL: Tail -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq_eq_untaken(i32* %a, i32 %v, i32 %p) { Header: @@ -260,13 +261,12 @@ } ;CHECK-LABEL: @test_ne_eq_untaken -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 1) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 1, %Tail.predBB.split ], [ 2, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_eq_untaken(i32* %a, i32 %v) { Header: @@ -287,12 +287,12 @@ } ;CHECK-LABEL: @test_ne_eq_ne_untaken -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header2.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 
%v, i32 10) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p) ;CHECK-LABEL: Tail -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_eq_ne_untaken(i32* %a, i32 %v, i32 %p) { Header: @@ -316,13 +316,12 @@ } ;CHECK-LABEL: @test_ne_ne_untaken -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 1) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 1, %Tail.predBB.split ], [ 2, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_ne_ne_untaken(i32* %a, i32 %v) { Header: @@ -343,13 +342,12 @@ } ;CHECK-LABEL: @test_nonconst_const_phi -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 1) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 1, %Tail.predBB.split ], [ 2, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_nonconst_const_phi(i32* %a, i32* %b, i32 %v) { Header: @@ -370,13 +368,12 @@ } ;CHECK-LABEL: @test_nonconst_nonconst_phi -;CHECK-LABEL: Tail.predBB.split: -;CHECK: %[[CALL1:.*]] = call i32 
@callee(i32* %a, i32 %v, i32 2) -;CHECK-LABEL: Tail.predBB.split1: -;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 1) +;CHECK-LABEL: Header.split: +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 1) +;CHECK-LABEL: TBB.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 2, %Tail.predBB.split ], [ 1, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL2]], %TBB.split ], [ %[[CALL1]], %Header.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_nonconst_nonconst_phi(i32* %a, i32* %b, i32 %v, i32 %v2) { Header: @@ -397,13 +394,12 @@ } ;CHECK-LABEL: @test_cfg_no_or_phi -;CHECK-LABEL: Tail.predBB.split -;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 2) -;CHECK-LABEL: Tail.predBB.split1: -;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 1) +;CHECK-LABEL: TBB0.split +;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 1) +;CHECK-LABEL: TBB1.split: +;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 2) ;CHECK-LABEL: Tail -;CHECK: %p = phi i32 [ 2, %Tail.predBB.split ], [ 1, %Tail.predBB.split1 ] -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL2]], %TBB1.split ], [ %[[CALL1]], %TBB0.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_cfg_no_or_phi(i32* %a, i32 %v) { entry: @@ -421,8 +417,8 @@ } ;CHECK-LABEL: @test_nonconst_nonconst_phi_noncost -;CHECK-NOT: Tail.predBB.split: -;CHECK-NOT: Tail.predBB.split1: +;CHECK-NOT: Header.split: +;CHECK-NOT: TBB.split: ;CHECK-LABEL: Tail: ;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 %p) ;CHECK: ret i32 %r @@ -444,34 +440,9 @@ ret i32 %v } -;CHECK-LABEL: @test_fisrtnonphi -;CHECK-NOT: Tail.predBB.split: -;CHECK-NOT: Tail.predBB.split1: -;CHECK-LABEL: Tail: -;CHECK: %r = call 
i32 @callee(i32* %a, i32 %v, i32 %p) -;CHECK: ret i32 %r -define i32 @test_fisrtnonphi(i32* %a, i32 %v) { -Header: - %tobool1 = icmp eq i32* %a, null - br i1 %tobool1, label %Tail, label %TBB - -TBB: - %cmp = icmp eq i32 %v, 1 - br i1 %cmp, label %Tail, label %End - -Tail: - %p = phi i32[1,%Header], [2, %TBB] - store i32 %v, i32* %a - %r = call i32 @callee(i32* %a, i32 %v, i32 %p) - ret i32 %r - -End: - ret i32 %v -} - ;CHECK-LABEL: @test_3preds_constphi -;CHECK-NOT: Tail.predBB.split: -;CHECK-NOT: Tail.predBB.split1: +;CHECK-NOT: Header.split: +;CHECK-NOT: TBB.split: ;CHECK-LABEL: Tail: ;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 %p) ;CHECK: ret i32 %r @@ -495,8 +466,8 @@ } ;CHECK-LABEL: @test_indirectbr_phi -;CHECK-NOT: Tail.predBB.split: -;CHECK-NOT: Tail.predBB.split1: +;CHECK-NOT: Header.split: +;CHECK-NOT: TBB.split: ;CHECK-LABEL: Tail: ;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 %p) ;CHECK: ret i32 %r @@ -519,12 +490,12 @@ } ;CHECK-LABEL: @test_unreachable -;CHECK-LABEL: Tail.predBB.split: +;CHECK-LABEL: Header.split: ;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 10) -;CHECK-LABEL: Tail.predBB.split1: +;CHECK-LABEL: TBB.split: ;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 %p) ;CHECK-LABEL: Tail -;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ;CHECK: ret i32 %[[MERGED]] define i32 @test_unreachable(i32* %a, i32 %v, i32 %p) { Entry: Index: llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split.ll =================================================================== --- llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split.ll +++ llvm/trunk/test/Transforms/CallSiteSplitting/callsite-split.ll @@ -7,10 +7,10 @@ %struct.bitmap = type { i32, %struct.bitmap* } ;CHECK-LABEL: @caller +;CHECK-LABEL: Top.split: +;CHECK: call void @callee(%struct.bitmap* null, 
%struct.bitmap* null, %struct.bitmap* %b_elt, i1 false) ;CHECK-LABEL: NextCond: ;CHECK: br {{.*}} label %callee.exit -;CHECK-LABEL: CallSiteBB.predBB.split: -;CHECK: call void @callee(%struct.bitmap* null, %struct.bitmap* null, %struct.bitmap* %b_elt, i1 false) ;CHECK-LABEL: callee.exit: ;CHECK: call void @dummy2(%struct.bitmap* %a_elt) @@ -69,12 +69,12 @@ ;CHECK-LABEL: @caller2 -;CHECK-LABEL: CallSiteBB.predBB.split: -;CHECK: call void @dummy3() -;CHECK-LABEL: CallSiteBB.predBB.split1: +;CHECK-LABEL: Top.split: ;CHECK: call void @dummy4() +;CHECK-LABEL: NextCond.split: +;CHECK: call void @dummy3() ;CheCK-LABEL: CallSiteBB: -;CHECK: %phi.call = phi i1 [ true, %CallSiteBB.predBB.split ], [ false, %CallSiteBB.predBB.split1 ] +;CHECK: %phi.call = phi i1 [ true, %NextCond.split ], [ false, %Top.split ] ;CHECK: call void @foo(i1 %phi.call) define void @caller2(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, %struct.bitmap* %c_elt) { entry: