Index: lib/Transforms/Scalar/CallSiteSplitting.cpp =================================================================== --- lib/Transforms/Scalar/CallSiteSplitting.cpp +++ lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -64,6 +64,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -73,6 +74,10 @@ STATISTIC(NumCallSiteSplit, "Number of call-site split"); +/// Allow up to DuplicationThreshold instructions before call instructions. +/// Those instructions need to be duplicated in all split blocks. +static unsigned DuplicationThreshold = 5; + static void addNonNullAttribute(Instruction *CallI, Instruction *NewCallI, Value *Op) { CallSite CS(NewCallI); @@ -178,12 +183,13 @@ if (!isa(Instr)) return false; - // Allow splitting a call-site only when there is no instruction before the - // call-site in the basic block. Based on this constraint, we only clone the - // call instruction, and we do not move a call-site across any other - // instruction. + // Allow splitting a call-site only when there are less than + // DuplicationThreshold instructions before the call. The instructions + // before the call will be duplicated in the split blocks and corresponding + // uses will be updated. BasicBlock *CallSiteBB = Instr->getParent(); - if (Instr != CallSiteBB->getFirstNonPHIOrDbg()) + if (std::distance(CallSiteBB->getFirstNonPHIOrDbg()->getIterator(), + Instr->getIterator()) >= DuplicationThreshold) return false; // Need 2 predecessors and cannot split an edge from an IndirectBrInst. @@ -198,6 +204,7 @@ struct SplitInfo { BasicBlock *BB; Instruction *CallInst; + DenseMap Mapping; }; /// Return true if the CS is split into its new predecessors. @@ -253,13 +260,13 @@ SmallVector Splits; for (const auto &P : Preds) { BasicBlock *PredBB = P.first; - BasicBlock *SplitBlock = - SplitBlockPredecessors(TailBB, PredBB, ".predBB.split"); + ValueToValueMapTy Remap; + BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween( + TailBB, PredBB, &*std::next(Instr->getIterator()), Remap); assert(SplitBlock && "Unexpected new basic block split."); - Instruction *NewCI = Instr->clone(); + Instruction *NewCI = &*std::prev(SplitBlock->getTerminator()->getIterator()); addConditions(CS, NewCI, P.second); - NewCI->insertBefore(&*SplitBlock->getFirstInsertionPt()); CallSite NewCS(NewCI); // Handle PHIs used as arguments in the call-site. @@ -272,9 +279,16 @@ ++ArgNo; } } + DEBUG(dbgs() << " " << *NewCI << " in " << SplitBlock->getName() << "\n"); - Splits.push_back({SplitBlock, NewCI}); + SplitInfo SI; + SI.CallInst = NewCI; + SI.BB = SplitBlock; + for (const auto &KV : Remap) { + SI.Mapping[KV.first] = KV.second; + } + Splits.push_back(SI); } // Replace users of the original call with a PHI mering call-sites split. @@ -286,7 +300,26 @@ CallPN->addIncoming(P.CallInst, P.BB); Instr->replaceAllUsesWith(CallPN); } - Instr->eraseFromParent(); + + // Remove instructions moved to split blocks from TailBB, from the duplicated + // call instruction to the beginning of the basic block. If an instruction + // has any uses, add a new PHI node to combine the values coming from the + // split blocks. By using reverse-order, we do not introduce unnecessary PHI + // nodes for def-use chains from the call instruction to the beginning of the + // block. + auto I = Instr->getReverseIterator(); + auto End = std::next(TailBB->getFirstNonPHIOrDbg()->getReverseIterator()); + while (I != End) { + Instruction *CurrentI = &*I++; + if (!CurrentI->use_empty()) { + PHINode *NewPN = PHINode::Create(CurrentI->getType(), 2); + for (auto &SI: Splits) + NewPN->addIncoming(SI.Mapping[CurrentI], SI.BB); + NewPN->insertBefore(&*TailBB->getFirstInsertionPt()); + CurrentI->replaceAllUsesWith(NewPN); + } + CurrentI->eraseFromParent(); + } NumCallSiteSplit++; } Index: test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll =================================================================== --- test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll +++ test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll @@ -3,15 +3,15 @@ ; CHECK-LABEL: @test_simple ; CHECK-LABEL: Header: -; CHECK-NEXT: br i1 undef, label %Tail.predBB.split -; CHECK-LABEL: TBB: -; CHECK: br i1 %cmp, label %Tail.predBB.split1 -; CHECK-LABEL: Tail.predBB.split: +; CHECK-NEXT: br i1 undef, label %Header.split +; CHECK-LABEL: Header.split: ; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p) -; CHECK-LABEL: Tail.predBB.split1: +; CHECK-LABEL: TBB: +; CHECK: br i1 %cmp, label %TBB.split +; CHECK-LABEL: TBB.split: ; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p) ; CHECK-LABEL: Tail -; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB.split ] ; CHECK: ret i32 %[[MERGED]] define i32 @test_simple(i32* %a, i32 %v, i32 %p) { Header: @@ -31,15 +31,15 @@ ; CHECK-LABEL: @test_eq_eq_eq_untaken ; CHECK-LABEL: Header: -; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB.split -; CHECK-LABEL: TBB2: -; CHECK: br i1 %cmp2, label %Tail.predBB.split1, label %End -; CHECK-LABEL: Tail.predBB.split: +; CHECK: br i1 %tobool1, label %TBB1, label %Header.split +; CHECK-LABEL: Header.split: ; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) -; CHECK-LABEL: Tail.predBB.split1: +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %TBB2.split, label %End +; CHECK-LABEL: TBB2.split: ; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 99) ; CHECK-LABEL: Tail -; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB2.split ] ; CHECK: ret i32 %[[MERGED]] define i32 @test_eq_eq_eq_untaken2(i32* %a, i32 %v, i32 %p) { Header: @@ -64,15 +64,15 @@ ; CHECK-LABEL: @test_eq_ne_eq_untaken ; CHECK-LABEL: Header: -; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB.split -; CHECK-LABEL: TBB2: -; CHECK: br i1 %cmp2, label %Tail.predBB.split1, label %End -; CHECK-LABEL: Tail.predBB.split: +; CHECK: br i1 %tobool1, label %TBB1, label %Header.split +; CHECK-LABEL: Header.split: ; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) -; CHECK-LABEL: Tail.predBB.split1: +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %TBB2.split, label %End +; CHECK-LABEL: TBB2.split: ; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 99) ; CHECK-LABEL: Tail -; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header.split ], [ %[[CALL2]], %TBB2.split ] ; CHECK: ret i32 %[[MERGED]] define i32 @test_eq_ne_eq_untaken(i32* %a, i32 %v, i32 %p) { Header: @@ -97,17 +97,17 @@ ; CHECK-LABEL: @test_header_header2_tbb ; CHECK: Header2: -; CHECK:br i1 %tobool2, label %Tail.predBB.split, label %TBB1 -; CHECK-LABEL: TBB2: -; CHECK: br i1 %cmp2, label %Tail.predBB.split1, label %End -; CHECK-LABEL: Tail.predBB.split: +; CHECK:br i1 %tobool2, label %Header2.split, label %TBB1 +; CHECK-LABEL: Header2.split: ; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10) -; CHECK-LABEL: Tail.predBB.split1: +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %TBB2.split, label %End +; CHECK-LABEL: TBB2.split: ; NOTE: CallSiteSplitting cannot infer that %a is null here, as it currently ; only supports recording conditions along a single predecessor path. ; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 99) ; CHECK-LABEL: Tail -; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB.split ], [ %[[CALL2]], %Tail.predBB.split1 ] +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB2.split ] ; CHECK: ret i32 %[[MERGED]] define i32 @test_header_header2_tbb(i32* %a, i32 %v, i32 %p) { Header: Index: test/Transforms/CallSiteSplitting/callsite-split-debug.ll =================================================================== --- test/Transforms/CallSiteSplitting/callsite-split-debug.ll +++ test/Transforms/CallSiteSplitting/callsite-split-debug.ll @@ -48,10 +48,9 @@ ; CallSiteBB. ; CHECK-LABEL: @foo -; CHECK-LABEL: CallsiteBB.predBB.split: -; CHECK: [[TMP1:%[0-9]+]] = call i16 @bar(i16 1, i16 5) -; CHECK-LABEL: CallsiteBB.predBB.split1: -; CHECK: [[TMP2:%[0-9]+]] = call i16 @bar(i16 0, i16 5) +; CHECK-LABEL: bb1.split: +; CHECK: [[TMP1:%[0-9]+]] = call i16 @bar(i16 0, i16 5) +; CHECK-LABEL: bb2.split: +; CHECK: [[TMP2:%[0-9]+]] = call i16 @bar(i16 1, i16 5) ; CHECK-LABEL: CallsiteBB -; CHECK: %phi.call = phi i16 [ [[TMP1]], %CallsiteBB.predBB.split ], [ [[TMP2]], %CallsiteBB.predBB.split1 - +; CHECK: %phi.call = phi i16 [ [[TMP2]], %bb2.split ], [ [[TMP1]], %bb1.split Index: test/Transforms/CallSiteSplitting/callsite-split.ll =================================================================== --- test/Transforms/CallSiteSplitting/callsite-split.ll +++ test/Transforms/CallSiteSplitting/callsite-split.ll @@ -9,7 +9,7 @@ ;CHECK-LABEL: @caller ;CHECK-LABEL: NextCond: ;CHECK: br {{.*}} label %callee.exit -;CHECK-LABEL: CallSiteBB.predBB.split: +;CHECK-LABEL: NextCond.split: ;CHECK: call void @callee(%struct.bitmap* null, %struct.bitmap* null, %struct.bitmap* %b_elt, i1 false) ;CHECK-LABEL: callee.exit: ;CHECK: call void @dummy2(%struct.bitmap* %a_elt)