Index: llvm/trunk/lib/Transforms/Scalar/CallSiteSplitting.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ llvm/trunk/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -13,10 +13,11 @@ // threading, or IPA-CP based function cloning, etc.). // As of now we support two cases : // -// 1) If a call site is dominated by an OR condition and if any of its arguments -// are predicated on this OR condition, try to split the condition with more -// constrained arguments. For example, in the code below, we try to split the -// call site since we can predicate the argument(ptr) based on the OR condition. +// 1) Try to split a call-site with constrained arguments, if any constraints +// on any argument can be found by following the single predecessors of the +// call-site's predecessors. Currently this pass only handles call-sites with 2 +// predecessors. For example, in the code below, we try to split the call-site +// since we can predicate the argument(ptr) based on the OR condition. // // Split from : // if (!ptr || c) @@ -200,16 +201,15 @@ } /// Return true if the CS is split into its new predecessors which are directly -/// hooked to each of its orignial predecessors pointed by PredBB1 and PredBB2. -/// In OR predicated case, PredBB1 will point the header, and PredBB2 will point -/// to the second compare block. CallInst1 and CallInst2 will be the new -/// call-sites placed in the new predecessors split for PredBB1 and PredBB2, -/// repectively. Therefore, CallInst1 will be the call-site placed -/// between Header and Tail, and CallInst2 will be the call-site between TBB and -/// Tail. For example, in the IR below with an OR condition, the call-site can -/// be split +/// hooked to each of its original predecessors pointed by PredBB1 and PredBB2. +/// CallInst1 and CallInst2 will be the new call-sites placed in the new +/// predecessors split for PredBB1 and PredBB2, respectively. 
+/// For example, in the IR below with an OR condition, the call-site can +/// be split. Assuming PredBB1=Header and PredBB2=TBB, CallInst1 will be the +/// call-site placed between Header and Tail, and CallInst2 will be the +/// call-site between TBB and Tail. /// -/// from : +/// From : /// /// Header: /// %c = icmp eq i32* %a, null @@ -237,9 +237,9 @@ /// Tail: /// %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2] /// -/// Note that for an OR predicated case, CallInst1 and CallInst2 should be -/// created with more constrained arguments in -/// createCallSitesOnOrPredicatedArgument(). +/// Note that in case any arguments at the call-site are constrained by its +/// predecessors, new call-sites with more constrained arguments will be +/// created in createCallSitesOnPredicatedArgument(). static void splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, Instruction *CallInst1, Instruction *CallInst2) { Instruction *Instr = CS.getInstruction(); @@ -332,18 +332,10 @@ splitCallSite(CS, Preds[0], Preds[1], nullptr, nullptr); return true; } -// Check if one of the predecessors is a single predecessors of the other. -// This is a requirement for control flow modeling an OR. HeaderBB points to -// the single predecessor and OrBB points to other node. HeaderBB potentially -// contains the first compare of the OR and OrBB the second. 
-static bool isOrHeader(BasicBlock *HeaderBB, BasicBlock *OrBB) { - return OrBB->getSinglePredecessor() == HeaderBB && - HeaderBB->getTerminator()->getNumSuccessors() == 2; -} -static bool tryToSplitOnOrPredicatedArgument(CallSite CS) { +static bool tryToSplitOnPredicatedArgument(CallSite CS) { auto Preds = getTwoPredecessors(CS.getInstruction()->getParent()); - if (!isOrHeader(Preds[0], Preds[1]) && !isOrHeader(Preds[1], Preds[0])) + if (Preds[0] == Preds[1]) return false; SmallVector, 2> C1, C2; @@ -362,7 +354,7 @@ static bool tryToSplitCallSite(CallSite CS) { if (!CS.arg_size() || !canSplitCallSite(CS)) return false; - return tryToSplitOnOrPredicatedArgument(CS) || + return tryToSplitOnPredicatedArgument(CS) || tryToSplitOnPHIPredicatedArgument(CS); } Index: llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll =================================================================== --- llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll +++ llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-or-structure.ll @@ -0,0 +1,139 @@ +; RUN: opt < %s -callsite-splitting -S | FileCheck %s +; RUN: opt < %s -passes='function(callsite-splitting)' -S | FileCheck %s + +; CHECK-LABEL: @test_simple +; CHECK-LABEL: Header: +; CHECK-NEXT: br i1 undef, label %Tail.predBB1.split +; CHECK-LABEL: TBB: +; CHECK: br i1 %cmp, label %Tail.predBB2.split +; CHECK-LABEL: Tail.predBB1.split: +; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p) +; CHECK-LABEL: Tail.predBB2.split: +; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p) +; CHECK-LABEL: Tail +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @test_simple(i32* %a, i32 %v, i32 %p) { +Header: + br i1 undef, label %Tail, label %End + +TBB: + %cmp = icmp eq i32* %a, null + br i1 %cmp, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + 
ret i32 %r + +End: + ret i32 %v +} + +; CHECK-LABEL: @test_eq_eq_eq_untaken +; CHECK-LABEL: Header: +; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB1.split +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End +; CHECK-LABEL: Tail.predBB1.split: +; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) +; CHECK-LABEL: Tail.predBB2.split: +; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 99) +; CHECK-LABEL: Tail +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @test_eq_eq_eq_untaken2(i32* %a, i32 %v, i32 %p) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %TBB1, label %Tail + +TBB1: + %cmp1 = icmp eq i32 %v, 1 + br i1 %cmp1, label %TBB2, label %End + +TBB2: + %cmp2 = icmp eq i32 %p, 99 + br i1 %cmp2, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +; CHECK-LABEL: @test_eq_ne_eq_untaken +; CHECK-LABEL: Header: +; CHECK: br i1 %tobool1, label %TBB1, label %Tail.predBB1.split +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End +; CHECK-LABEL: Tail.predBB1.split: +; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p) +; CHECK-LABEL: Tail.predBB2.split: +; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 99) +; CHECK-LABEL: Tail +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @test_eq_ne_eq_untaken(i32* %a, i32 %v, i32 %p) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %TBB1, label %Tail + +TBB1: + %cmp1 = icmp ne i32 %v, 1 + br i1 %cmp1, label %TBB2, label %End + +TBB2: + %cmp2 = icmp eq i32 %p, 99 + br i1 %cmp2, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret 
i32 %v +} + +; CHECK-LABEL: @test_header_header2_tbb +; CHECK: Header2: +; CHECK:br i1 %tobool2, label %Tail.predBB1.split, label %TBB1 +; CHECK-LABEL: TBB2: +; CHECK: br i1 %cmp2, label %Tail.predBB2.split, label %End +; CHECK-LABEL: Tail.predBB1.split: +; CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10) +; CHECK-LABEL: Tail.predBB2.split: +; NOTE: CallSiteSplitting cannot infer that %a is null here, as it currently +; only supports recording conditions along a single predecessor path. +; CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 99) +; CHECK-LABEL: Tail +; CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Tail.predBB1.split ], [ %[[CALL2]], %Tail.predBB2.split ] +; CHECK: ret i32 %[[MERGED]] +define i32 @test_header_header2_tbb(i32* %a, i32 %v, i32 %p) { +Header: + %tobool1 = icmp eq i32* %a, null + br i1 %tobool1, label %TBB1, label %Header2 + +Header2: + %tobool2 = icmp eq i32 %p, 10 + br i1 %tobool2, label %Tail, label %TBB1 + +TBB1: + %cmp1 = icmp eq i32 %v, 1 + br i1 %cmp1, label %TBB2, label %End + +TBB2: + %cmp2 = icmp eq i32 %p, 99 + br i1 %cmp2, label %Tail, label %End + +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r + +End: + ret i32 %v +} + +define i32 @callee(i32* %a, i32 %v, i32 %p) { + ret i32 10 +} Index: llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll =================================================================== --- llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll +++ llvm/trunk/test/Transforms/CallSiteSplitting/callsite-no-splitting.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -callsite-splitting -S | FileCheck %s +; RUN: opt < %s -passes='function(callsite-splitting)' -S | FileCheck %s + +define i32 @callee(i32*, i32, i32) { + ret i32 10 +} + +; CHECK-LABEL: @test_preds_equal +; CHECK-NOT: split +; CHECK: br i1 %cmp, label %Tail, label %Tail +define i32 @test_preds_equal(i32* %a, i32 %v, i32 %p) { +TBB: + %cmp = icmp eq i32* %a, null + br i1 
%cmp, label %Tail, label %Tail +Tail: + %r = call i32 @callee(i32* %a, i32 %v, i32 %p) + ret i32 %r +}