Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -80,6 +80,7 @@ void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); +void initializeCallSiteSplittingLegacyPassPass(PassRegistry&); void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); void initializeCFGPrinterLegacyPassPass(PassRegistry&); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -73,6 +73,14 @@ // FunctionPass *createDeadStoreEliminationPass(); + +//===----------------------------------------------------------------------===// +// +// CallSiteSplitting - This pass split call-site based on its known argument +// values. +FunctionPass *createCallSiteSplittingPass(); + + //===----------------------------------------------------------------------===// // // AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This Index: include/llvm/Transforms/Scalar/CallSiteSplitting.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/CallSiteSplitting.h @@ -0,0 +1,29 @@ +//===- CallSiteSplitting..h - Callsite Splitting ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H +#define LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/Compiler.h" +#include + +namespace llvm { + +struct CallSiteSplittingPass : PassInfoMixin { + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -88,6 +88,7 @@ #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/Transforms/Scalar/DCE.h" @@ -541,6 +542,10 @@ EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(LowerExpectIntrinsicPass()); + + if (Level == O3) + EarlyFPM.addPass(CallSiteSplittingPass()); + // In SamplePGO ThinLTO backend, we need instcombine before profile annotation // to convert bitcast to direct calls so that they can be inlined during the // profile annotation prepration step. @@ -911,6 +916,10 @@ MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); + FunctionPassManager EarlyFPM(DebugLogging); + EarlyFPM.addPass(CallSiteSplittingPass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -139,6 +139,7 @@ FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass()) FUNCTION_PASS("bdce", BDCEPass()) FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) +FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -459,6 +459,9 @@ addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + if (OptLevel > 2) + MPM.add(createCallSiteSplittingPass()); + MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. @@ -692,6 +695,9 @@ PM.add(createInferFunctionAttrsLegacyPass()); if (OptLevel > 1) { + // Split call-site with more constrained arguments. + PM.add(createCallSiteSplittingPass()); + // Indirect call promotion. This should promote all the targets that are // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -2,6 +2,7 @@ ADCE.cpp AlignmentFromAssumptions.cpp BDCE.cpp + CallSiteSplitting.cpp ConstantHoisting.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp Index: lib/Transforms/Scalar/CallSiteSplitting.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -0,0 +1,482 @@ +//===- CallSiteSplitting.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a transformation that tries to split a call-site to pass +// more constrained arguments if its argument is predicated in the control flow +// so that we can expose better context to the later passes (e.g, inliner, jump +// threading, or IPA-CP based function cloning, etc.). +// As of now we support two cases : +// +// 1) If a call site is dominated by an OR condition and if any of its arguments +// are predicated on this OR condition, try to split the condition with more +// constrained arguments. For example, in the code below, we try to split the +// call site since we can predicate the argument(ptr) based on the OR condition. +// +// Split from : +// if (!ptr || c) +// callee(ptr); +// to : +// if (!ptr) +// callee(null) // set the known constant value +// else if (c) +// callee(nonnull ptr) // set non-null attribute in the argument +// +// +// 2) We can also split a call-site based on constant incoming values of a PHI +// For example, +// from : +// Header: +// %c = icmp eq i32 %i1, %i2 +// br i1 %c, label %Tail, label %TBB +// TBB: +// br label Tail% +// Tail: +// %p = phi i32 [ 0, %Header], [ 1, %TBB] +// call void @bar(i32 %p) +// to +// Header: +// %c = icmp eq i32 %i1, %i2 +// br i1 %c, label %Tail-split0, label %TBB +// TBB: +// br label %Tail-split1 +// Tail-split0: +// call void @bar(i32 0) +// br label %Tail +// Tail-split1: +// call void @bar(i32 1) +// br label %Tail +// Tail: +// %p = phi i32 [ 0, %Tail-split0 ], [ 1, %Tail-split1 ] +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "callsite-splitting" + +STATISTIC(NumCallSiteSplit, "Number of call-site split"); + +static void addNonNullAttribute(Instruction *CallI, Instruction *&NewCallI, + Value *Op, Constant *ConstValue) { + if (!NewCallI) + NewCallI = CallI->clone(); + CallSite CS(NewCallI); + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) + if (*I == Op) + CS.addParamAttr(ArgNo, Attribute::NonNull); +} + +static void setConstantInArgument(Instruction *CallI, Instruction *&NewCallI, + Value *Op, Constant *ConstValue) { + if (!NewCallI) + NewCallI = CallI->clone(); + CallSite CS(NewCallI); + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) + if (*I == Op) + CS.setArgument(ArgNo, ConstValue); +} + +static bool createCallSitesOnOrPredicatedArgument( + CallSite CS, Instruction *&NewCSTakendFromHeader, + Instruction *&NewCSTakenFromNextCond, + SmallVectorImpl &BranchInsts, BasicBlock *HeaderBB) { + assert(BranchInsts.size() <= 2 && + "Unexpected number of blocks in the OR predicated condition"); + Instruction *Instr = CS.getInstruction(); + BasicBlock *CallSiteBB = Instr->getParent(); + TerminatorInst *HeaderTI = HeaderBB->getTerminator(); + bool IsCSInTakenPath = CallSiteBB == HeaderTI->getSuccessor(0); + + for (unsigned I = 0, E = BranchInsts.size(); I != E; ++I) { + BranchInst *PBI = BranchInsts[I]; + assert(PBI->isConditional() && + "Expected predecessor block to end in a conditional branch."); + ICmpInst *Cmp = cast(PBI->getCondition()); + Value *Op0 = Cmp->getOperand(0); + Constant *Op1 = cast(Cmp->getOperand(1)); + CmpInst::Predicate Pred = Cmp->getPredicate(); + + if (PBI->getParent() == HeaderBB) { + Instruction *&CallTakenFromHeader = + IsCSInTakenPath ? NewCSTakendFromHeader : NewCSTakenFromNextCond; + Instruction *&CallUntakenFromHeader = + IsCSInTakenPath ? NewCSTakenFromNextCond : NewCSTakendFromHeader; + + assert(Pred == ICmpInst::ICMP_EQ || + Pred == ICmpInst::ICMP_NE && + "Unexpected predicate in an OR condition"); + + // Set the constant value for agruments in the call predicated based on + // the OR condition. + Instruction *&CallToSetConst = Pred == ICmpInst::ICMP_EQ + ? CallTakenFromHeader + : CallUntakenFromHeader; + setConstantInArgument(Instr, CallToSetConst, Op0, Op1); + + // Add the NonNull attribute if compared with the null pointer. + if (Op1->getType()->isPointerTy() && Op1->isNullValue()) { + Instruction *&CallToSetAttr = Pred == ICmpInst::ICMP_EQ + ? CallUntakenFromHeader + : CallTakenFromHeader; + addNonNullAttribute(Instr, CallToSetAttr, Op0, Op1); + } + + } else { + if (Pred == ICmpInst::ICMP_EQ) { + if (PBI->getSuccessor(0) == Instr->getParent()) { + // Set the constant value for the call taken from the second block in + // the OR condition. + setConstantInArgument(Instr, NewCSTakenFromNextCond, Op0, Op1); + } else { + // Add the NonNull attribute if compared with the null pointer for the + // call taken from the second block in the OR condition. + if (Op1->getType()->isPointerTy() && Op1->isNullValue()) + addNonNullAttribute(Instr, NewCSTakenFromNextCond, Op0, Op1); + } + } else { + if (PBI->getSuccessor(0) == Instr->getParent()) { + // Add the NonNull attribute if compared with the null pointer for the + // call taken from the second block in the OR condition. + if (Op1->getType()->isPointerTy() && Op1->isNullValue()) + addNonNullAttribute(Instr, NewCSTakenFromNextCond, Op0, Op1); + } else if (Pred == ICmpInst::ICMP_NE) { + // Set the constant value for the call in the untaken path from the + // header block. + setConstantInArgument(Instr, NewCSTakenFromNextCond, Op0, Op1); + } else + llvm_unreachable("Unexpected condition"); + } + } + } + return NewCSTakendFromHeader || NewCSTakenFromNextCond; +} + +/// Return true if the CS is split into its new predecessors which are directly +/// hooked to each of its orignial predecessors pointed by PredBB1 and PredBB2. +/// Note that PredBB1 and PredBB2 are decided in findPredicatedArgument(), +/// especially for the OR predicated case where PredBB1 will point the header, +/// and PredBB2 will point the the second compare block. CallInst1 and CallInst2 +/// will be the new call-sites placed in the new predecessors split for PredBB1 +/// and PredBB2, repectively. Therefore, CallInst1 will be the call-site placed +/// between Header and Tail, and CallInst2 will be the call-site between TBB and +/// Tail. For example, in the IR below with an OR condition, the call-site can +/// be split +/// +/// from : +/// +/// Header: +/// %c = icmp eq i32* %a, null +/// br i1 %c %Tail, %TBB +/// TBB: +/// %c2 = icmp eq i32* %b, null +/// br i1 %c %Tail, %End +/// Tail: +/// %ca = call i1 @callee (i32* %a, i32* %b) +/// +/// to : +/// +/// Header: // PredBB1 is Header +/// %c = icmp eq i32* %a, null +/// br i1 %c %Tail-split1, %TBB +/// TBB: // PredBB2 is TBB +/// %c2 = icmp eq i32* %b, null +/// br i1 %c %Tail-split2, %End +/// Tail-split1: +/// %ca1 = call @callee (i32* null, i32* %b) // CallInst1 +/// br %Tail +/// Tail-split2: +/// %ca2 = call @callee (i32* nonnull %a, i32* null) // CallInst2 +/// br %Tail +/// Tail: +/// %p = phi i1 [%ca1, %Tail-split1],[%ca2, %Tail-split2] +/// +/// Note that for an OR predicated case, CallInst1 and CallInst2 should be +/// created with more constrained arguments in +/// createCallSitesOnOrPredicatedArgument(). +static bool splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, + Instruction *CallInst1, Instruction *CallInst2) { + Instruction *Instr = CS.getInstruction(); + assert(Instr == (Instr->getParent()->getFirstNonPHI()) && + "Unexpected call-site"); + + BasicBlock *TailBB = Instr->getParent(); + BasicBlock *SplitBlock1 = + SplitBlockPredecessors(TailBB, PredBB1, ".predBB1.split"); + BasicBlock *SplitBlock2 = + SplitBlockPredecessors(TailBB, PredBB2, ".predBB2.split"); + + if (!SplitBlock1 || !SplitBlock2) + return false; + + if (!CallInst1) + CallInst1 = Instr->clone(); + if (!CallInst2) + CallInst2 = Instr->clone(); + + CallInst1->insertBefore(&*SplitBlock1->getFirstInsertionPt()); + CallInst2->insertBefore(&*SplitBlock2->getFirstInsertionPt()); + + CallSite CS1(CallInst1); + CallSite CS2(CallInst2); + + // Handle PHIs used as arguments in the call-site. + for (BasicBlock::iterator BI = Instr->getParent()->begin(), + BE = Instr->getParent()->end(); + BI != BE; ++BI) { + PHINode *PN = dyn_cast(&*BI); + if (!PN) + break; + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) + if (*I == PN) { + CS1.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock1)); + CS2.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock2)); + } + } + + // Replace users of the original call with a PHI mering call-sites split. + if (Instr->getNumUses()) { + PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call", Instr); + PN->addIncoming(CallInst1, SplitBlock1); + PN->addIncoming(CallInst2, SplitBlock2); + Instr->replaceAllUsesWith(PN); + } + Instr->eraseFromParent(); + return true; +} + +static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallSite CS) { + assert(isa(Cmp->getOperand(1)) && "Expected a constant operand."); + Value *Op0 = Cmp->getOperand(0); + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) { + // Don't consider constant or arguments that are already known non-null. + if (isa(*I) || CS.paramHasAttr(ArgNo, Attribute::NonNull)) + continue; + + if (*I == Op0) + return true; + } + return false; +} + +static void findOrCondRelevantToCallArgument( + CallSite CS, BasicBlock *PredBB, BasicBlock *OtherPredBB, + SmallVectorImpl &BranchInsts, BasicBlock *&HeaderBB) { + auto *PBI = dyn_cast(PredBB->getTerminator()); + if (!PBI || !PBI->isConditional()) + return; + + if (PBI->getSuccessor(0) == OtherPredBB || + PBI->getSuccessor(1) == OtherPredBB) + if (PredBB == OtherPredBB->getSinglePredecessor()) { + assert(HeaderBB == nullptr && + "Expect to find only a single header block"); + HeaderBB = PredBB; + } + + CmpInst::Predicate Pred; + Value *Cond = PBI->getCondition(); + if (!match(Cond, m_ICmp(Pred, m_Value(), m_Constant()))) + return; + ICmpInst *Cmp = cast(Cond); + if (isCondRelevantToAnyCallArgument(Cmp, CS)) + if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) + BranchInsts.push_back(PBI); +} + +// Return true if the call-site has an argument which is a PHI with only +// constant incoming values. +static bool isPredicatedOnPHI(CallSite CS) { + Instruction *Instr = CS.getInstruction(); + BasicBlock *Parent = Instr->getParent(); + if (Instr != (Parent->getFirstNonPHI())) + return false; + + for (BasicBlock::iterator BI = Parent->begin(), BE = Parent->end(); + BI != BE; ++BI) { + if (PHINode *PN = dyn_cast(&*BI)) { + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) + if (*I == PN) { + assert(PN->getNumIncomingValues() == 2 && + "Unexpected number of incoming values"); + if (PN->getIncomingBlock(0) == PN->getIncomingBlock(1)) + return false; + if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) + continue; + if (isa(PN->getIncomingValue(0)) && + isa(PN->getIncomingValue(1))) + return true; + } + } + break; + } + return false; +} + +// Return true if an agument in CS is predicated on an 'or' condition. +// Create new call-site with arguments constrained based on the OR condition. +static bool findPredicatedOnOrCondition(CallSite CS, BasicBlock *PredBB1, + BasicBlock *PredBB2, + Instruction *&NewCallTakenFromHeader, + Instruction *&NewCallTakenFromNextCond, + BasicBlock *&HeaderBB) { + SmallVector BranchInsts; + findOrCondRelevantToCallArgument(CS, PredBB1, PredBB2, BranchInsts, HeaderBB); + findOrCondRelevantToCallArgument(CS, PredBB2, PredBB1, BranchInsts, HeaderBB); + if (BranchInsts.empty() || HeaderBB == nullptr) + return false; + + // If an OR condition is detected, try to create call sites with constrained + // arguments (e.g., NonNull attribute or constant value). + return createCallSitesOnOrPredicatedArgument(CS, NewCallTakenFromHeader, + NewCallTakenFromNextCond, + BranchInsts, HeaderBB); +} + +static bool findPredicatedArgument(CallSite CS, Instruction *&CallInst1, + Instruction *&CallInst2, + BasicBlock *&PredBB1, BasicBlock *&PredBB2) { + BasicBlock *ParentBB = CS.getInstruction()->getParent(); + pred_iterator PII = pred_begin(ParentBB); + pred_iterator PIE = pred_end(ParentBB); + unsigned NumPreds = std::distance(PII, PIE); + + // Allow only one extra call-site. No more than two from one call-site. + if (NumPreds != 2) + return false; + + BasicBlock *Preds[2] = {*PII++, *PII}; + BasicBlock *&HeaderBB = PredBB1; + if (!findPredicatedOnOrCondition(CS, Preds[0], Preds[1], CallInst1, CallInst2, + HeaderBB) && + !isPredicatedOnPHI(CS)) + return false; + + if (!PredBB1) + PredBB1 = Preds[0]; + + PredBB2 = PredBB1 == Preds[0] ? Preds[1] : Preds[0]; + return true; +} + +static bool tryToSplitCallSite(CallSite CS) { + if (!CS.arg_size()) + return false; + + Instruction *Instr = CS.getInstruction(); + // Allow splitting a call-site only when there is no instruction before the + // call-site in the basic block. Based on this constraint, we only clone the + // call instruction, and we do not move a call-site across any other + // instruction. + if (Instr != (Instr->getParent()->getFirstNonPHI())) + return false; + + // FIXME: As of now we handle only CallInst. InvokeInst could be handled + // without too much effort. + if (!isa(Instr)) + return false; + + BasicBlock *PredBB1 = nullptr; + BasicBlock *PredBB2 = nullptr; + Instruction *CallInst1 = nullptr; + Instruction *CallInst2 = nullptr; + if (!findPredicatedArgument(CS, CallInst1, CallInst2, PredBB1, PredBB2)) + return false; + + if (!splitCallSite(CS, PredBB1, PredBB2, CallInst1, CallInst2)) { + if (CallInst1) + CallInst1->deleteValue(); + if (CallInst2) + CallInst2->deleteValue(); + return false; + } + NumCallSiteSplit++; + return true; +} + +static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) { + bool Changed = false; + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) { + BasicBlock &BB = *BI++; + for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) { + Instruction *I = &*II++; + CallSite CS(cast(I)); + if (!CS || isa(I) || isInstructionTriviallyDead(I, &TLI)) + continue; + + Function *Callee = CS.getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + continue; + Changed |= tryToSplitCallSite(CS); + } + } + return Changed; +} + +namespace { +struct CallSiteSplittingLegacyPass : public FunctionPass { + static char ID; + CallSiteSplittingLegacyPass() : FunctionPass(ID) { + initializeCallSiteSplittingLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + FunctionPass::getAnalysisUsage(AU); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + auto &TLI = getAnalysis().getTLI(); + return doCallSiteSplitting(F, TLI); + } +}; +} // namespace + +char CallSiteSplittingLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting", + "Call-site splitting", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting", + "Call-site splitting", false, false) +FunctionPass *llvm::createCallSiteSplittingPass() { + return new CallSiteSplittingLegacyPass(); +} + +PreservedAnalyses CallSiteSplittingPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &TLI = AM.getResult(F); + + if (!doCallSiteSplitting(F, TLI)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + return PA; +} Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -35,6 +35,7 @@ initializeADCELegacyPassPass(Registry); initializeBDCELegacyPassPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); + initializeCallSiteSplittingLegacyPassPass(Registry); initializeConstantHoistingLegacyPassPass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -76,6 +76,7 @@ ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass +; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: GlobalOptPass Index: test/Other/new-pm-lto-defaults.ll =================================================================== --- test/Other/new-pm-lto-defaults.ll +++ test/Other/new-pm-lto-defaults.ll @@ -33,6 +33,11 @@ ; CHECK-O2-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O2-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Function ; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis +; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +; CHECK-O2-NEXT: Starting llvm::Function pass manager run. +; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass +; CHECK-O2-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O2-NEXT: Finished llvm::Function pass manager run. ; CHECK-O2-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass> ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC @@ -41,7 +46,7 @@ ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}> ; CHECK-O-NEXT: Running analysis: AAManager -; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: GlobalSplitPass Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -72,6 +72,7 @@ ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass +; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: GlobalOptPass Index: test/Transforms/CallSiteSplitting/callsite-split.ll =================================================================== --- /dev/null +++ test/Transforms/CallSiteSplitting/callsite-split.ll @@ -0,0 +1,119 @@ +; RUN: opt < %s -callsite-splitting -inline -instcombine -jump-threading -S | FileCheck %s +; RUN: opt < %s -passes='function(callsite-splitting),cgscc(inline),function(instcombine,jump-threading)' -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linaro-linux-gnueabi" + +%struct.bitmap = type { i32, %struct.bitmap* } + +;CHECK-LABEL: @caller +;CHECK-LABEL: NextCond: +;CHECK: br {{.*}} label %callee.exit +;CHECK-LABEL: CallSiteBB.predBB1.split: +;CHECK: call void @callee(%struct.bitmap* null, %struct.bitmap* null, %struct.bitmap* %b_elt, i1 false) +;CHECK-LABEL: callee.exit: +;CHECK: call void @dummy2(%struct.bitmap* %a_elt) + +define void @caller(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt) { +entry: + br label %Top + +Top: + %tobool1 = icmp eq %struct.bitmap* %a_elt, null + br i1 %tobool1, label %CallSiteBB, label %NextCond + +NextCond: + %cmp = icmp ne %struct.bitmap* %b_elt, null + br i1 %cmp, label %CallSiteBB, label %End + +CallSiteBB: + %p = phi i1 [0, %Top], [%c, %NextCond] + call void @callee(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %p) + br label %End + +End: + ret void +} + +define void @callee(%struct.bitmap* %dst_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %c) { +entry: + %tobool = icmp ne %struct.bitmap* %a_elt, null + %tobool1 = icmp ne %struct.bitmap* %b_elt, null + %or.cond = and i1 %tobool, %tobool1 + br i1 %or.cond, label %Cond, label %Big + +Cond: + %cmp = icmp eq %struct.bitmap* %dst_elt, %a_elt + br i1 %cmp, label %Small, label %Big + +Small: + call void @dummy2(%struct.bitmap* %a_elt) + br label %End + +Big: + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + br label %End + +End: + ret void +} + +declare void @dummy2(%struct.bitmap*) +declare void @dummy1(%struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*) + + +;CHECK-LABEL: @caller2 +;CHECK-LABEL: CallSiteBB.predBB1.split: +;CHECK: call void @dummy4() +;CHECK-LABEL: CallSiteBB.predBB2.split: +;CHECK: call void @dummy3() +;CheCK-LABEL: CallSiteBB: +;CHECK: %phi.call = phi i1 [ false, %CallSiteBB.predBB1.split ], [ true, %CallSiteBB.predBB2.split ] +;CHECK: call void @foo(i1 %phi.call) +define void @caller2(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, %struct.bitmap* %c_elt) { +entry: + br label %Top + +Top: + %tobool1 = icmp eq %struct.bitmap* %a_elt, %b_elt + br i1 %tobool1, label %CallSiteBB, label %NextCond + +NextCond: + %cmp = icmp ne %struct.bitmap* %b_elt, %c_elt + br i1 %cmp, label %CallSiteBB, label %End + +CallSiteBB: + %phi = phi i1 [0, %Top],[1, %NextCond] + %u = call i1 @callee2(i1 %phi) + call void @foo(i1 %u) + br label %End + +End: + ret void +} + +define i1 @callee2(i1 %b) { +entry: + br i1 %b, label %BB1, label %BB2 + +BB1: + call void @dummy3() + br label %End + +BB2: + call void @dummy4() + br label %End + +End: + ret i1 %b +} + +declare void @dummy3() +declare void @dummy4() +declare void @foo(i1)