Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -80,6 +80,7 @@ void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); +void initializeCallSiteSplittingLegacyPassPass(PassRegistry&); void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&); void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&); void initializeCFGPrinterLegacyPassPass(PassRegistry&); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -73,6 +73,14 @@ // FunctionPass *createDeadStoreEliminationPass(); + +//===----------------------------------------------------------------------===// +// +// CallSiteSplitting - This pass split call-site based on its known argument +// values. +FunctionPass *createCallSiteSplittingPass(); + + //===----------------------------------------------------------------------===// // // AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This Index: include/llvm/Transforms/Scalar/CallSiteSplitting.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/CallSiteSplitting.h @@ -0,0 +1,29 @@ +//===- CallSiteSplitting..h - Callsite Splitting ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H +#define LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/Compiler.h" +#include + +namespace llvm { + +struct CallSiteSplittingPass : PassInfoMixin { + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_CALLSITESPLITTING__H Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -88,6 +88,7 @@ #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/Transforms/Scalar/DCE.h" @@ -594,6 +595,10 @@ GlobalCleanupPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(GlobalCleanupPM, Level); + if (Level == O3) { + GlobalCleanupPM.addPass(CallSiteSplittingPass()); + } + GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); @@ -956,6 +961,10 @@ // calls, etc, so let instcombine do this. FunctionPassManager PeepholeFPM(DebugLogging); PeepholeFPM.addPass(InstCombinePass()); + + // Split call sites. + PeepholeFPM.addPass(CallSiteSplittingPass()); + invokePeepholeEPCallbacks(PeepholeFPM, Level); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM))); Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -139,6 +139,7 @@ FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass()) FUNCTION_PASS("bdce", BDCEPass()) FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) +FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -468,6 +468,10 @@ addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); + + if (OptLevel > 2) + MPM.add(createCallSiteSplittingPass()); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE // For SamplePGO in ThinLTO compile phase, we do not want to do indirect @@ -739,6 +743,8 @@ // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. addInstructionCombiningPass(PM); + + PM.add(createCallSiteSplittingPass()); addExtensionsToPM(EP_Peephole, PM); // Inline small functions Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -2,6 +2,7 @@ ADCE.cpp AlignmentFromAssumptions.cpp BDCE.cpp + CallSiteSplitting.cpp ConstantHoisting.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp Index: lib/Transforms/Scalar/CallSiteSplitting.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -0,0 +1,427 @@ +//===- CallSiteSplitting.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a transformation that tries to split a call-site to pass +// more constrained arguments if its argument is predicated in the control flow +// so that we can expose better context to the later passes (e.g, inliner, jump +// threading, or IPA-CP based function cloning, etc.). +// As of now we support two cases : +// +// 1) If a call site is dominated by an OR condition and if any of its arguments +// are predicated on this OR condition, try to split the condition with more +// constrained arguments. For example, in the code below, we try to split the +// call site since we can predicate the argument(ptr) based on the OR condition. +// +// Split from : +// if (!ptr || c) +// callee(ptr); +// to : +// if (!ptr) +// callee(nonnull ptr) // set non-null attribute in the argument +// else if (c) +// callee(null) // set the known constant value +// +// +// 2) We can also split a call-site based on constant incoming values of a PHI +// For example, +// from : +// BB0: +// %c = icmp eq i32 %i1, %i2 +// br i1 %c, label %BB2, label %BB1 +// BB1: +// br label %BB2 +// BB2: +// %p = phi i32 [ 0, %BB0 ], [ 1, %BB1 ] +// call void @bar(i32 %p) +// to +// BB0: +// %c = icmp eq i32 %i1, %i2 +// br i1 %c, label %BB2-split0, label %BB1 +// BB1: +// br label %BB2-split1 +// BB2-split0: +// call void @bar(i32 0) +// br label %BB2 +// BB2-split1: +// call void @bar(i32 1) +// br label %BB2 +// BB2: +// %p = phi i32 [ 0, %BB2-split0 ], [ 1, %BB2-split1 ] +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; +using namespace PatternMatch; + +#define DEBUG_TYPE "callsite-splitting" + +STATISTIC(NumCallSiteSplit, "Number of call-site split"); + +static void addNonNullAttribute(Instruction *CallI, Instruction *&NewCallI, + Value *Op, Constant *ConstValue) { + if (!NewCallI) + NewCallI = CallI->clone(); + CallSite CS(NewCallI); + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) + if (*I == Op) + CS.addParamAttr(ArgNo, Attribute::NonNull); +} + +static void setConstantInArgument(Instruction *CallI, Instruction *&NewCallI, + Value *Op, Constant *ConstValue) { + if (!NewCallI) + NewCallI = CallI->clone(); + CallSite CS(NewCallI); + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) + if (*I == Op) + CS.setArgument(ArgNo, ConstValue); +} + +static bool createCallSitesOnOrPredicatedArgument( + CallSite CS, Instruction *&NewCSTakendFromTop, + Instruction *&NewCSTakenFromNextCond, + SmallVectorImpl &BranchInsts, BasicBlock *TopBB) { + assert(BranchInsts.size() <= 2 && + "Unexpected number of blocks in the OR predicated condition"); + Instruction *Instr = CS.getInstruction(); + BasicBlock *CallSiteBB = Instr->getParent(); + TerminatorInst *TopTI = TopBB->getTerminator(); + bool IsCSInTakenPath = CallSiteBB == TopTI->getSuccessor(0); + + for (unsigned I = 0, E = BranchInsts.size(); I != E; ++I) { + BranchInst *PBI = BranchInsts[I]; + assert(PBI->isConditional()); + ICmpInst *Cmp = cast(PBI->getCondition()); + Value *Op0 = Cmp->getOperand(0); + Constant *Op1 = cast(Cmp->getOperand(1)); + CmpInst::Predicate Pred = Cmp->getPredicate(); + + if (PBI->getParent() == TopBB) { + Instruction *&CallTakenFromTop = + IsCSInTakenPath ? NewCSTakendFromTop : NewCSTakenFromNextCond; + Instruction *&CallUntakenFromTop = + IsCSInTakenPath ? NewCSTakenFromNextCond : NewCSTakendFromTop; + + assert(Pred == ICmpInst::ICMP_EQ || + Pred == ICmpInst::ICMP_NE && + "Unexpected predicate in an OR condition"); + + // Set the constant value for agruments in the call predicated based on + // the OR condition. + Instruction *&CallToSetConst = + Pred == ICmpInst::ICMP_EQ ? CallTakenFromTop : CallUntakenFromTop; + setConstantInArgument(Instr, CallToSetConst, Op0, Op1); + + // Add the NonNull attribute if compared with the null pointer. + if (Op1->getType()->isPointerTy() && Op1->isNullValue()) { + Instruction *&CallToSetAttr = + Pred == ICmpInst::ICMP_EQ ? CallUntakenFromTop : CallTakenFromTop; + addNonNullAttribute(Instr, CallToSetAttr, Op0, Op1); + } + + } else { + if (Pred == ICmpInst::ICMP_EQ) { + if (PBI->getSuccessor(0) == Instr->getParent()) { + // Set the constant value for the call taken from the second block in + // the OR condition. + setConstantInArgument(Instr, NewCSTakenFromNextCond, Op0, Op1); + } else { + // Add the NonNull attribute if compared with the null pointer for the + // call taken from the second block in the OR condition. + if (Op1->getType()->isPointerTy() && Op1->isNullValue()) + addNonNullAttribute(Instr, NewCSTakenFromNextCond, Op0, Op1); + } + } else { + if (PBI->getSuccessor(0) == Instr->getParent()) { + // Add the NonNull attribute if compared with the null pointer for the + // call taken from the second block in the OR condition. + if (Op1->getType()->isPointerTy() && Op1->isNullValue()) + addNonNullAttribute(Instr, NewCSTakenFromNextCond, Op0, Op1); + } else if (Pred == ICmpInst::ICMP_NE) { + // Set the constant value for the call in the untaken path from the + // top block. + setConstantInArgument(Instr, NewCSTakenFromNextCond, Op0, Op1); + } else + llvm_unreachable("Unexpected condition"); + } + } + } + return NewCSTakendFromTop || NewCSTakenFromNextCond; +} + +static bool splitCallSite(CallSite CS, BasicBlock *PredBB1, BasicBlock *PredBB2, + Instruction *CallInst1, Instruction *CallInst2) { + Instruction *Instr = CS.getInstruction(); + assert(Instr == (Instr->getParent()->getFirstNonPHI()) && + "Unexpected call-site"); + + BasicBlock *CallSiteBB = Instr->getParent(); + BasicBlock *SplitBlock1 = + SplitBlockPredecessors(CallSiteBB, PredBB1, ".predBB1.split"); + BasicBlock *SplitBlock2 = + SplitBlockPredecessors(CallSiteBB, PredBB2, ".predBB2.split"); + + if (!SplitBlock1 || !SplitBlock2) + return false; + + if (!CallInst1) + CallInst1 = Instr->clone(); + if (!CallInst2) + CallInst2 = Instr->clone(); + + CallInst1->insertBefore(&*SplitBlock1->getFirstInsertionPt()); + CallInst2->insertBefore(&*SplitBlock2->getFirstInsertionPt()); + + CallSite CS1(CallInst1); + CallSite CS2(CallInst2); + + // Handle PHIs used as arguments in the call-site. + for (BasicBlock::iterator BI = Instr->getParent()->begin(), + BE = Instr->getParent()->end(); + BI != BE; ++BI) { + PHINode *PN = dyn_cast(&*BI); + if (!PN) + break; + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) + if (*I == PN) { + CS1.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock1)); + CS2.setArgument(ArgNo, PN->getIncomingValueForBlock(SplitBlock2)); + } + } + + // Replace users of the original call with a PHI mering call-sites split. + if (Instr->getNumUses()) { + PHINode *PN = PHINode::Create(Instr->getType(), 2, "phi.call", Instr); + PN->addIncoming(CallInst1, SplitBlock1); + PN->addIncoming(CallInst2, SplitBlock2); + Instr->replaceAllUsesWith(PN); + } + Instr->eraseFromParent(); + return true; +} + +static bool isCondRelevantToAnyCallArgument(ICmpInst *Cmp, CallSite CS) { + assert(isa(Cmp->getOperand(1)) && "Expected a constant operand."); + Value *Op0 = Cmp->getOperand(0); + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) { + // Don't consider arguments that are already known non-null. + if (CS.paramHasAttr(ArgNo, Attribute::NonNull)) + continue; + + if (*I == Op0) + return true; + } + return false; +} + +static void findOrCondRelevantToCallArgument( + CallSite CS, BasicBlock *PredBB, BasicBlock *OtherPredBB, + SmallVectorImpl &BranchInsts, BasicBlock *&TopBB) { + auto *PBI = dyn_cast(PredBB->getTerminator()); + if (!PBI || !PBI->isConditional()) + return; + + if (PBI->getSuccessor(0) == OtherPredBB || + PBI->getSuccessor(1) == OtherPredBB) + if (PredBB == OtherPredBB->getSinglePredecessor()) { + assert(TopBB == nullptr && "Expect to find only a single top block"); + TopBB = PredBB; + } + + CmpInst::Predicate Pred; + Value *Cond = PBI->getCondition(); + if (match(Cond, m_ICmp(Pred, m_Value(), m_Constant()))) { + ICmpInst *Cmp = cast(Cond); + if (isCondRelevantToAnyCallArgument(Cmp, CS)) + if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) + BranchInsts.push_back(PBI); + } +} + +// Return true if the call-site has an argument which is a PHI with only +// constant incoming values. +static bool isPredicatedOnPHI(CallSite CS) { + Instruction *Instr = CS.getInstruction(); + if (Instr != (Instr->getParent()->getFirstNonPHI())) + return false; + + for (BasicBlock::iterator BI = Instr->getParent()->begin(), + BE = Instr->getParent()->end(); + BI != BE; ++BI) { + if (PHINode *PN = dyn_cast(&*BI)) { + unsigned ArgNo = 0; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; + ++I, ++ArgNo) + if (*I == PN) { + assert(PN->getNumIncomingValues() == 2 && + "Unexpected number of incoming values"); + if (PN->getIncomingBlock(0) == PN->getIncomingBlock(1)) + return false; + if (PN->getIncomingValue(0) == PN->getIncomingValue(1)) + continue; + if (isa(PN->getIncomingValue(0)) && + isa(PN->getIncomingValue(1))) + return true; + } + } + break; + } + return false; +} + +// Return true if an agument in CS is predicated on an 'or' condition. +// Create new call-site with arguments constrained based on the OR condition. +static bool findPredicatedOnOrCondition(CallSite CS, BasicBlock *PredBB1, + BasicBlock *PredBB2, + Instruction *&NewCallTakenFromTop, + Instruction *&NewCallTakenFromNextCond, + BasicBlock *&TopBB) { + SmallVector BranchInsts; + findOrCondRelevantToCallArgument(CS, PredBB1, PredBB2, BranchInsts, TopBB); + findOrCondRelevantToCallArgument(CS, PredBB2, PredBB1, BranchInsts, TopBB); + if (BranchInsts.empty() || TopBB == nullptr) + return false; + + // If an OR condition is detected, try to create call sites with constrained + // arguments (e.g., NonNull attribute or constant value). + return createCallSitesOnOrPredicatedArgument( + CS, NewCallTakenFromTop, NewCallTakenFromNextCond, BranchInsts, TopBB); +} + +static bool findPredicatedArgument(CallSite CS, Instruction *&CallInst1, + Instruction *&CallInst2, + BasicBlock *&PredBB1, BasicBlock *&PredBB2) { + Instruction *Instr = CS.getInstruction(); + + // Allow splitting a call-site only when there is no instruction before the + // call-site in the basic block. Based on this constraint, we only clone the + // call instruction, and we do not move a call-site across any other + // instruction. + if (Instr != (Instr->getParent()->getFirstNonPHI())) + return false; + + // FIXME: As of now we handle only CallInst. InvokeInst could be handled + // without too much effort. + if (!isa(Instr)) + return false; + + BasicBlock *ParentBB = CS.getInstruction()->getParent(); + pred_iterator PII = pred_begin(ParentBB); + pred_iterator PIE = pred_end(ParentBB); + unsigned NumPreds = std::distance(PII, PIE); + + // Allow only one extra call-site. No more than two from one call-site. + if (NumPreds != 2) + return false; + + BasicBlock *Preds[2] = {*PII++, *PII}; + BasicBlock *&TopBB = PredBB1; + if (!findPredicatedOnOrCondition(CS, Preds[0], Preds[1], CallInst1, CallInst2, + TopBB) && + !isPredicatedOnPHI(CS)) + return false; + + if (!PredBB1) + PredBB1 = Preds[0]; + + PredBB2 = PredBB1 == Preds[0] ? Preds[1] : Preds[0]; + return true; +} + +static bool tryToSplitCallSite(CallSite CS) { + if (!CS.arg_size()) + return false; + + BasicBlock *PredBB1 = nullptr; + BasicBlock *PredBB2 = nullptr; + Instruction *CallInst1 = nullptr; + Instruction *CallInst2 = nullptr; + + if (!findPredicatedArgument(CS, CallInst1, CallInst2, PredBB1, PredBB2)) + return false; + + if (!splitCallSite(CS, PredBB1, PredBB2, CallInst1, CallInst2)) { + if (CallInst1) + CallInst1->deleteValue(); + if (CallInst2) + CallInst2->deleteValue(); + return false; + } + NumCallSiteSplit++; + return true; +} + +static bool processToSplitCallSites(Function &F) { + bool Changed = false; + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) { + BasicBlock *BB = &*BI++; + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { + Instruction *I = &*II++; + CallSite CS(cast(I)); + if (!CS) + continue; + Function *Callee = CS.getCalledFunction(); + if (!Callee || Callee->isDeclaration()) + continue; + Changed |= tryToSplitCallSite(CS); + } + } + return Changed; +} + +namespace { +struct CallSiteSplittingLegacyPass : public FunctionPass { + static char ID; + CallSiteSplittingLegacyPass() : FunctionPass(ID) { + initializeCallSiteSplittingLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + return processToSplitCallSites(F); + } +}; +} // namespace + +char CallSiteSplittingLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting", + "Call-site splitting", false, false) +INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting", + "Call-site splitting", false, false) +FunctionPass *llvm::createCallSiteSplittingPass() { + return new CallSiteSplittingLegacyPass(); +} + +PreservedAnalyses CallSiteSplittingPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = false; + Changed = processToSplitCallSites(F); + + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + return PA; +} Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -35,6 +35,7 @@ initializeADCELegacyPassPass(Registry); initializeBDCELegacyPassPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); + initializeCallSiteSplittingLegacyPassPass(Registry); initializeConstantHoistingLegacyPassPass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -86,6 +86,7 @@ ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass +; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA Index: test/Other/new-pm-lto-defaults.ll =================================================================== --- test/Other/new-pm-lto-defaults.ll +++ test/Other/new-pm-lto-defaults.ll @@ -55,6 +55,7 @@ ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O2-NEXT: Starting llvm::Function pass manager run. ; CHECK-O2-NEXT: Running pass: InstCombinePass +; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass ; CHECK-O2-NEXT: Finished llvm::Function pass manager run. ; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}InlinerPass> Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -81,6 +81,7 @@ ; CHECK-O-NEXT: Starting llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis +; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA Index: test/Transforms/CallSiteSplitting/callsite-split.ll =================================================================== --- /dev/null +++ test/Transforms/CallSiteSplitting/callsite-split.ll @@ -0,0 +1,118 @@ +; RUN: opt < %s -callsite-splitting -inline -instcombine -jump-threading -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linaro-linux-gnueabi" + +%struct.bitmap = type { i32, %struct.bitmap* } + +;CHECK-LABEL: @caller +;CHECK-LABEL: NextCond: +;CHECK: br {{.*}} label %callee.exit +;CHECK-LABEL: CallSiteBB.predBB1.split: +;CHECK: call void @callee(%struct.bitmap* null, %struct.bitmap* null, %struct.bitmap* %b_elt, i1 false) +;CHECK-LABEL: callee.exit: +;CHECK: call void @dummy2(%struct.bitmap* %a_elt) + +define void @caller(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt) { +entry: + br label %Top + +Top: + %tobool1 = icmp eq %struct.bitmap* %a_elt, null + br i1 %tobool1, label %CallSiteBB, label %NextCond + +NextCond: + %cmp = icmp ne %struct.bitmap* %b_elt, null + br i1 %cmp, label %CallSiteBB, label %End + +CallSiteBB: + %p = phi i1 [0, %Top], [%c, %NextCond] + call void @callee(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %p) + br label %End + +End: + ret void +} + +define void @callee(%struct.bitmap* %dst_elt, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, i1 %c) { +entry: + %tobool = icmp ne %struct.bitmap* %a_elt, null + %tobool1 = icmp ne %struct.bitmap* %b_elt, null + %or.cond = and i1 %tobool, %tobool1 + br i1 %or.cond, label %Cond, label %Big + +Cond: + %cmp = icmp eq %struct.bitmap* %dst_elt, %a_elt + br i1 %cmp, label %Small, label %Big + +Small: + call void @dummy2(%struct.bitmap* %a_elt) + br label %End + +Big: + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + call void @dummy1(%struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt, %struct.bitmap* %a_elt) + br label %End + +End: + ret void +} + +declare void @dummy2(%struct.bitmap*) +declare void @dummy1(%struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*, %struct.bitmap*) + + +;CHECK-LABEL: @caller2 +;CHECK-LABEL: CallSiteBB.predBB1.split: +;CHECK: call void @dummy4() +;CHECK-LABEL: CallSiteBB.predBB2.split: +;CHECK: call void @dummy3() +;CheCK-LABEL: CallSiteBB: +;CHECK: %phi.call = phi i1 [ false, %CallSiteBB.predBB1.split ], [ true, %CallSiteBB.predBB2.split ] +;CHECK: call void @foo(i1 %phi.call) +define void @caller2(i1 %c, %struct.bitmap* %a_elt, %struct.bitmap* %b_elt, %struct.bitmap* %c_elt) { +entry: + br label %Top + +Top: + %tobool1 = icmp eq %struct.bitmap* %a_elt, %b_elt + br i1 %tobool1, label %CallSiteBB, label %NextCond + +NextCond: + %cmp = icmp ne %struct.bitmap* %b_elt, %c_elt + br i1 %cmp, label %CallSiteBB, label %End + +CallSiteBB: + %phi = phi i1 [0, %Top],[1, %NextCond] + %u = call i1 @callee2(i1 %phi) + call void @foo(i1 %u) + br label %End + +End: + ret void +} + +define i1 @callee2(i1 %b) { +entry: + br i1 %b, label %BB1, label %BB2 + +BB1: + call void @dummy3() + br label %End + +BB2: + call void @dummy4() + br label %End + +End: + ret i1 %b +} + +declare void @dummy3() +declare void @dummy4() +declare void @foo(i1)