Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -699,7 +699,7 @@ /// \param [in,out] BBList - A list in order of use to track the basic blocks. /// \returns The BasicBlock the IRSimilarityCandidate ends in. void getBasicBlocks(DenseSet &BBSet, - std::vector &BBList) const { + SmallVector &BBList) const { for (IRInstructionData &ID : *this) { BasicBlock *BB = ID.Inst->getParent(); if (BBSet.contains(BB)) Index: llvm/include/llvm/Transforms/IPO/IROutliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/IROutliner.h +++ llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -91,6 +91,10 @@ /// call. bool ChangedArgOrder = false; + /// Marks whether this region ends in a branch; if so, special handling is + /// required for the following basic blocks. + bool EndsInBranch = false; + /// Mapping of the argument number in the deduplicated function /// to a given constant, which is used when creating the arguments to the call /// to the newly created deduplicated function. This is handled separately @@ -316,8 +320,9 @@ struct InstructionAllowed : public InstVisitor { InstructionAllowed() {} - // TODO: Determine a scheme to resolve when the label is similar enough. - bool visitBranchInst(BranchInst &BI) { return false; } + bool visitBranchInst(BranchInst &BI) { + return EnableBranches; + } // TODO: Determine a scheme to resolve when the labels are similar enough. bool visitPHINode(PHINode &PN) { return false; } // TODO: Handle allocas. @@ -356,6 +361,10 @@ // TODO: Handle interblock similarity. bool visitTerminator(Instruction &I) { return false; } bool visitInstruction(Instruction &I) { return true; } + + // The flag variable that marks whether we should allow branch instructions + // to be outlined.
+ bool EnableBranches = false; }; /// A InstVisitor used to exclude certain instructions from being outlined. Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -33,6 +33,8 @@ using namespace llvm; using namespace IRSimilarity; +extern cl::opt DisableBranches; + // Set to true if the user wants the ir outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. Since the outliner is confined to a single module (modulo LTO), @@ -91,6 +93,10 @@ /// to specific arguments. DenseMap CanonicalNumberToAggArg; + /// The number of branches in the region that target a basic block outside + /// of the region. + unsigned BranchesToOutside = 0; + /// The number of instructions that will be outlined by extracting \ref /// Regions. InstructionCost Benefit = 0; @@ -133,14 +139,26 @@ void OutlinableRegion::splitCandidate() { assert(!CandidateSplit && "Candidate already split!"); - Instruction *EndInst = (*Candidate->end()).Inst; - assert(EndInst && "Expected an end instruction?"); + Instruction *BackInst = Candidate->backInstruction(); + + Instruction *EndInst = nullptr; + // Check whether the last instruction is a terminator; if it is, we do + // not split on the following instruction. We leave the block as it is. We + // also check that it is not in the last block of the function, otherwise + // the check for whether the current following instruction matches the + // previously recorded instruction will be incorrect.
+ if (!BackInst->isTerminator() || + BackInst->getParent() != &BackInst->getFunction()->back()) { + EndInst = Candidate->end()->Inst; + assert(EndInst && "Expected an end instruction?"); + } // We check if the current instruction following the last instruction in the // region is the same as the recorded instruction following the last // instruction. If they do not match, there could be problems in rewriting // the program after outlining, so we ignore it. - if (EndInst != Candidate->backInstruction()->getNextNonDebugInstruction()) + if (!BackInst->isTerminator() && + EndInst != BackInst->getNextNonDebugInstruction()) return; Instruction *StartInst = (*Candidate->begin()).Inst; @@ -166,13 +184,20 @@ std::string OriginalName = PrevBB->getName().str(); StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline"); - - // This is the case for the inner block since we do not have to include - // multiple blocks. - EndBB = StartBB; - FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); + PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB); CandidateSplit = true; + if (!BackInst->isTerminator()) { + EndBB = EndInst->getParent(); + FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); + EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB); + FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB); + return; + } + + EndBB = BackInst->getParent(); + EndsInBranch = true; + FollowBB = nullptr; } void OutlinableRegion::reattachCandidate() { @@ -193,7 +218,6 @@ // inst3 // inst4 assert(StartBB != nullptr && "StartBB for Candidate is not defined!"); - assert(FollowBB != nullptr && "StartBB for Candidate is not defined!"); // StartBB should only have one predecessor since we put an unconditional // branch at the end of PrevBB when we split the BasicBlock. 
@@ -202,21 +226,24 @@ "No Predecessor for the region start basic block!"); assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); - assert(EndBB->getTerminator() && "Terminator removed from EndBB!"); PrevBB->getTerminator()->eraseFromParent(); - EndBB->getTerminator()->eraseFromParent(); moveBBContents(*StartBB, *PrevBB); BasicBlock *PlacementBB = PrevBB; if (StartBB != EndBB) PlacementBB = EndBB; - moveBBContents(*FollowBB, *PlacementBB); + if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { + assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); + assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); + PlacementBB->getTerminator()->eraseFromParent(); + moveBBContents(*FollowBB, *PlacementBB); + PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); + FollowBB->eraseFromParent(); + } PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); - PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); StartBB->eraseFromParent(); - FollowBB->eraseFromParent(); // Make sure to save changes back to the StartBB. StartBB = PrevBB; @@ -761,10 +788,52 @@ /// \param [in] Outputs - The values found by the code extractor. static void findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region, - ArrayRef Outputs) { + SetVector &Outputs) { OutlinableGroup &Group = *Region.Parent; IRSimilarityCandidate &C = *Region.Candidate; + SmallVector BE; + DenseSet BBSet; + C.getBasicBlocks(BBSet, BE); + + // Find the exits to the region. + SmallPtrSet Exits; + for (BasicBlock *Block : BE) { + for (BasicBlock *Succ : successors(Block)) { + if (!BBSet.contains(Succ)) { + Exits.insert(Succ); + } + } + } + + // For now, we check whether we have more than one exit, if we do, we + // ignore this region. + if (Exits.size() > 1) { + Region.IgnoreRegion = true; + return; + } + + // After determining which blocks exit to PHINodes, we add these PHINodes to + // the set of outputs to be processed. 
We also check the incoming values of + // the PHINodes for whether they should no longer be considered outputs. + DenseSet PHIWrapped; + for (BasicBlock *ExitBB : Exits) { + for (PHINode &PN : ExitBB->phis()) { + // Find all incoming values from the outlining region. + SmallVector IncomingVals; + for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx) + if (BBSet.contains(PN.getIncomingBlock(Idx))) + IncomingVals.push_back(Idx); + + // Do not process PHI if there is one (or fewer) predecessor from region. + if (IncomingVals.size() <= 1) + continue; + + Region.IgnoreRegion = true; + return; + } + } + // This counts the argument number in the extracted function. unsigned OriginalIndex = Region.NumExtractedInputs; @@ -840,7 +909,7 @@ // Map the outputs found by the CodeExtractor to the arguments found for // the overall function. - findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef()); + findExtractedOutputToOverallOutputMapping(Region, Outputs); } /// Replace the extracted function in the Region with a call to the overall @@ -1357,20 +1426,20 @@ // We check if the recorded instruction matches the actual next instruction, // if it does not, we fix it in the InstructionDataList. - Instruction *RealEndInstruction = - Region.Candidate->backInstruction()->getNextNonDebugInstruction(); - - assert(RealEndInstruction && "Next instruction is a nullptr?"); - if (Region.Candidate->end()->Inst != RealEndInstruction) { - IRInstructionDataList *IDL = Region.Candidate->front()->IDL; - Instruction *NewEndInst = RealEndInstruction; - IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate()) - IRInstructionData(*NewEndInst, InstructionClassifier.visit(*NewEndInst), - *IDL); - - // Insert the first IRInstructionData of the new region after the - // last IRInstructionData of the IRSimilarityCandidate. 
- IDL->insert(Region.Candidate->end(), *NewEndIRID); + if (!Region.Candidate->backInstruction()->isTerminator()) { + Instruction *NewEndInst = + Region.Candidate->backInstruction()->getNextNonDebugInstruction(); + assert(NewEndInst && "Next instruction is a nullptr?"); + if (Region.Candidate->end()->Inst != NewEndInst) { + IRInstructionDataList *IDL = Region.Candidate->front()->IDL; + IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate()) + IRInstructionData(*NewEndInst, + InstructionClassifier.visit(*NewEndInst), *IDL); + + // Insert the first IRInstructionData of the new region after the + // last IRInstructionData of the IRSimilarityCandidate. + IDL->insert(Region.Candidate->end(), *NewEndIRID); + } } return none_of(*IRSC, [this](IRInstructionData &ID) { @@ -1423,9 +1492,13 @@ if (PreviouslyOutlined) continue; - // TODO: If in the future we can outline across BasicBlocks, we will need to - // check all BasicBlocks contained in the region. - if (IRSC.getStartBB()->hasAddressTaken()) + // Check over the instructions, and if the basic block has its address + // taken for use somewhere else, we do not outline that block. + bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){ + return ID.Inst->getParent()->hasAddressTaken(); + }); + + if (BBHasAddressTaken) continue; if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() && @@ -1523,10 +1596,33 @@ OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI) { InstructionCost OutputCost = 0; + unsigned NumOutputBranches = 0; + + IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; + DenseSet CandidateBlocks; + Candidate.getBasicBlocks(CandidateBlocks); + + // Count the number of different output branches that point to blocks outside + // of the region. 
+ DenseSet FoundBlocks; + for (IRInstructionData &ID : Candidate) { + if (!isa(ID.Inst)) + continue; + + for (Value *V : ID.OperVals) { + BasicBlock *BB = static_cast(V); + DenseSet::iterator CBIt = CandidateBlocks.find(BB); + if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB)) + continue; + FoundBlocks.insert(BB); + NumOutputBranches++; + } + } + + CurrentGroup.BranchesToOutside = NumOutputBranches; for (const ArrayRef &OutputUse : CurrentGroup.OutputGVNCombinations) { - IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; for (unsigned GVN : OutputUse) { Optional OV = Candidate.fromGVN(GVN); assert(OV.hasValue() && "Could not find value for GVN?"); @@ -1541,14 +1637,14 @@ LLVM_DEBUG(dbgs() << "Adding: " << StoreCost << " instructions to cost for output of type " << *V->getType() << "\n"); - OutputCost += StoreCost; + OutputCost += StoreCost * NumOutputBranches; } InstructionCost BranchCost = TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize); LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for" << " a branch instruction\n"); - OutputCost += BranchCost; + OutputCost += BranchCost * NumOutputBranches; } // If there is more than one output scheme, we must have a comparison and @@ -1567,7 +1663,7 @@ LLVM_DEBUG(dbgs() << "Adding: " << TotalCost << " instructions for each switch case for each different" << " output path in a function\n"); - OutputCost += TotalCost; + OutputCost += TotalCost * NumOutputBranches; } return OutputCost; @@ -1655,13 +1751,12 @@ bool IROutliner::extractSection(OutlinableRegion &Region) { SetVector ArgInputs, Outputs, SinkCands; - Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands); - assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!"); - assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!"); + BasicBlock *InitialStart = Region.StartBB; Function *OrigF = Region.StartBB->getParent(); CodeExtractorAnalysisCache CEAC(*OrigF); 
- Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC); + Region.ExtractedFunction = + Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs); // If the extraction was successful, find the BasicBlock, and reassign the // OutlinableRegion blocks @@ -1672,7 +1767,23 @@ return false; } - BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor(); + // Get the block containing the call to the extracted function, and reassign + // the blocks as necessary. If the original block still exists, it is because + // we ended on a branch instruction, and so we move the contents into the + // block before and assign the previous block correctly. + User *InstAsUser = Region.ExtractedFunction->user_back(); + BasicBlock *RewrittenBB = cast(InstAsUser)->getParent(); + Region.PrevBB = RewrittenBB->getSinglePredecessor(); + assert(Region.PrevBB && "PrevBB is nullptr?"); + if (Region.PrevBB == InitialStart) { + BasicBlock *NewPrev = InitialStart->getSinglePredecessor(); + Instruction *BI = NewPrev->getTerminator(); + BI->eraseFromParent(); + moveBBContents(*InitialStart, *NewPrev); + Region.PrevBB = NewPrev; + InitialStart->eraseFromParent(); + } + Region.StartBB = RewrittenBB; Region.EndBB = RewrittenBB; @@ -1715,6 +1826,7 @@ unsigned IROutliner::doOutline(Module &M) { // Find the possible similarity sections. + InstructionClassifier.EnableBranches = !DisableBranches; IRSimilarityIdentifier &Identifier = getIRSI(M); SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); @@ -1773,7 +1885,9 @@ if (!OS->CandidateSplit) continue; - std::vector BE = {OS->StartBB}; + SmallVector BE; + DenseSet BBSet; + OS->Candidate->getBasicBlocks(BBSet, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); @@ -1882,7 +1996,9 @@ // Create functions out of all the sections, and mark them as outlined.
OutlinedRegions.clear(); for (OutlinableRegion *OS : CurrentGroup.Regions) { - SmallVector BE = {OS->StartBB}; + SmallVector BE; + DenseSet BBSet; + OS->Candidate->getBasicBlocks(BBSet, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); Index: llvm/test/Transforms/IROutliner/opt-remarks.ll =================================================================== --- llvm/test/Transforms/IROutliner/opt-remarks.ll +++ llvm/test/Transforms/IROutliner/opt-remarks.ll @@ -5,28 +5,27 @@ ; RUN: -pass-remarks-output=%t < %s ; RUN: cat %t | FileCheck -check-prefix=YAML %s -; CHECK: remark: :0:0: did not outline 2 regions due to estimated increase of 13 instructions at locations -; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 6 instructions at locations +; CHECK: remark: :0:0: did not outline 2 regions due to estimated increase of 10 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 4 instructions at locations ; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 0 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 10 instructions at locations ; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 1 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 11 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 2 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 12 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 13 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 3 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due 
to estimated increase of 4 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 14 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 5 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 15 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 6 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 11 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 12 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 7 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 8 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 18 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 14 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 9 instructions at locations -; CHECK-NEXT: :0:0: outlined 2 regions with decrease of 1 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 11 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 12 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 2 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 3 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 13 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 4 instructions at locations +; CHECK-NEXT: remark: :0:0: did not 
outline 2 regions due to estimated increase of 14 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 5 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 10 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 11 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 6 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 7 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 17 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 13 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 8 instructions at locations +; CHECK-NEXT: remark: :0:0: outlined 2 regions with decrease of 2 instructions at locations ; YAML: --- !Missed ; YAML-NEXT: Pass: iroutliner @@ -36,7 +35,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '13' +; YAML-NEXT: - InstructionIncrease: '10' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -50,7 +49,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '6' +; YAML-NEXT: - InstructionIncrease: '4' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -73,26 +72,12 @@ ; YAML-NEXT: --- !Missed ; YAML-NEXT: Pass: iroutliner ; YAML-NEXT: Name: WouldNotDecreaseSize -; YAML-NEXT: Function: function3 -; YAML-NEXT: Args: -; YAML-NEXT: - String: 
'did not outline ' -; YAML-NEXT: - String: '2' -; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '1' -; YAML-NEXT: - String: ' instructions at locations ' -; YAML-NEXT: - DebugLoc: '' -; YAML-NEXT: - String: ' ' -; YAML-NEXT: - DebugLoc: '' -; YAML-NEXT: ... -; YAML-NEXT: --- !Missed -; YAML-NEXT: Pass: iroutliner -; YAML-NEXT: Name: WouldNotDecreaseSize ; YAML-NEXT: Function: function1 ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '11' +; YAML-NEXT: - InstructionIncrease: '10' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -106,7 +91,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '2' +; YAML-NEXT: - InstructionIncrease: '1' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -120,7 +105,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '12' +; YAML-NEXT: - InstructionIncrease: '11' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -134,7 +119,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '13' +; YAML-NEXT: - InstructionIncrease: '12' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -148,7 +133,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - 
InstructionIncrease: '3' +; YAML-NEXT: - InstructionIncrease: '2' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -162,7 +147,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '4' +; YAML-NEXT: - InstructionIncrease: '3' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -176,7 +161,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '14' +; YAML-NEXT: - InstructionIncrease: '13' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -190,7 +175,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '5' +; YAML-NEXT: - InstructionIncrease: '4' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -204,7 +189,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '15' +; YAML-NEXT: - InstructionIncrease: '14' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -218,7 +203,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '6' +; YAML-NEXT: - InstructionIncrease: '5' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -232,7 +217,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - 
String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '11' +; YAML-NEXT: - InstructionIncrease: '10' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -246,7 +231,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '12' +; YAML-NEXT: - InstructionIncrease: '11' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -260,7 +245,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '7' +; YAML-NEXT: - InstructionIncrease: '6' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -274,7 +259,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '8' +; YAML-NEXT: - InstructionIncrease: '7' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -288,7 +273,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '18' +; YAML-NEXT: - InstructionIncrease: '17' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -302,7 +287,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '14' +; YAML-NEXT: - InstructionIncrease: '13' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -316,7 +301,7 @@ ; YAML-NEXT: - 
String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '9' +; YAML-NEXT: - InstructionIncrease: '8' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -330,7 +315,7 @@ ; YAML-NEXT: - String: 'outlined ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions with decrease of ' -; YAML-NEXT: - Benefit: '1' +; YAML-NEXT: - Benefit: '2' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' Index: llvm/test/Transforms/IROutliner/outlining-across-branch.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-across-branch.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This checks that we are able to outline exactly the same branch structure +; while also outlining similar items on either side of the branch. 
+ +define void @outline_outputs1() #0 { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %next +next: + store i32 2, i32* %output, align 4 + store i32 3, i32* %result, align 4 + ret void +} + +define void @outline_outputs2() #0 { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %next +next: + store i32 2, i32* %output, align 4 + store i32 3, i32* %result, align 4 + ret void +} +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]]) +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* 
[[TMP0:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: store i32 2, i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; Index: llvm/test/Transforms/IROutliner/outlining-basic-branches.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-basic-branches.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This checks that we are able to outline exactly the same structure without +; any other items to outline. + +define void @outline_outputs1() #0 { +entry: + br label %next +next: + br label %next2 +next2: + br label %next +next3: + %a = alloca i32, align 4 + br label %next4 +next4: + br label %next3 +next5: + br label %next6 +next6: + %b = alloca i32, align 4 + ret void +} + +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: call void @outlined_ir_func_0() +; CHECK-NEXT: br label [[NEXT]] +; CHECK: next3: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0() +; CHECK-NEXT: br label [[NEXT3:%.*]] +; CHECK: next5: +; CHECK-NEXT: br label [[NEXT6:%.*]] +; CHECK: next6: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK: newFuncRoot: +; CHECK-NEXT: br label [[NEXT_TO_OUTLINE:%.*]] +; CHECK: next_to_outline: +; CHECK-NEXT: br label [[NEXT2:%.*]] +; CHECK: next2: +; CHECK-NEXT: br label [[NEXT_EXITSTUB:%.*]] +; CHECK: next.exitStub: +; CHECK-NEXT: ret void +; Index: 
llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll @@ -0,0 +1,177 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Here we have multiple exits, but we can't actually outline anything but +; single entry and single exits yet, we check to make sure it doesn't happen. + +define void @outline_outputs1() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_6 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_7 +block_6: + %div = udiv i32 %aval, %bval + ret void +block_7: + %sub = sub i32 %a2val, %b2val + ret void +} + +define void @outline_outputs2() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 
+block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_7 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_6 +block_6: + %diff = sub i32 %a2val, %b2val + ret void +block_7: + %quot = udiv i32 %aval, %bval + ret void +} +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] +; CHECK-NEXT: br label [[BLOCK_5:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] +; CHECK-NEXT: br label [[BLOCK_4:%.*]] +; CHECK: block_4: +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4 +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_5: +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD2]], i32* [[OUTPUT]], i32 [[MUL2]], i32* [[RESULT]]) +; CHECK-NEXT: br label [[BLOCK_7:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL]], 
[[BVAL]] +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL]], [[B2VAL]] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] +; CHECK-NEXT: br label [[BLOCK_5:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] +; CHECK-NEXT: br label [[BLOCK_4:%.*]] +; CHECK: block_4: +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4 +; CHECK-NEXT: br label [[BLOCK_7:%.*]] +; CHECK: block_5: +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD2]], i32* [[OUTPUT]], i32 [[MUL2]], i32* [[RESULT]]) +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL]], [[B2VAL]] +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[AVAL]], [[BVAL]] +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BLOCK_5_TO_OUTLINE:%.*]] +; CHECK: block_5_to_outline: +; CHECK-NEXT: store i32 
[[TMP0:%.*]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]] +; CHECK: block_7.exitStub: +; CHECK-NEXT: ret void +; Index: llvm/test/Transforms/IROutliner/region-end-of-module.ll =================================================================== --- llvm/test/Transforms/IROutliner/region-end-of-module.ll +++ llvm/test/Transforms/IROutliner/region-end-of-module.ll @@ -27,9 +27,19 @@ ; CHECK: for.cond1: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 2, 1 +; CHECK-NEXT: call void @outlined_ir_func_0() ; CHECK-NEXT: br label [[FOR_COND1]] ; CHECK: for.end: -; CHECK-NEXT: [[INC3:%.*]] = add nsw i32 2, 1 +; CHECK-NEXT: call void @outlined_ir_func_0() ; CHECK-NEXT: br label [[FOR_COND1]] ; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[FOR_BODY_TO_OUTLINE:%.*]] +; CHECK: for.body_to_outline: +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 2, 1 +; CHECK-NEXT: br label [[FOR_COND1_EXITSTUB:%.*]] +; CHECK: for.cond1.exitStub: +; CHECK-NEXT: ret void +;