Index: llvm/include/llvm/Transforms/IPO/IROutliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/IROutliner.h +++ llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -91,6 +91,10 @@ /// call. bool ChangedArgOrder = false; + /// Marks whether this region ends in a branch. If it does, the basic blocks + /// that follow the region require special handling. + bool EndsInBranch = false; + /// Mapping of the argument number in the deduplicated function /// to a given constant, which is used when creating the arguments to the call /// to the newly created deduplicated function. This is handled separately @@ -315,8 +319,11 @@ struct InstructionAllowed : public InstVisitor { InstructionAllowed() {} - // TODO: Determine a scheme to resolve when the label is similar enough. - bool visitBranchInst(BranchInst &BI) { return false; } + bool visitBranchInst(BranchInst &BI) { + if (EnableBranches) + return true; + return false; + } // TODO: Determine a scheme to resolve when the labels are similar enough. bool visitPHINode(PHINode &PN) { return false; } // TODO: Handle allocas. @@ -355,6 +362,8 @@ // TODO: Handle interblock similarity. bool visitTerminator(Instruction &I) { return false; } bool visitInstruction(Instruction &I) { return true; } + + bool EnableBranches = false; }; /// A InstVisitor used to exclude certain instructions from being outlined. Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -33,6 +33,8 @@ using namespace llvm; using namespace IRSimilarity; +extern cl::opt DoNotEnableBranches; + // Set to true if the user wants the ir outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. 
Since the outliner is confined to a single module (modulo LTO), @@ -91,6 +93,10 @@ /// to specific arguments. DenseMap CanonicalNumberToAggArg; + /// Tracks how many distinct blocks outside of the region are branched to by + /// the regions in this group. + unsigned BranchesToOutside = 0; + /// The number of instructions that will be outlined by extracting \ref /// Regions. InstructionCost Benefit = 0; @@ -133,19 +139,44 @@ void OutlinableRegion::splitCandidate() { assert(!CandidateSplit && "Candidate already split!"); - Instruction *EndInst = (*Candidate->end()).Inst; - assert(EndInst && "Expected an end instruction?"); + Instruction *BackInst = Candidate->backInstruction(); + + Instruction *EndInst = nullptr; + // Check whether the last instruction is a terminator. If it is, we do not + // split on the following instruction and leave the block as it is. We also + // check that the terminator is not in the last basic block of its function; + // otherwise, the check for whether the current following instruction matches + // the previously recorded instruction will be incorrect. + if (!BackInst->isTerminator() || + BackInst->getParent() != &BackInst->getFunction()->back()) { + EndInst = Candidate->end()->Inst; + assert(EndInst && "Expected an end instruction?"); + } - // We check if the current instruction following the last instruction in the - // region is the same as the recorded instruction following the last - // instruction, if they do not match, there could be problems in rewriting - // the program after outlining, so we ignore it. - if (EndInst != Candidate->backInstruction()->getNextNonDebugInstruction()) - return; + // If the last recorded following instruction does not match the current + // following instruction, we cannot split. 
+ if (!BackInst->isTerminator()) + if (EndInst != BackInst->getNextNonDebugInstruction()) + return; Instruction *StartInst = (*Candidate->begin()).Inst; - assert(StartInst && EndInst && "Expected a start instruction?"); + assert(StartInst && "Expected a start instruction?"); StartBB = StartInst->getParent(); + DenseSet BBSet; + Candidate->getBasicBlocks(BBSet); + + BasicBlock::iterator It = StartInst->getIterator(); + while(PHINode *PN = dyn_cast(&*It)) { + unsigned NumPredsOutsideRegion = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!BBSet.contains(PN->getIncomingBlock(i))) + ++NumPredsOutsideRegion; + + if (NumPredsOutsideRegion > 1) + return; + + It++; + } PrevBB = StartBB; // The basic block gets split like so: @@ -166,11 +197,18 @@ std::string OriginalName = PrevBB->getName().str(); StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline"); + PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB); - // This is the case for the inner block since we do not have to include - // multiple blocks. - EndBB = StartBB; - FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); + if (!BackInst->isTerminator()) { + EndBB = EndInst->getParent(); + FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); + EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB); + FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB); + } else { + EndBB = BackInst->getParent(); + EndsInBranch = true; + FollowBB = nullptr; + } CandidateSplit = true; } @@ -193,7 +231,6 @@ // inst3 // inst4 assert(StartBB != nullptr && "StartBB for Candidate is not defined!"); - assert(FollowBB != nullptr && "StartBB for Candidate is not defined!"); // StartBB should only have one predecessor since we put an unconditional // branch at the end of PrevBB when we split the BasicBlock. 
@@ -202,21 +239,24 @@ "No Predecessor for the region start basic block!"); assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); - assert(EndBB->getTerminator() && "Terminator removed from EndBB!"); PrevBB->getTerminator()->eraseFromParent(); - EndBB->getTerminator()->eraseFromParent(); moveBBContents(*StartBB, *PrevBB); BasicBlock *PlacementBB = PrevBB; if (StartBB != EndBB) PlacementBB = EndBB; - moveBBContents(*FollowBB, *PlacementBB); + if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { + assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); + assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); + PlacementBB->getTerminator()->eraseFromParent(); + moveBBContents(*FollowBB, *PlacementBB); + PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); + FollowBB->eraseFromParent(); + } PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); - PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); StartBB->eraseFromParent(); - FollowBB->eraseFromParent(); // Make sure to save changes back to the StartBB. StartBB = PrevBB; @@ -763,10 +803,53 @@ /// \param [in] Outputs - The values found by the code extractor. static void findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region, - ArrayRef Outputs) { + SetVector &Outputs) { OutlinableGroup &Group = *Region.Parent; IRSimilarityCandidate &C = *Region.Candidate; + std::vector BE; + DenseSet BBSet; + C.getBasicBlocks(BBSet, BE); + + // Find the exits to the region. + SmallPtrSet Exits; + for (BasicBlock *Block : BE) { + for (BasicBlock *Succ : successors(Block)) { + if (!BBSet.contains(Succ)) { + Exits.insert(Succ); + } + } + } + + // For now, we check whether we have more than one exit, if we do, we + // ignore this region. + if (Exits.size() > 1) { + Region.IgnoreRegion = true; + return; + } + + // After determining which blocks exit to PHINodes, we add these PHINodes to + // the set of outputs to be processed. 
We also check the incoming values of + // the PHINodes for whether they should no longer be considered outputs. + DenseSet PHIWrapped; + for (BasicBlock *ExitBB : Exits) { + for (PHINode &PN : ExitBB->phis()) { + // Find all incoming values from the outlining region. + SmallVector IncomingVals; + for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) + if (BBSet.contains(PN.getIncomingBlock(i))) + IncomingVals.push_back(i); + + // Do not process PHI if there is one (or fewer) predecessor from region. + if (IncomingVals.size() <= 1) + continue; + else { + Region.IgnoreRegion = true; + return; + } + } + } + // This counts the argument number in the extracted function. unsigned OriginalIndex = Region.NumExtractedInputs; @@ -842,7 +925,7 @@ // Map the outputs found by the CodeExtractor to the arguments found for // the overall function. - findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef()); + findExtractedOutputToOverallOutputMapping(Region, Outputs); } /// Replace the extracted function in the Region with a call to the overall @@ -1354,18 +1437,20 @@ if (Outlined.contains(Idx)) return false; - Instruction *RealEndInstruction = - Region.Candidate->backInstruction()->getNextNonDebugInstruction(); - if (Region.Candidate->end()->Inst != RealEndInstruction) { - IRInstructionDataList *IDL = Region.Candidate->front()->IDL; - Instruction *NewEndInst = RealEndInstruction; - IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate()) - IRInstructionData(*NewEndInst, InstructionClassifier.visit(*NewEndInst), - *IDL); - - // Insert the first IRInstructionData of the new region after the - // last IRInstructionData of the IRSimilarityCandidate. 
- IDL->insert(Region.Candidate->end(), *NewEndIRID); + if (!Region.Candidate->backInstruction()->isTerminator()) { + if (Region.Candidate->end()->Inst != + Region.Candidate->backInstruction()->getNextNonDebugInstruction()) { + IRInstructionDataList *IDL = Region.Candidate->front()->IDL; + Instruction *NewEndInst = + Region.Candidate->backInstruction()->getNextNonDebugInstruction(); + IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate()) + IRInstructionData(*NewEndInst, + InstructionClassifier.visit(*NewEndInst), *IDL); + + // Insert the first IRInstructionData of the new region after the + // last IRInstructionData of the IRSimilarityCandidate. + IDL->insert(Region.Candidate->end(), *NewEndIRID); + } } return !any_of(*IRSC, [this](IRInstructionData &ID) { @@ -1376,9 +1461,10 @@ // Since we do not have any similarity data about this particular // instruction, we cannot confidently outline it, and must discard this // candidate. - if (std::next(ID.getIterator())->Inst != - ID.Inst->getNextNonDebugInstruction()) - return true; + if (!ID.Inst->isTerminator()) + if (std::next(ID.getIterator())->Inst != + ID.Inst->getNextNonDebugInstruction()) + return true; return !this->InstructionClassifier.visit(ID.Inst); }); } @@ -1409,9 +1495,14 @@ if (PreviouslyOutlined) continue; - // TODO: If in the future we can outline across BasicBlocks, we will need to - // check all BasicBlocks contained in the region. - if (IRSC.getStartBB()->hasAddressTaken()) + bool BBHasAddressTaken = false; + for (IRInstructionData &ID : IRSC) { + if (ID.Inst->getParent()->hasAddressTaken()) { + BBHasAddressTaken = true; + break; + } + } + if (BBHasAddressTaken) continue; if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() && @@ -1431,9 +1522,10 @@ // Since we do not have any similarity data about this particular // instruction, we cannot confidently outline it, and must discard this // candidate. 
- if (std::next(ID.getIterator())->Inst != - ID.Inst->getNextNonDebugInstruction()) - return true; + if (!ID.Inst->isTerminator()) + if (std::next(ID.getIterator())->Inst != + ID.Inst->getNextNonDebugInstruction()) + return true; return !this->InstructionClassifier.visit(ID.Inst); }); @@ -1500,10 +1592,35 @@ OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI) { InstructionCost OutputCost = 0; + unsigned NumOutputBranches = 0; + + IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; + DenseSet CandidateBlocks; + Candidate.getBasicBlocks(CandidateBlocks); + + // Count the number of different output branches that point to blocks outside + // of the region. + DenseSet FoundBlocks; + for (IRInstructionData &ID : Candidate) { + if (!isa(ID.Inst)) + continue; + + for (Value *V : ID.OperVals) { + BasicBlock *BB = static_cast(V); + DenseSet::iterator CBIt = CandidateBlocks.find(BB); + if (CBIt != CandidateBlocks.end()) + continue; + if (FoundBlocks.contains(BB)) + continue; + FoundBlocks.insert(BB); + NumOutputBranches++; + } + } + + CurrentGroup.BranchesToOutside = NumOutputBranches; for (const ArrayRef &OutputUse : CurrentGroup.OutputGVNCombinations) { - IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; for (unsigned GVN : OutputUse) { Optional OV = Candidate.fromGVN(GVN); assert(OV.hasValue() && "Could not find value for GVN?"); @@ -1518,14 +1635,14 @@ LLVM_DEBUG(dbgs() << "Adding: " << StoreCost << " instructions to cost for output of type " << *V->getType() << "\n"); - OutputCost += StoreCost; + OutputCost += StoreCost * NumOutputBranches; } InstructionCost BranchCost = TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize); LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for" << " a branch instruction\n"); - OutputCost += BranchCost; + OutputCost += BranchCost * NumOutputBranches; } // If there is more than one output scheme, we must have a comparison and @@ -1544,7 +1661,7 @@ 
LLVM_DEBUG(dbgs() << "Adding: " << TotalCost << " instructions for each switch case for each different" << " output path in a function\n"); - OutputCost += TotalCost; + OutputCost += TotalCost * NumOutputBranches; } return OutputCost; @@ -1632,13 +1749,12 @@ bool IROutliner::extractSection(OutlinableRegion &Region) { SetVector ArgInputs, Outputs, SinkCands; - Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands); - assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!"); - assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!"); + BasicBlock *InitialStart = Region.StartBB; Function *OrigF = Region.StartBB->getParent(); CodeExtractorAnalysisCache CEAC(*OrigF); - Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC); + Region.ExtractedFunction = + Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs); // If the extraction was successful, find the BasicBlock, and reassign the // OutlinableRegion blocks @@ -1649,7 +1765,22 @@ return false; } - BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor(); + // Get the block containing the call to the extracted function, and reassign + // the blocks as necessary. If the original start block still exists, it is + // because we ended on a branch instruction, so we move its contents into the + // preceding block and assign the previous block correctly. + User *InstAsUser = Region.ExtractedFunction->user_back(); + BasicBlock *RewrittenBB = cast(InstAsUser)->getParent(); + Region.PrevBB = RewrittenBB->getSinglePredecessor(); + if (Region.PrevBB == InitialStart) { + BasicBlock *NewPrev = InitialStart->getSinglePredecessor(); + Instruction *BI = NewPrev->getTerminator(); + BI->eraseFromParent(); + moveBBContents(*InitialStart, *NewPrev); + Region.PrevBB = NewPrev; + InitialStart->eraseFromParent(); + } + Region.StartBB = RewrittenBB; Region.EndBB = RewrittenBB; @@ -1692,6 +1823,7 @@ unsigned IROutliner::doOutline(Module &M) { // Find the possible similarity sections. 
+ InstructionClassifier.EnableBranches = !DoNotEnableBranches; IRSimilarityIdentifier &Identifier = getIRSI(M); SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); @@ -1750,7 +1882,9 @@ if (!OS->CandidateSplit) continue; - std::vector BE = {OS->StartBB}; + std::vector BE; + DenseSet BBSet; + OS->Candidate->getBasicBlocks(BBSet, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); @@ -1854,7 +1988,9 @@ // Create functions out of all the sections, and mark them as outlined. OutlinedRegions.clear(); for (OutlinableRegion *OS : CurrentGroup.Regions) { - std::vector BE = {OS->StartBB}; + std::vector BE; + DenseSet BBSet; + OS->Candidate->getBasicBlocks(BBSet, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); Index: llvm/test/Transforms/IROutliner/opt-remarks.ll =================================================================== --- llvm/test/Transforms/IROutliner/opt-remarks.ll +++ llvm/test/Transforms/IROutliner/opt-remarks.ll @@ -5,28 +5,27 @@ ; RUN: -pass-remarks-output=%t < %s ; RUN: cat %t | FileCheck -check-prefix=YAML %s -; CHECK: remark: :0:0: did not outline 2 regions due to estimated increase of 13 instructions at locations -; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 6 instructions at locations +; CHECK: remark: :0:0: did not outline 2 regions due to estimated increase of 10 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 4 instructions at locations ; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 0 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 10 instructions at locations ; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 1 instructions at 
locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 11 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 2 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 12 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 13 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 3 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 4 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 14 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 5 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 15 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 6 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 11 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 12 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 7 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 8 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 18 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 14 instructions at locations -; CHECK-NEXT: :0:0: did not outline 2 regions due to estimated increase of 9 instructions at locations -; CHECK-NEXT: :0:0: outlined 2 regions with decrease of 1 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 11 instructions at locations +; CHECK-NEXT: 
remark: :0:0: did not outline 2 regions due to estimated increase of 12 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 2 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 3 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 13 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 4 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 14 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 5 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 10 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 11 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 6 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 7 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 17 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 13 instructions at locations +; CHECK-NEXT: remark: :0:0: did not outline 2 regions due to estimated increase of 8 instructions at locations +; CHECK-NEXT: remark: :0:0: outlined 2 regions with decrease of 2 instructions at locations ; YAML: --- !Missed ; YAML-NEXT: Pass: iroutliner @@ -36,7 +35,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '13' +; YAML-NEXT: - InstructionIncrease: '10' ; YAML-NEXT: - String: ' instructions at locations ' ; 
YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -50,7 +49,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '6' +; YAML-NEXT: - InstructionIncrease: '4' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -73,26 +72,12 @@ ; YAML-NEXT: --- !Missed ; YAML-NEXT: Pass: iroutliner ; YAML-NEXT: Name: WouldNotDecreaseSize -; YAML-NEXT: Function: function3 -; YAML-NEXT: Args: -; YAML-NEXT: - String: 'did not outline ' -; YAML-NEXT: - String: '2' -; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '1' -; YAML-NEXT: - String: ' instructions at locations ' -; YAML-NEXT: - DebugLoc: '' -; YAML-NEXT: - String: ' ' -; YAML-NEXT: - DebugLoc: '' -; YAML-NEXT: ... -; YAML-NEXT: --- !Missed -; YAML-NEXT: Pass: iroutliner -; YAML-NEXT: Name: WouldNotDecreaseSize ; YAML-NEXT: Function: function1 ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '11' +; YAML-NEXT: - InstructionIncrease: '10' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -106,7 +91,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '2' +; YAML-NEXT: - InstructionIncrease: '1' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -120,7 +105,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '12' +; YAML-NEXT: - InstructionIncrease: '11' ; YAML-NEXT: - String: ' instructions at 
locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -134,7 +119,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '13' +; YAML-NEXT: - InstructionIncrease: '12' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -148,7 +133,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '3' +; YAML-NEXT: - InstructionIncrease: '2' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -162,7 +147,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '4' +; YAML-NEXT: - InstructionIncrease: '3' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -176,7 +161,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '14' +; YAML-NEXT: - InstructionIncrease: '13' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -190,7 +175,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '5' +; YAML-NEXT: - InstructionIncrease: '4' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -204,7 +189,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '15' +; YAML-NEXT: - 
InstructionIncrease: '14' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -218,7 +203,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '6' +; YAML-NEXT: - InstructionIncrease: '5' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -232,7 +217,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '11' +; YAML-NEXT: - InstructionIncrease: '10' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -246,7 +231,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '12' +; YAML-NEXT: - InstructionIncrease: '11' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -260,7 +245,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '7' +; YAML-NEXT: - InstructionIncrease: '6' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -274,7 +259,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '8' +; YAML-NEXT: - InstructionIncrease: '7' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -288,7 +273,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated 
increase of ' -; YAML-NEXT: - InstructionIncrease: '18' +; YAML-NEXT: - InstructionIncrease: '17' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -302,7 +287,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '14' +; YAML-NEXT: - InstructionIncrease: '13' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -316,7 +301,7 @@ ; YAML-NEXT: - String: 'did not outline ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions due to estimated increase of ' -; YAML-NEXT: - InstructionIncrease: '9' +; YAML-NEXT: - InstructionIncrease: '8' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' @@ -330,7 +315,7 @@ ; YAML-NEXT: - String: 'outlined ' ; YAML-NEXT: - String: '2' ; YAML-NEXT: - String: ' regions with decrease of ' -; YAML-NEXT: - Benefit: '1' +; YAML-NEXT: - Benefit: '2' ; YAML-NEXT: - String: ' instructions at locations ' ; YAML-NEXT: - DebugLoc: '' ; YAML-NEXT: - String: ' ' Index: llvm/test/Transforms/IROutliner/outlining-across-branch.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-across-branch.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This checks that we are able to outline exactly the same branch structure +; while also outlining similar items on either side of the branch. 
+ +define void @outline_outputs1() #0 { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %next +next: + store i32 2, i32* %output, align 4 + store i32 3, i32* %result, align 4 + ret void +} + +define void @outline_outputs2() #0 { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %next +next: + store i32 2, i32* %output, align 4 + store i32 3, i32* %result, align 4 + ret void +} +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]]) +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; 
CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: store i32 2, i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; Index: llvm/test/Transforms/IROutliner/outlining-basic-branches.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-basic-branches.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This checks that we are able to outline exactly the same structure without +; any other items to outline. + +define void @outline_outputs1() #0 { +entry: + br label %next +next: + br label %next2 +next2: + br label %next +next3: + %a = alloca i32, align 4 + br label %next4 +next4: + br label %next3 +next5: + br label %next6 +next6: + %b = alloca i32, align 4 + ret void +} + +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: call void @outlined_ir_func_0() +; CHECK-NEXT: br label [[NEXT]] +; CHECK: next3: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0() +; CHECK-NEXT: br label [[NEXT3:%.*]] +; CHECK: next5: +; CHECK-NEXT: br label [[NEXT6:%.*]] +; CHECK: next6: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK: newFuncRoot: +; CHECK-NEXT: br label [[NEXT_TO_OUTLINE:%.*]] +; CHECK: next.exitStub: +; CHECK-NEXT: ret void +; CHECK: next_to_outline: +; CHECK-NEXT: br label [[NEXT2:%.*]] +; CHECK: next2: +; CHECK-NEXT: br label [[NEXT_EXITSTUB:%.*]] +; Index: 
llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll @@ -0,0 +1,177 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Here we have multiple exits, but we can only outline single-entry, +; single-exit regions so far; we check that nothing else gets outlined. + +define void @outline_outputs1() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_6 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_7 +block_6: + %div = udiv i32 %aval, %bval + ret void +block_7: + %sub = sub i32 %a2val, %b2val + ret void +} + +define void @outline_outputs2() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 
+block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_7 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_6 +block_6: + %diff = sub i32 %a2val, %b2val + ret void +block_7: + %quot = udiv i32 %aval, %bval + ret void +} +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] +; CHECK-NEXT: br label [[BLOCK_5:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] +; CHECK-NEXT: br label [[BLOCK_4:%.*]] +; CHECK: block_4: +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4 +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_5: +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD2]], i32* [[OUTPUT]], i32 [[MUL2]], i32* [[RESULT]]) +; CHECK-NEXT: br label [[BLOCK_7:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL]], 
[[BVAL]] +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL]], [[B2VAL]] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] +; CHECK-NEXT: br label [[BLOCK_5:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] +; CHECK-NEXT: br label [[BLOCK_4:%.*]] +; CHECK: block_4: +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4 +; CHECK-NEXT: br label [[BLOCK_7:%.*]] +; CHECK: block_5: +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD2]], i32* [[OUTPUT]], i32 [[MUL2]], i32* [[RESULT]]) +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL]], [[B2VAL]] +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[AVAL]], [[BVAL]] +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BLOCK_5_TO_OUTLINE:%.*]] +; CHECK: block_7.exitStub: +; CHECK-NEXT: ret void +; CHECK: 
block_5_to_outline: +; CHECK-NEXT: store i32 [[TMP0:%.*]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]] +;