Index: llvm/include/llvm/Analysis/IRSimilarityIdentifier.h =================================================================== --- llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -681,6 +681,7 @@ /// \returns The BasicBlock the IRSimilarityCandidate ends in. BasicBlock *getEndBB() { return LastInst->Inst->getParent(); } + /// \returns The Function that the IRSimilarityCandidate is located in. Function *getFunction() { return getStartBB()->getParent(); } @@ -732,7 +733,7 @@ using iterator = IRInstructionDataList::iterator; iterator begin() const { return iterator(front()); } iterator end() const { return std::next(iterator(back())); } -}; + }; typedef DenseMap>> Index: llvm/include/llvm/Transforms/IPO/IROutliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/IROutliner.h +++ llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -90,6 +90,9 @@ /// call. bool ChangedArgOrder = false; + /// Marks whether this region ends in a branch, there is special handling + /// required for the following basic blocks in this case. + bool EndsInBranch = false; /// Mapping of the argument number in the deduplicated function /// to a given constant, which is used when creating the arguments to the call /// to the newly created deduplicated function. This is handled separately @@ -182,6 +185,10 @@ /// The return block for the overall function. BasicBlock *EndBB = nullptr; + /// Keeping track of how many different branches outside the region the + /// regions in the group perform. + unsigned BranchesToOutside = 0; + /// A set containing the different GVN store sets needed. Each array contains /// a sorted list of the different values that need to be stored into output /// registers. @@ -386,10 +393,13 @@ struct InstructionAllowed : public InstVisitor { InstructionAllowed() {} - // TODO: Determine a scheme to resolve when the label is similar enough. 
- bool visitBranchInst(BranchInst &BI) { return false; } + bool visitBranchInst(BranchInst &BI) { + if (EnableBranches) + return true; + return false; + } // TODO: Determine a scheme to resolve when the labels are similar enough. - bool visitPHINode(PHINode &PN) { return false; } + bool visitPHINode(PHINode &PN) { return true; } // TODO: Handle allocas. bool visitAllocaInst(AllocaInst &AI) { return false; } // VAArg instructions are not allowed since this could cause difficulty when @@ -426,6 +436,8 @@ // TODO: Handle interblock similarity. bool visitTerminator(Instruction &I) { return false; } bool visitInstruction(Instruction &I) { return true; } + + bool EnableBranches = false; }; /// A InstVisitor used to exclude certain instructions from being outlined. Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -205,7 +205,7 @@ if (HaveLegalRange) { if (AddedIllegalLastTime) - mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); + mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); for (IRInstructionData *ID : InstrListForBB) this->IDL->push_back(*ID); llvm::append_range(InstrList, InstrListForBB); Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -33,6 +33,8 @@ using namespace llvm; using namespace IRSimilarity; +extern cl::opt EnableBranches; + // Set to true if the user wants the ir outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. 
Since the outliner is confined to a single module (modulo LTO), @@ -66,15 +68,44 @@ void OutlinableRegion::splitCandidate() { assert(!CandidateSplit && "Candidate already split!"); + Instruction *BackInst = Candidate->backInstruction(); + + Instruction *EndInst = nullptr; + // Check whether the last instruction is a terminator; if it is, we do + // not split on the following instruction. We leave the block as it is. We + // also check that this is not the last instruction in the Module; otherwise + // the check for whether the current following instruction matches the + // previously recorded instruction will be incorrect. + if (!BackInst->isTerminator() || + BackInst->getParent() != &BackInst->getFunction()->back()) { + EndInst = Candidate->end()->Inst; + assert(EndInst && "Expected an end instruction?"); + } - if (Candidate->end()->Inst != - Candidate->backInstruction()->getNextNonDebugInstruction()) - return; + // If the last recorded following instruction does not match the current + // following instruction, we cannot split. 
+ if (!BackInst->isTerminator()) + if (EndInst != BackInst->getNextNonDebugInstruction()) + return; Instruction *StartInst = (*Candidate->begin()).Inst; - Instruction *EndInst = (*Candidate->end()).Inst; - assert(StartInst && EndInst && "Expected a start and end instruction?"); + assert(StartInst && "Expected a start instruction?"); StartBB = StartInst->getParent(); + DenseSet BBSet; + Candidate->getBasicBlocks(BBSet); + + BasicBlock::iterator It = StartInst->getIterator(); + while(PHINode *PN = dyn_cast(&*It)) { + unsigned NumPredsOutsideRegion = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (!BBSet.contains(PN->getIncomingBlock(i))) + ++NumPredsOutsideRegion; + + if (NumPredsOutsideRegion > 1) + return; + + It++; + } PrevBB = StartBB; // The basic block gets split like so: @@ -95,11 +126,18 @@ std::string OriginalName = PrevBB->getName().str(); StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline"); + PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB); - // This is the case for the inner block since we do not have to include - // multiple blocks. - EndBB = StartBB; - FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); + if (!BackInst->isTerminator()) { + EndBB = EndInst->getParent(); + FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline"); + EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB); + FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB); + } else { + EndBB = BackInst->getParent(); + EndsInBranch = true; + FollowBB = nullptr; + } CandidateSplit = true; } @@ -122,7 +160,6 @@ // inst3 // inst4 assert(StartBB != nullptr && "StartBB for Candidate is not defined!"); - assert(FollowBB != nullptr && "StartBB for Candidate is not defined!"); // StartBB should only have one predecessor since we put an unconditional // branch at the end of PrevBB when we split the BasicBlock. 
@@ -131,21 +168,24 @@ "No Predecessor for the region start basic block!"); assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!"); - assert(EndBB->getTerminator() && "Terminator removed from EndBB!"); PrevBB->getTerminator()->eraseFromParent(); - EndBB->getTerminator()->eraseFromParent(); moveBBContents(*StartBB, *PrevBB); BasicBlock *PlacementBB = PrevBB; if (StartBB != EndBB) PlacementBB = EndBB; - moveBBContents(*FollowBB, *PlacementBB); + if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) { + assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!"); + assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!"); + PlacementBB->getTerminator()->eraseFromParent(); + moveBBContents(*FollowBB, *PlacementBB); + PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); + FollowBB->eraseFromParent(); + } PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB); - PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB); StartBB->eraseFromParent(); - FollowBB->eraseFromParent(); // Make sure to save changes back to the StartBB. StartBB = PrevBB; @@ -203,19 +243,24 @@ // division instruction for targets that have a native division instruction. // To be overly conservative, we only add 1 to the number of instructions for // each division instruction. 
- for (Instruction &I : *StartBB) { - switch (I.getOpcode()) { - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::UDiv: - case Instruction::URem: - Benefit += 1; - break; - default: - Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); - break; + DenseSet CandidateBlocks; + Candidate->getBasicBlocks(CandidateBlocks); + + for (BasicBlock *BB : CandidateBlocks) { + for (Instruction &I : *BB) { + switch (I.getOpcode()) { + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::UDiv: + case Instruction::URem: + Benefit += 1; + break; + default: + Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize); + break; + } } } @@ -688,10 +733,53 @@ /// \param [in] Outputs - The values found by the code extractor. static void findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region, - ArrayRef Outputs) { + SetVector &Outputs) { OutlinableGroup &Group = *Region.Parent; IRSimilarityCandidate &C = *Region.Candidate; + std::vector BE; + DenseSet BBSet; + C.getBasicBlocks(BBSet, BE); + + // Find the exits to the region. + SmallPtrSet Exits; + for (BasicBlock *Block : BE) { + for (BasicBlock *Succ : successors(Block)) { + if (!BBSet.contains(Succ)) { + Exits.insert(Succ); + } + } + } + + // For now, we check whether we have more than one exit, if we do, we + // ignore this region. + if (Exits.size() > 1) { + Region.IgnoreRegion = true; + return; + } + + // After determining which blocks exit to PHINodes, we add these PHINodes to + // the set of outputs to be processed. We also check the incoming values of + // the PHINodes for whether they should no longer be considered outputs. + DenseSet PHIWrapped; + for (BasicBlock *ExitBB : Exits) { + for (PHINode &PN : ExitBB->phis()) { + // Find all incoming values from the outlining region. 
+ SmallVector IncomingVals; + for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) + if (BBSet.contains(PN.getIncomingBlock(i))) + IncomingVals.push_back(i); + + // Skip this PHI if at most one of its incoming blocks is in the region. + if (IncomingVals.size() <= 1) + continue; + else { + Region.IgnoreRegion = true; + return; + } + } + } + // This counts the argument number in the extracted function. unsigned OriginalIndex = Region.NumExtractedInputs; @@ -767,7 +855,7 @@ // Map the outputs found by the CodeExtractor to the arguments found for // the overall function. - findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef()); + findExtractedOutputToOverallOutputMapping(Region, Outputs); } /// Replace the extracted function in the Region with a call to the overall @@ -1280,19 +1368,21 @@ return false; } - if (Region.Candidate->end()->Inst != - Region.Candidate->backInstruction()->getNextNonDebugInstruction()) { - IRInstructionDataList *IDL = Region.Candidate->front()->IDL; - Instruction *NewEndInst = - Region.Candidate->backInstruction()->getNextNonDebugInstruction(); - IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate()) - IRInstructionData(*NewEndInst, - InstructionClassifier.visit(*NewEndInst), *IDL); - - // Insert the first IRInstructionData of the new region after the - // last IRInstructionData of the IRSimilarityCandidate. 
- IDL->insert(Region.Candidate->end(), *NewEndIRID); - } + if (!Region.Candidate->backInstruction()->isTerminator()) { + if (Region.Candidate->end()->Inst != + Region.Candidate->backInstruction()->getNextNonDebugInstruction()) { + IRInstructionDataList *IDL = Region.Candidate->front()->IDL; + Instruction *NewEndInst = + Region.Candidate->backInstruction()->getNextNonDebugInstruction(); + IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate()) + IRInstructionData(*NewEndInst, + InstructionClassifier.visit(*NewEndInst), *IDL); + + // Insert the first IRInstructionData of the new region after the + // last IRInstructionData of the IRSimilarityCandidate. + IDL->insert(Region.Candidate->end(), *NewEndIRID); + } + } bool BadInst = any_of(*IRSC, [this](IRInstructionData &ID) { // We check if there is a discrepancy between the InstructionDataList @@ -1302,9 +1392,10 @@ // Since we do not have any similarity data about this particular // instruction, we cannot confidently outline it, and must discard this // candidate. - if (std::next(ID.getIterator())->Inst != - ID.Inst->getNextNonDebugInstruction()) - return true; + if (!ID.Inst->isTerminator()) + if (std::next(ID.getIterator())->Inst != + ID.Inst->getNextNonDebugInstruction()) + return true; return !this->InstructionClassifier.visit(ID.Inst); }); @@ -1337,9 +1428,14 @@ if (PreviouslyOutlined) continue; - // TODO: If in the future we can outline across BasicBlocks, we will need to - // check all BasicBlocks contained in the region. - if (IRSC.getStartBB()->hasAddressTaken()) + bool BBHasAddressTaken = false; + for (IRInstructionData &ID : IRSC) { + if (ID.Inst->getParent()->hasAddressTaken()) { + BBHasAddressTaken = true; + break; + } + } + if (BBHasAddressTaken) continue; if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() && @@ -1359,9 +1455,10 @@ // Since we do not have any similarity data about this particular // instruction, we cannot confidently outline it, and must discard this // candidate. 
- if (std::next(ID.getIterator())->Inst != - ID.Inst->getNextNonDebugInstruction()) - return true; + if (!ID.Inst->isTerminator()) + if (std::next(ID.getIterator())->Inst != + ID.Inst->getNextNonDebugInstruction()) + return true; return !this->InstructionClassifier.visit(ID.Inst); }); @@ -1428,10 +1525,32 @@ OutlinableGroup &CurrentGroup, TargetTransformInfo &TTI) { InstructionCost OutputCost = 0; + unsigned NumOutputBranches = 0; + + IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; + DenseSet CandidateBlocks; + Candidate.getBasicBlocks(CandidateBlocks); + + for (IRInstructionData &ID : Candidate) { + if (!isa(ID.Inst)) + continue; + + for (Value *V : ID.OperVals) { + BasicBlock *BB = static_cast(V); + DenseSet::iterator CBIt = CandidateBlocks.find(BB); + if (CBIt != CandidateBlocks.end()) + continue; + NumOutputBranches++; + } + } + + if (NumOutputBranches == 0) + NumOutputBranches++; + + CurrentGroup.BranchesToOutside = NumOutputBranches; for (const ArrayRef &OutputUse : CurrentGroup.OutputGVNCombinations) { - IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate; for (unsigned GVN : OutputUse) { Optional OV = Candidate.fromGVN(GVN); assert(OV.hasValue() && "Could not find value for GVN?"); @@ -1446,14 +1565,14 @@ LLVM_DEBUG(dbgs() << "Adding: " << StoreCost << " instructions to cost for output of type " << *V->getType() << "\n"); - OutputCost += StoreCost; + OutputCost += StoreCost * NumOutputBranches; } InstructionCost BranchCost = TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize); LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for" << " a branch instruction\n"); - OutputCost += BranchCost; + OutputCost += BranchCost * NumOutputBranches; } // If there is more than one output scheme, we must have a comparison and @@ -1472,7 +1591,7 @@ LLVM_DEBUG(dbgs() << "Adding: " << TotalCost << " instructions for each switch case for each different" << " output path in a function\n"); - 
OutputCost += TotalCost; + OutputCost += TotalCost * NumOutputBranches; } return OutputCost; @@ -1560,13 +1679,12 @@ bool IROutliner::extractSection(OutlinableRegion &Region) { SetVector ArgInputs, Outputs, SinkCands; - Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands); - assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!"); - assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!"); + BasicBlock *InitialStart = Region.StartBB; Function *OrigF = Region.StartBB->getParent(); CodeExtractorAnalysisCache CEAC(*OrigF); - Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC); + Region.ExtractedFunction = + Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs); // If the extraction was successful, find the BasicBlock, and reassign the // OutlinableRegion blocks @@ -1577,7 +1695,18 @@ return false; } - BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor(); + User *InstAsUser = Region.ExtractedFunction->user_back(); + BasicBlock *RewrittenBB = cast(InstAsUser)->getParent(); + Region.PrevBB = RewrittenBB->getSinglePredecessor(); + if (Region.PrevBB == InitialStart) { + BasicBlock *NewPrev = InitialStart->getSinglePredecessor(); + Instruction *BI = NewPrev->getTerminator(); + BI->eraseFromParent(); + moveBBContents(*InitialStart, *NewPrev); + Region.PrevBB = NewPrev; + InitialStart->eraseFromParent(); + } + Region.StartBB = RewrittenBB; Region.EndBB = RewrittenBB; @@ -1620,6 +1749,7 @@ unsigned IROutliner::doOutline(Module &M) { // Find the possible similarity sections. 
+ InstructionClassifier.EnableBranches = EnableBranches; IRSimilarityIdentifier &Identifier = getIRSI(M); SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); @@ -1671,7 +1801,9 @@ if (!OS->CandidateSplit) continue; - std::vector BE = {OS->StartBB}; + std::vector BE; + DenseSet BBSet; + OS->Candidate->getBasicBlocks(BBSet, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); @@ -1762,7 +1894,9 @@ // Create functions out of all the sections, and mark them as outlined. OutlinedRegions.clear(); for (OutlinableRegion *OS : CurrentGroup.Regions) { - std::vector BE = {OS->StartBB}; + std::vector BE; + DenseSet BBSet; + OS->Candidate->getBasicBlocks(BBSet, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); Index: llvm/test/Transforms/IROutliner/outlining-across-branch.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-across-branch.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -ir-sim-branches -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This checks that we are able to outline exactly the same branch structure +; while also outlining similar items on either side of the branch. 
+ +define void @outline_outputs1() #0 { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %next +next: + store i32 2, i32* %output, align 4 + store i32 3, i32* %result, align 4 + ret void +} + +define void @outline_outputs2() #0 { +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %next +next: + store i32 2, i32* %output, align 4 + store i32 3, i32* %result, align 4 + ret void +} +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]]) +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; 
CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: store i32 2, i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; Index: llvm/test/Transforms/IROutliner/outlining-basic-branches.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-basic-branches.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -ir-sim-branches -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This checks that we are able to outline exactly the same structure without +; any other items to outline. + +define void @outline_outputs1() #0 { +entry: + br label %next +next: + br label %next2 +next2: + br label %next +next3: + %a = alloca i32, align 4 + br label %next4 +next4: + br label %next3 +next5: + br label %next6 +next6: + %b = alloca i32, align 4 + ret void +} + +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: call void @outlined_ir_func_0() +; CHECK-NEXT: br label [[NEXT]] +; CHECK: next3: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0() +; CHECK-NEXT: br label [[NEXT3:%.*]] +; CHECK: next5: +; CHECK-NEXT: br label [[NEXT6:%.*]] +; CHECK: next6: +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK: newFuncRoot: +; CHECK-NEXT: br label [[NEXT_TO_OUTLINE:%.*]] +; CHECK: next.exitStub: +; CHECK-NEXT: ret void +; CHECK: next_to_outline: +; CHECK-NEXT: br label [[NEXT2:%.*]] +; CHECK: next2: +; CHECK-NEXT: br label [[NEXT_EXITSTUB:%.*]] +;