Index: llvm/lib/Transforms/IPO/IROutliner.cpp
===================================================================
--- llvm/lib/Transforms/IPO/IROutliner.cpp
+++ llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/InitializePasses.h"
@@ -73,8 +74,12 @@
   /// for extraction.
   bool IgnoreGroup = false;
 
-  /// The return block for the overall function.
-  BasicBlock *EndBB = nullptr;
+  /// The return blocks for the overall function.
+  DenseMap<Value *, BasicBlock *> EndBBs;
+
+  /// The PHIBlocks with their corresponding return block, keyed by the
+  /// return value.
+  DenseMap<Value *, BasicBlock *> PHIBlocks;
 
   /// A set containing the different GVN store sets needed. Each array contains
   /// a sorted list of the different values that need to be stored into output
@@ -136,6 +141,29 @@
   }
 }
 
+/// A function to sort the keys of \p Map, if it contains constants, and
+/// return them in \p SortedKeys.
+///
+/// \param SortedKeys - The vector the sorted keys will be returned in.
+/// \param Map - The DenseMap containing keys to sort.
+static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
+                                  DenseMap<Value *, BasicBlock *> &Map) {
+  for (std::pair<Value *, BasicBlock *> &VtoBB : Map)
+    SortedKeys.push_back(VtoBB.first);
+
+  if (SortedKeys.size() > 1)
+    stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
+      const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
+      const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+      assert(RHSC && "Not a constant integer in return value?");
+      assert(LHSC && "Not a constant integer in return value?");
+      const uint64_t LHSVal = LHSC->getLimitedValue();
+      const uint64_t RHSVal = RHSC->getLimitedValue();
+
+      return LHSVal < RHSVal;
+    });
+}
+
 void OutlinableRegion::splitCandidate() {
   assert(!CandidateSplit && "Candidate already split!");
 
@@ -430,8 +458,25 @@
                                      unsigned FunctionNameSuffix) {
   assert(!Group.OutlinedFunction && "Function is already defined!");
 
+  Type *RetTy = Type::getVoidTy(M.getContext());
+  // All extracted functions _should_ have the same return type at this point
+  // since the similarity identifier ensures that all branches outside of the
+  // region occur in the same place.
+
+  // NOTE: Should we ever move to the model that uses a switch at every point
+  // needed, meaning that we could branch within the region or out, it is
+  // possible that we will need to switch to using the most general case all of
+  // the time.
+  for (OutlinableRegion *R : Group.Regions) {
+    Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
+    if (RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy())
+      RetTy = ExtractedFuncType;
+    else if (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16))
+      RetTy = ExtractedFuncType;
+  }
+
   Group.OutlinedFunctionType = FunctionType::get(
-      Type::getVoidTy(M.getContext()), Group.ArgumentTypes, false);
+      RetTy, Group.ArgumentTypes, false);
 
   // These functions will only be called from within the same module, so
   // we can set an internal linkage.
@@ -487,19 +532,23 @@
 ///
 /// \param [in] Old - The function to move the basic blocks from.
 /// \param [in] New - The function to move the basic blocks to.
+/// \param [out] NewEnds - The return blocks of the new overall function.
 /// \returns the first return block for the function in New.
-static BasicBlock *moveFunctionData(Function &Old, Function &New) {
+static void moveFunctionData(Function &Old, Function &New,
+                             DenseMap<Value *, BasicBlock *> &NewEnds) {
   Function::iterator CurrBB, NextBB, FinalBB;
-  BasicBlock *NewEnd = nullptr;
   std::vector<Instruction *> DebugInsts;
   for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB;
        CurrBB = NextBB) {
+    DebugInsts.clear();
     NextBB = std::next(CurrBB);
     CurrBB->removeFromParent();
     CurrBB->insertInto(&New);
     Instruction *I = CurrBB->getTerminator();
-    if (isa<ReturnInst>(I))
-      NewEnd = &(*CurrBB);
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(I)) {
+      Value *RetVal = RI->getReturnValue();
+      NewEnds.insert(std::make_pair(RetVal, &(*CurrBB)));
+    }
 
     for (Instruction &Val : *CurrBB) {
       // We must handle the scoping of called functions differently than
@@ -533,8 +582,7 @@
       I->eraseFromParent();
   }
 
-  assert(NewEnd && "No return instruction for new function?");
-  return NewEnd;
+  assert(NewEnds.size() > 0 && "No return instruction for new function?");
 }
 
 /// Find the constants that will need to be lifted into arguments
@@ -821,17 +869,9 @@
     }
   }
 
-  // For now, we check whether we have more than one exit, if we do, we
-  // ignore this region.
-  if (Exits.size() > 1) {
-    Region.IgnoreRegion = true;
-    return;
-  }
-
   // After determining which blocks exit to PHINodes, we add these PHINodes to
   // the set of outputs to be processed. We also check the incoming values of
   // the PHINodes for whether they should no longer be considered outputs.
-  DenseSet PHIWrapped;
   for (BasicBlock *ExitBB : Exits) {
     for (PHINode &PN : ExitBB->phis()) {
       // Find all incoming values from the outlining region.
@@ -1024,6 +1064,9 @@
   // Transfer any debug information.
   Call->setDebugLoc(Region.Call->getDebugLoc());
 
+  // Since our output may determine which branch we go to, we make sure to
+  // propagate this new call value through the module.
+  OldCall->replaceAllUsesWith(Call);
   // Remove the old instruction.
   OldCall->eraseFromParent();
 
@@ -1044,11 +1087,21 @@
 /// \param [in] Region - The region of extracted code to be changed.
 /// \param [in,out] OutputBB - The BasicBlock for the output stores for this
 /// region.
-static void replaceArgumentUses(OutlinableRegion &Region,
-                                BasicBlock *OutputBB) {
+/// \param [in] FirstFunction - A flag to indicate whether we are using this
+/// function to define the overall outlined function for all the regions, or
+/// if we are operating on one of the following regions.
+static void
+replaceArgumentUses(OutlinableRegion &Region,
+                    DenseMap<Value *, BasicBlock *> &OutputBBs,
+                    bool FirstFunction = false) {
   OutlinableGroup &Group = *Region.Parent;
   assert(Region.ExtractedFunction && "Region has no extracted function?");
 
+  Function *DominatingFunction = Region.ExtractedFunction;
+  if (FirstFunction)
+    DominatingFunction = Group.OutlinedFunction;
+  DominatorTree DT(*DominatingFunction);
+
   for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
        ArgIdx++) {
     assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
@@ -1075,11 +1128,42 @@
     assert(InstAsUser && "User is nullptr!");
 
     Instruction *I = cast<Instruction>(InstAsUser);
-    I->setDebugLoc(DebugLoc());
-    LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
-                      << *OutputBB << "\n");
+    BasicBlock *BB = I->getParent();
+    SmallVector<BasicBlock *> Descendants;
+    DT.getDescendants(BB, Descendants);
+    bool EdgeAdded = false;
+    if (Descendants.size() == 0) {
+      EdgeAdded = true;
+      DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
+      DT.getDescendants(BB, Descendants);
+    }
 
-    I->moveBefore(*OutputBB, OutputBB->end());
+    // Iterate over the following blocks, looking for return instructions;
+    // if we find one, find the corresponding output block for the return value
+    // and move our store instruction there.
+    for (BasicBlock *DescendBB : Descendants) {
+      Instruction *Term = DescendBB->getTerminator();
+      if (!isa<ReturnInst>(Term))
+        continue;
+      ReturnInst *RI = cast<ReturnInst>(Term);
+      Value *RetVal = RI->getReturnValue();
+      DenseMap<Value *, BasicBlock *>::iterator VBBIt, PHIVBBIt;
+      VBBIt = OutputBBs.find(RetVal);
+      assert(VBBIt != OutputBBs.end() && "Could not find output value!");
+
+      Instruction *NewI = I->clone();
+      NewI->setDebugLoc(DebugLoc());
+      BasicBlock *OutputBB = VBBIt->second;
+      OutputBB->getInstList().push_back(NewI);
+      LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
+                        << *OutputBB << "\n");
+    }
+
+    // If we added an edge for basic blocks without a predecessor, we remove it
+    // here.
+    if (EdgeAdded)
+      DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
+    I->eraseFromParent();
 
     LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
                       << *Region.ExtractedFunction << " with " << *AggArg
@@ -1153,34 +1237,48 @@
 /// \param OutputBB [in] the block we are looking for a duplicate of.
 /// \param OutputStoreBBs [in] The existing output blocks.
 /// \returns an optional value with the number output block if there is a match.
-Optional<unsigned>
-findDuplicateOutputBlock(BasicBlock *OutputBB,
-                         ArrayRef<BasicBlock *> OutputStoreBBs) {
+Optional<unsigned> findDuplicateOutputBlock(
+    DenseMap<Value *, BasicBlock *> &OutputBBs,
+    std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
 
   bool WrongInst = false;
   bool WrongSize = false;
   unsigned MatchingNum = 0;
-  for (BasicBlock *CompBB : OutputStoreBBs) {
+  // We compare the new set of output blocks to the other sets of output
+  // blocks. If they are the same number, and have identical instructions,
+  // they are considered to be the same.
+  for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
     WrongInst = false;
-    if (CompBB->size() - 1 != OutputBB->size()) {
-      WrongSize = true;
-      MatchingNum++;
-      continue;
-    }
-
-    WrongSize = false;
-    BasicBlock::iterator NIt = OutputBB->begin();
-    for (Instruction &I : *CompBB) {
-      if (isa<BranchInst>(&I))
-        continue;
+    for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
+      DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
+          OutputBBs.find(VToB.first);
+      if (OutputBBIt == OutputBBs.end()) {
+        WrongSize = true;
+        break;
+      }
 
-      if (!I.isIdenticalTo(&(*NIt))) {
-        WrongInst = true;
+      BasicBlock *CompBB = VToB.second;
+      BasicBlock *OutputBB = OutputBBIt->second;
+      if (CompBB->size() - 1 != OutputBB->size()) {
+        WrongSize = true;
         break;
       }
 
-      NIt++;
+      BasicBlock::iterator NIt = OutputBB->begin();
+      for (Instruction &I : *CompBB) {
+        if (isa<BranchInst>(&I))
+          continue;
+
+        if (!I.isIdenticalTo(&(*NIt))) {
+          WrongInst = true;
+          break;
+        }
+
+        NIt++;
+      }
     }
+
     if (!WrongInst && !WrongSize)
       return MatchingNum;
 
@@ -1202,11 +1300,12 @@
 /// \param [in] OutputMappings - OutputMappings the mapping of values that have
 /// been replaced by a new output value.
 /// \param [in,out] OutputStoreBBs - The existing output blocks.
-static void
-alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
-                            BasicBlock *OutputBB, BasicBlock *EndBB,
-                            const DenseMap<Value *, Value *> &OutputMappings,
-                            std::vector<BasicBlock *> &OutputStoreBBs) {
+static void alignOutputBlockWithAggFunc(
+    OutlinableGroup &OG, OutlinableRegion &Region,
+    DenseMap<Value *, BasicBlock *> &OutputBBs,
+    DenseMap<Value *, BasicBlock *> &EndBBs,
+    const DenseMap<Value *, Value *> &OutputMappings,
+    std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
   DenseSet<unsigned> ValuesToFind(Region.GVNStores.begin(),
                                   Region.GVNStores.end());
 
@@ -1215,9 +1314,13 @@
   // be contained in a store, we replace the uses of the value with the value
   // from the overall function, so that the store is storing the correct
   // value from the overall function.
-  DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
-                                    OutputStoreBBs.end());
-  ExcludeBBs.insert(OutputBB);
+  DenseSet<BasicBlock *> ExcludeBBs;
+  for (DenseMap<Value *, BasicBlock *> &BBMap : OutputStoreBBs)
+    for (std::pair<Value *, BasicBlock *> &VBPair : BBMap)
+      ExcludeBBs.insert(VBPair.second);
+  for (std::pair<Value *, BasicBlock *> &VBPair : OutputBBs)
+    ExcludeBBs.insert(VBPair.second);
+
   std::vector<Instruction *> ExtractedFunctionInsts =
       collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
   std::vector<Instruction *> OverallFunctionInsts =
@@ -1250,35 +1353,62 @@
   // If the size of the block is 0, then there are no stores, and we do not
   // need to save this block.
-  if (OutputBB->size() == 0) {
+  bool AllRemoved = true;
+  Value *RetValueForBB;
+  BasicBlock *NewBB;
+  SmallVector<Value *> ToRemove;
+  for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
+    RetValueForBB = VtoBB.first;
+    NewBB = VtoBB.second;
+
+    if (NewBB->size() == 0) {
+      NewBB->eraseFromParent();
+      ToRemove.push_back(RetValueForBB);
+      continue;
+    }
+
+    AllRemoved = false;
+  }
+
+  for (Value *V : ToRemove)
+    OutputBBs.erase(V);
+
+  if (AllRemoved) {
     Region.OutputBlockNum = -1;
-    OutputBB->eraseFromParent();
     return;
   }
 
-  // Determine is there is a duplicate block.
+  // Determine if there is a duplicate set of blocks.
   Optional<unsigned> MatchingBB =
-      findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+      findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
 
-  // If there is, we remove the new output block. If it does not,
-  // we add it to our list of output blocks.
+  // If there is, we remove the new output blocks. If there is not,
+  // we add them to our list of sets of output blocks.
   if (MatchingBB.hasValue()) {
     LLVM_DEBUG(dbgs() << "Set output block for region in function"
                       << Region.ExtractedFunction << " to "
                       << MatchingBB.getValue());
 
     Region.OutputBlockNum = MatchingBB.getValue();
-    OutputBB->eraseFromParent();
+    for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
+      VtoBB.second->eraseFromParent();
     return;
   }
 
   Region.OutputBlockNum = OutputStoreBBs.size();
 
-  LLVM_DEBUG(dbgs() << "Create output block for region in"
-                    << Region.ExtractedFunction << " to "
-                    << *OutputBB);
-  OutputStoreBBs.push_back(OutputBB);
-  BranchInst::Create(EndBB, OutputBB);
+  OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
+  for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
+    RetValueForBB = VtoBB.first;
+    NewBB = VtoBB.second;
+    DenseMap<Value *, BasicBlock *>::iterator VBBIt =
+        EndBBs.find(RetValueForBB);
+    LLVM_DEBUG(dbgs() << "Create output block for region in"
+                      << Region.ExtractedFunction << " to "
+                      << *NewBB);
+    BranchInst::Create(VBBIt->second, NewBB);
+    OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
+  }
 }
 
 /// Create the switch statement for outlined function to differentiate between
@@ -1290,48 +1420,73 @@
 /// \param [in] OG - The group of regions to be outlined.
 /// \param [in] EndBB - The final block of the extracted function.
 /// \param [in,out] OutputStoreBBs - The existing output blocks.
-void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
-                           ArrayRef<BasicBlock *> OutputStoreBBs) {
+void createSwitchStatement(
+    Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
+    std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
   // We only need the switch statement if there is more than one store
   // combination.
   if (OG.OutputGVNCombinations.size() > 1) {
     Function *AggFunc = OG.OutlinedFunction;
-    // Create a final block
-    BasicBlock *ReturnBlock =
-        BasicBlock::Create(M.getContext(), "final_block", AggFunc);
-    Instruction *Term = EndBB->getTerminator();
-    Term->moveBefore(*ReturnBlock, ReturnBlock->end());
-    // Put the switch statement in the old end basic block for the function with
-    // a fall through to the new return block
-    LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
-                      << OutputStoreBBs.size() << "\n");
-    SwitchInst *SwitchI =
-        SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
-                           ReturnBlock, OutputStoreBBs.size(), EndBB);
-
-    unsigned Idx = 0;
-    for (BasicBlock *BB : OutputStoreBBs) {
-      SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
-                       BB);
-      Term = BB->getTerminator();
-      Term->setSuccessor(0, ReturnBlock);
-      Idx++;
+    // Create a final block for each different return block.
+    unsigned FinalBlockIdx = 0;
+    std::vector<Value *> SortedKeys;
+    getSortedConstantKeys(SortedKeys, OG.EndBBs);
+
+    for (Value *RetVal : SortedKeys) {
+      std::pair<Value *, BasicBlock *> &OutputBlock = *OG.EndBBs.find(RetVal);
+      BasicBlock *EndBB = OutputBlock.second;
+      BasicBlock *ReturnBlock = BasicBlock::Create(
+          M.getContext(),
+          "final_block_" + Twine(static_cast<unsigned>(FinalBlockIdx++)),
+          AggFunc);
+      Instruction *Term = EndBB->getTerminator();
+      Term->moveBefore(*ReturnBlock, ReturnBlock->end());
+      // Put the switch statement in the old end basic block for the function
+      // with a fall through to the new return block.
+      LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
+                        << OutputStoreBBs.size() << "\n");
+      SwitchInst *SwitchI =
+          SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
+                             ReturnBlock, OutputStoreBBs.size(), EndBB);
+
+      unsigned Idx = 0;
+      for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
+        DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
+            OutputStoreBB.find(OutputBlock.first);
+
+        if (OSBBIt == OutputStoreBB.end())
+          continue;
+
+        BasicBlock *BB = OSBBIt->second;
+        SwitchI->addCase(
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
+        Term = BB->getTerminator();
+        Term->setSuccessor(0, ReturnBlock);
+        Idx++;
+      }
     }
 
     return;
   }
 
-  // If there needs to be stores, move them from the output block to the end
-  // block to save on branching instructions.
+  // If stores are needed, move them from the output blocks to their
+  // corresponding ending blocks.
   if (OutputStoreBBs.size() == 1) {
     LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
                       << *OG.OutlinedFunction << "\n");
-    BasicBlock *OutputBlock = OutputStoreBBs[0];
-    Instruction *Term = OutputBlock->getTerminator();
-    Term->eraseFromParent();
-    Term = EndBB->getTerminator();
-    moveBBContents(*OutputBlock, *EndBB);
-    Term->moveBefore(*EndBB, EndBB->end());
-    OutputBlock->eraseFromParent();
+    DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
+    for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
+      DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
+          EndBBs.find(VBPair.first);
+      assert(EndBBIt != EndBBs.end() && "Could not find end block");
+      BasicBlock *EndBB = EndBBIt->second;
+      BasicBlock *OutputBB = VBPair.second;
+      Instruction *Term = OutputBB->getTerminator();
+      Term->eraseFromParent();
+      Term = EndBB->getTerminator();
+      moveBBContents(*OutputBB, *EndBB);
+      Term->moveBefore(*EndBB, EndBB->end());
+      OutputBB->eraseFromParent();
+    }
   }
 }
@@ -1346,42 +1501,73 @@
 /// set of stores needed for the different functions.
 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
 /// once outlining is complete.
-static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup,
-                                std::vector<BasicBlock *> &OutputStoreBBs,
-                                std::vector<Function *> &FuncsToRemove) {
+static void fillOverallFunction(
+    Module &M, OutlinableGroup &CurrentGroup,
+    std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
+    std::vector<Function *> &FuncsToRemove) {
   OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
 
   // Move first extracted function's instructions into new function.
   LLVM_DEBUG(dbgs() << "Move instructions from "
                     << *CurrentOS->ExtractedFunction << " to instruction "
                    << *CurrentGroup.OutlinedFunction << "\n");
-
-  CurrentGroup.EndBB = moveFunctionData(*CurrentOS->ExtractedFunction,
-                                        *CurrentGroup.OutlinedFunction);
+  moveFunctionData(*CurrentOS->ExtractedFunction,
+                   *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
 
   // Transfer the attributes from the function to the new function.
   for (Attribute A :
        CurrentOS->ExtractedFunction->getAttributes().getFnAttributes())
     CurrentGroup.OutlinedFunction->addFnAttr(A);
 
-  // Create an output block for the first extracted function.
-  BasicBlock *NewBB = BasicBlock::Create(
-      M.getContext(), Twine("output_block_") + Twine(static_cast<unsigned>(0)),
-      CurrentGroup.OutlinedFunction);
+  // Create a new set of output blocks for the first extracted function.
+  DenseMap<Value *, BasicBlock *> NewBBs;
+  unsigned Idx = 0;
+  std::vector<Value *> SortedKeys;
+
+  getSortedConstantKeys(SortedKeys, CurrentGroup.EndBBs);
+
+  for (Value *RetVal : SortedKeys) {
+    BasicBlock *NewBB = BasicBlock::Create(
+        M.getContext(),
+        Twine("output_block_") + Twine(static_cast<unsigned>(0)) + Twine("_") +
+            Twine(static_cast<unsigned>(Idx++)),
+        CurrentGroup.OutlinedFunction);
+    NewBBs.insert(std::make_pair(RetVal, NewBB));
+  }
   CurrentOS->OutputBlockNum = 0;
 
-  replaceArgumentUses(*CurrentOS, NewBB);
+  replaceArgumentUses(*CurrentOS, NewBBs, true);
   replaceConstants(*CurrentOS);
 
-  // If the new basic block has no new stores, we can erase it from the module.
-  // It it does, we create a branch instruction to the last basic block from the
-  // new one.
-  if (NewBB->size() == 0) {
+  // If a new basic block has no new stores, we can erase it from the module.
+  // If it does, we create a branch instruction from the new basic block to the
+  // return block for the function.
+  BasicBlock *NewBB;
+  Value *RetValueForBB;
+  OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
+  bool AllRemoved = true;
+  SmallVector<Value *> ToRemove;
+  for (std::pair<Value *, BasicBlock *> &VtoBB : NewBBs) {
+    RetValueForBB = VtoBB.first;
+    NewBB = VtoBB.second;
+
+    if (NewBB->size() == 0) {
+      NewBB->eraseFromParent();
+      ToRemove.push_back(RetValueForBB);
+      continue;
+    }
+
+    AllRemoved = false;
+    DenseMap<Value *, BasicBlock *>::iterator VBBIt =
+        CurrentGroup.EndBBs.find(RetValueForBB);
+    BasicBlock *EndBB = VBBIt->second;
+    BranchInst::Create(EndBB, NewBB);
+    OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
+  }
+
+  if (AllRemoved) {
+    OutputStoreBBs.pop_back();
     CurrentOS->OutputBlockNum = -1;
-    NewBB->eraseFromParent();
-  } else {
-    BranchInst::Create(CurrentGroup.EndBB, NewBB);
-    OutputStoreBBs.push_back(NewBB);
   }
 
   // Replace the call to the extracted function with the outlined function.
@@ -1397,25 +1583,37 @@
                                     std::vector<Function *> &FuncsToRemove,
                                     unsigned &OutlinedFunctionNum) {
   createFunction(M, CurrentGroup, OutlinedFunctionNum);
-  std::vector<BasicBlock *> OutputStoreBBs;
+  std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
 
   OutlinableRegion *CurrentOS;
 
   fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
 
+  std::vector<Value *> SortedKeys;
  for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
     CurrentOS = CurrentGroup.Regions[Idx];
     AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
                                                 *CurrentOS->ExtractedFunction);
 
-    // Create a new BasicBlock to hold the needed store instructions.
-    BasicBlock *NewBB = BasicBlock::Create(
-        M.getContext(), "output_block_" + std::to_string(Idx),
-        CurrentGroup.OutlinedFunction);
-    replaceArgumentUses(*CurrentOS, NewBB);
-
-    alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
-                                CurrentGroup.EndBB, OutputMappings,
+    // Create a set of BasicBlocks, one for each return block, to hold the
+    // needed store instructions.
+    DenseMap<Value *, BasicBlock *> NewBBs;
+    unsigned BBIdx = 0;
+
+    SortedKeys.clear();
+    getSortedConstantKeys(SortedKeys, CurrentGroup.EndBBs);
+
+    for (Value *RetVal : SortedKeys) {
+      BasicBlock *NewBB = BasicBlock::Create(
+          M.getContext(),
+          Twine("output_block_") + Twine(static_cast<unsigned>(Idx)) +
+              Twine("_") + Twine(static_cast<unsigned>(BBIdx++)),
+          CurrentGroup.OutlinedFunction);
+      NewBBs.insert(std::make_pair(RetVal, NewBB));
+    }
+    replaceArgumentUses(*CurrentOS, NewBBs);
+    alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
+                                CurrentGroup.EndBBs, OutputMappings,
                                 OutputStoreBBs);
 
     CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
@@ -1423,7 +1621,7 @@
   }
 
   // Create a switch statement to handle the different output schemes.
- createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs); + createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs); OutlinedFunctionNum++; } Index: llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Show that we do not extract similar regions that would involve the splitting +; of phi nodes on exit. + +define void @function1(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br label %first +test: + %d = load i32, i32* %0, align 4 + br label %first +first: + %1 = phi i32 [ %c, %test ], [ %e, %test1 ] + ret void +} + +define void @function2(i32* %a, i32* %b) { +entry: + %0 = alloca i32, align 4 + %c = load i32, i32* %0, align 4 + br label %test1 +test1: + %e = load i32, i32* %0, align 4 + br label %first +test: + %d = load i32, i32* %0, align 4 + br label %first +first: + %1 = phi i32 [ %c, %test ], [ %e, %test1 ] + ret void +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[TEST1:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: test: +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]]) +; CHECK-NEXT: br label [[FIRST]] +; CHECK: first: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[TEST1:%.*]] +; CHECK: test1: +; CHECK-NEXT: [[E:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: test: +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[TMP0]]) +; CHECK-NEXT: br label [[FIRST]] +; CHECK: first: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ] +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[TEST_TO_OUTLINE:%.*]] +; CHECK: first.exitStub: +; CHECK-NEXT: ret void +; CHECK: test_to_outline: +; CHECK-NEXT: [[D:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[FIRST_EXITSTUB:%.*]] +; Index: llvm/test/Transforms/IROutliner/outlining-multiple-exits-diff-outputs.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-multiple-exits-diff-outputs.ll @@ -0,0 +1,229 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Here we have multiple exits, but the different sources, different outputs are +; needed, this checks that they are handled by separate switch statements. 
+ +define void @outline_outputs1() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_6 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_7 +block_6: + %div = udiv i32 %aval, %bval + ret void +block_7: + %sub = sub i32 %a2val, %b2val + ret void +} + +define void @outline_outputs2() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_7 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_6 +block_6: + %diff = sub i32 %a2val, %b2val + ret void +block_7: + %quot = udiv i32 %add, %mul + ret void +} +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BVAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[AVAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[AVAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[BVAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[AVAL_LOC]], i32* [[BVAL_LOC]], i32 0) +; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4 +; 
CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4 +; CHECK-NEXT: [[AVAL_RELOAD:%.*]] = load i32, i32* [[AVAL_LOC]], align 4 +; CHECK-NEXT: [[BVAL_RELOAD:%.*]] = load i32, i32* [[BVAL_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_6:%.*]], label [[BLOCK_7:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL_RELOAD]], [[BVAL_RELOAD]] +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[ADD_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[MUL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[ADD_LOC]], i32* [[MUL_LOC]], i32 1) +; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4 +; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4 +; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 +; CHECK-NEXT: [[MUL_RELOAD:%.*]] = load i32, i32* [[MUL_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_7:%.*]], label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]] +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[ADD_RELOAD]], [[MUL_RELOAD]] +; CHECK-NEXT: ret void +; +; +; CHECK: define internal i1 @outlined_ir_func_0( +; CHECK: newFuncRoot: +; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]] +; CHECK: block_6.exitStub: +; CHECK-NEXT: switch i32 [[TMP8:%.*]], label [[FINAL_BLOCK_1:%.*]] [ +; CHECK-NEXT: i32 0, label 
[[OUTPUT_BLOCK_0_1:%.*]] +; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]] +; CHECK-NEXT: ] +; CHECK: block_7.exitStub: +; CHECK-NEXT: switch i32 [[TMP8]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]] +; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]] +; CHECK-NEXT: ] +; CHECK: block_2_to_outline: +; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] +; CHECK-NEXT: br label [[BLOCK_5:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] +; CHECK-NEXT: br label [[BLOCK_4:%.*]] +; CHECK: block_4: +; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[BLOCK_6_EXITSTUB:%.*]] +; CHECK: block_5: +; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4 +; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]] +; CHECK: output_block_0_0: +; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4:%.*]], align 4 +; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_0_1: +; CHECK-NEXT: store i32 [[AVAL]], i32* [[TMP6:%.*]], align 4 +; CHECK-NEXT: store i32 [[BVAL]], i32* [[TMP7:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_1]] +; CHECK: output_block_1_0: +; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4]], align 4 +; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_1_1: +; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP6]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP7]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_1]] +; CHECK: final_block_0: +; CHECK-NEXT: ret i1 false +; CHECK: final_block_1: +; CHECK-NEXT: ret i1 true +; Index: llvm/test/Transforms/IROutliner/outlining-multiple-exits-one-output-set.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-multiple-exits-one-output-set.ll @@ -0,0 +1,196 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Here we have multiple exits, but different sources, and only one has an +; output set. We check to make sure that we do not generated extra output +; blocks or entries in the switch statement. 
+ +define void @outline_outputs1() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_6 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_7 +block_6: + ret void +block_7: + ret void +} + +define void @outline_outputs2() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_7 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_6 +block_6: + %diff = sub i32 %a2val, %b2val + ret void +block_7: + %quot = udiv i32 %add, %mul + ret void +} +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* null, i32* null, i32* null, i32* null, i32 -1) +; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_6:%.*]], label [[BLOCK_7:%.*]] +; CHECK: block_6: +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MUL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: 
[[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[ADD_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[MUL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: [[TMP0:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[ADD_LOC]], i32* [[MUL_LOC]], i32 0) +; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4 +; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4 +; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 +; CHECK-NEXT: [[MUL_RELOAD:%.*]] = load i32, i32* [[MUL_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: br i1 [[TMP0]], label [[BLOCK_7:%.*]], label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]] +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[ADD_RELOAD]], [[MUL_RELOAD]] +; CHECK-NEXT: ret void +; +; +; CHECK: define internal i1 @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]] +; CHECK: block_6.exitStub: +; CHECK-NEXT: switch i32 [[TMP8:%.*]], label [[FINAL_BLOCK_1:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_1_1:%.*]] +; CHECK-NEXT: ] +; CHECK: block_7.exitStub: +; CHECK-NEXT: switch i32 [[TMP8]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_1_0:%.*]] +; CHECK-NEXT: ] +; CHECK: block_2_to_outline: +; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] +; CHECK-NEXT: br label [[BLOCK_5:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] +; CHECK-NEXT: br label [[BLOCK_4:%.*]] +; CHECK: block_4: +; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[BLOCK_6_EXITSTUB:%.*]] +; CHECK: block_5: +; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4 +; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]] +; CHECK: output_block_1_0: +; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4:%.*]], align 4 +; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_1_1: +; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP6:%.*]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP7:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_1]] +; CHECK: final_block_0: +; CHECK-NEXT: ret i1 false +; CHECK: 
final_block_1: +; CHECK-NEXT: ret i1 true +; Index: llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll +++ llvm/test/Transforms/IROutliner/outlining-multiple-exits.ll @@ -1,8 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s -; Here we have multiple exits, but we can't actually outline anything but -; single entry and single exits yet, we check to make sure it doesn't happen. +; Here we have multiple exits, but the different sources, same outputs are +; needed, this checks that they are compressed, and moved into the appropriate +; output blocks. define void @outline_outputs1() #0 { entry: @@ -87,6 +88,10 @@ } ; CHECK-LABEL: @outline_outputs1( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[BVAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[AVAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 @@ -99,34 +104,38 @@ ; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[BLOCK_2]] ; CHECK: block_2: -; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[A]], align 4 -; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[B]], align 4 -; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] -; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] -; CHECK-NEXT: br label [[BLOCK_5:%.*]] -; CHECK: block_3: -; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[A]], align 4 -; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[B]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] -; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] -; CHECK-NEXT: br label [[BLOCK_4:%.*]] -; CHECK: block_4: -; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 -; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4 -; CHECK-NEXT: br label [[BLOCK_6:%.*]] -; CHECK: block_5: -; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD2]], i32* [[OUTPUT]], i32 [[MUL2]], i32* [[RESULT]]) -; CHECK-NEXT: br label [[BLOCK_7:%.*]] +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[AVAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[BVAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[AVAL_LOC]], i32* [[BVAL_LOC]]) +; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4 +; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4 +; CHECK-NEXT: [[AVAL_RELOAD:%.*]] = load i32, i32* [[AVAL_LOC]], align 4 +; CHECK-NEXT: [[BVAL_RELOAD:%.*]] = load i32, i32* [[BVAL_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* 
[[LT_CAST1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[BLOCK_6:%.*]], label [[BLOCK_7:%.*]] ; CHECK: block_6: -; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL]], [[BVAL]] +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[AVAL_RELOAD]], [[BVAL_RELOAD]] ; CHECK-NEXT: ret void ; CHECK: block_7: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL]], [[B2VAL]] +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]] ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: @outline_outputs2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[BVAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[AVAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2VAL_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A2VAL_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 @@ -139,39 +148,61 @@ ; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[BLOCK_2]] ; CHECK: block_2: -; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[A]], align 4 -; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[A2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[B2VAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[LT_CAST2:%.*]] = bitcast i32* [[AVAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[BVAL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[A2VAL_LOC]], i32* [[B2VAL_LOC]], i32* [[AVAL_LOC]], i32* [[BVAL_LOC]]) +; CHECK-NEXT: [[A2VAL_RELOAD:%.*]] = load i32, i32* [[A2VAL_LOC]], align 4 +; CHECK-NEXT: [[B2VAL_RELOAD:%.*]] = load i32, i32* [[B2VAL_LOC]], align 4 +; CHECK-NEXT: [[AVAL_RELOAD:%.*]] = load i32, i32* [[AVAL_LOC]], align 4 +; CHECK-NEXT: [[BVAL_RELOAD:%.*]] = load i32, i32* [[BVAL_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[BLOCK_7:%.*]], label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL_RELOAD]], [[B2VAL_RELOAD]] +; CHECK-NEXT: ret void +; CHECK: block_7: +; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[AVAL_RELOAD]], [[BVAL_RELOAD]] +; CHECK-NEXT: ret void +; +; +; CHECK: define internal i1 @outlined_ir_func_0( +; CHECK: newFuncRoot: +; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]] +; CHECK: block_6.exitStub: +; CHECK-NEXT: store i32 [[AVAL:%.*]], i32* [[TMP6:%.*]], align 4 +; CHECK-NEXT: store i32 [[BVAL:%.*]], i32* [[TMP7:%.*]], align 4 +; CHECK-NEXT: ret i1 true +; CHECK: block_7.exitStub: +; CHECK-NEXT: store i32 [[A2VAL:%.*]], i32* [[TMP4:%.*]], align 4 +; CHECK-NEXT: store i32 [[B2VAL:%.*]], i32* [[TMP5:%.*]], align 4 +; CHECK-NEXT: ret i1 false +; CHECK: block_2_to_outline: +; CHECK-NEXT: [[A2VAL]] = load i32, i32* [[TMP0:%.*]], align 4 
+; CHECK-NEXT: [[B2VAL]] = load i32, i32* [[TMP1:%.*]], align 4 ; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] ; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] ; CHECK-NEXT: br label [[BLOCK_5:%.*]] ; CHECK: block_3: -; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[A]], align 4 -; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[AVAL]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[BVAL]] = load i32, i32* [[TMP1]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] ; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] ; CHECK-NEXT: br label [[BLOCK_4:%.*]] ; CHECK: block_4: -; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 -; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4 -; CHECK-NEXT: br label [[BLOCK_7:%.*]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[BLOCK_6_EXITSTUB:%.*]] ; CHECK: block_5: -; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD2]], i32* [[OUTPUT]], i32 [[MUL2]], i32* [[RESULT]]) -; CHECK-NEXT: br label [[BLOCK_6:%.*]] -; CHECK: block_6: -; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[A2VAL]], [[B2VAL]] -; CHECK-NEXT: ret void -; CHECK: block_7: -; CHECK-NEXT: [[QUOT:%.*]] = udiv i32 [[AVAL]], [[BVAL]] -; CHECK-NEXT: ret void -; -; -; CHECK: define internal void @outlined_ir_func_0( -; CHECK-NEXT: newFuncRoot: -; CHECK-NEXT: br label [[BLOCK_5_TO_OUTLINE:%.*]] -; CHECK: block_7.exitStub: -; CHECK-NEXT: ret void -; CHECK: block_5_to_outline: -; CHECK-NEXT: store i32 [[TMP0:%.*]], i32* [[TMP1:%.*]], align 4 -; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4 ; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]] ;
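
For orientation only, the sketch below restates the keying scheme this patch introduces, in plain standalone C++ (std::map and uint64_t stand in for llvm::DenseMap and the ConstantInt return values; all names are hypothetical and the snippet is not part of the patch). Each distinct return value of the aggregate outlined function gets its own end block, and each output scheme gets one store block per return value, named output_block_<scheme>_<sorted key index>, which is the naming visible in the tests above.

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main() {
  // One end block per distinct return value, mirroring OutlinableGroup::EndBBs.
  std::map<uint64_t, std::string> EndBlocks = {{0, "final_block_0"},
                                               {1, "final_block_1"}};

  // Mirrors getSortedConstantKeys: visit keys in ascending constant order so
  // generated block names are deterministic. (std::map already iterates in
  // key order; the patch sorts the DenseMap keys explicitly.)
  std::vector<uint64_t> SortedKeys;
  for (const auto &KV : EndBlocks)
    SortedKeys.push_back(KV.first);

  // For output scheme 1, one store block per return value, named
  // output_block_<scheme>_<key index>, each branching to its end block.
  unsigned Scheme = 1;
  for (unsigned Idx = 0; Idx < SortedKeys.size(); ++Idx)
    std::cout << "output_block_" << Scheme << "_" << Idx << " -> "
              << EndBlocks[SortedKeys[Idx]] << "\n";
  return 0;
}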