Index: llvm/include/llvm/Transforms/IPO/IROutliner.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -93,6 +93,13 @@
   /// Marks whether this region ends in a branch, there is special handling
   /// required for the following basic blocks in this case.
   bool EndsInBranch = false;
+
+  BasicBlock *PHINodeAdjustBB = nullptr;
+
+  /// The PHIBlocks with their corresponding return block based on the return
+  /// value as the key.
+  DenseMap<Value *, BasicBlock *> PHIBlocks;
+
   /// Mapping of the argument number in the deduplicated function
   /// to a given constant, which is used when creating the arguments to the call
   /// to the newly created deduplicated function. This is handled separately
Index: llvm/lib/Transforms/IPO/IROutliner.cpp
===================================================================
--- llvm/lib/Transforms/IPO/IROutliner.cpp
+++ llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -268,6 +268,13 @@
   return Benefit;
 }
 
+static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings,
+                                Value *Input) {
+  if (OutputMappings.find(Input) != OutputMappings.end())
+    return OutputMappings.find(Input)->second;
+  return Input;
+}
+
 /// Find whether \p Region matches the global value numbering to Constant
 /// mapping found so far.
 ///
@@ -787,9 +794,43 @@
       // Do not process PHI if there is one (or fewer) predecessor from region.
       if (IncomingVals.size() <= 1)
         continue;
-      else {
-        Region.IgnoreRegion = true;
-        return;
+
+      Outputs.insert(&PN);
+
+      // Not all of the incoming values should be ignored as other inputs and
+      // outputs into the outlined region. If they have other uses outside of
+      // the single PHINode we should not skip over it.
+      for (unsigned i : IncomingVals) {
+        Value *V = PN.getIncomingValue(i);
+        bool NotOnlyUsedInExitBlocks = true;
+        for (User *U : V->users()) {
+          Instruction *I = dyn_cast<Instruction>(U);
+          if (!I)
+            continue;
+
+          // If the use of the item is inside the region, we skip it since a
+          // use inside the region does not tell us anything about the uses in
+          // non-exit blocks outside the region.
+          BasicBlock *Parent = I->getParent();
+          if (BBSet.contains(Parent))
+            continue;
+
+          // If it's not a PHINode then we definitely know the use matters.
+          if (!isa<PHINode>(I)) {
+            NotOnlyUsedInExitBlocks = false;
+            break;
+          }
+
+          // If we have a PHINode outside one of the exit locations, then it
+          // can be considered an outside use as well.
+          if (!Exits.contains(Parent)) {
+            NotOnlyUsedInExitBlocks = false;
+            break;
+          }
+        }
+        if (!NotOnlyUsedInExitBlocks)
+          continue;
+        PHIWrapped.insert(PN.getIncomingValue(i));
+      }
     }
   }
@@ -808,6 +849,8 @@
   // type to the overall argument type list. We also store the GVNs used for
   // stores to identify which values will need to be moved into an special
   // block that holds the stores to the output registers.
+  SmallVector<unsigned, 4> GlobalValues;
+  DenseSet<unsigned> GVNStored;
   for (Value *Output : Outputs) {
     TypeFound = false;
     // We can do this since it is a result value, and will have a number
@@ -815,9 +858,40 @@
     // do not have to be in same order, but are functionally the same, we will
     // have to use a different scheme, as one-to-one correspondence is not
     // guaranteed.
-    unsigned GlobalValue = C.getGVN(Output).getValue();
+    GlobalValues.clear();
+
+    // Since values outside the region can be combined into a PHINode when we
+    // have multiple exits, we check if we have a PHINode in the output, to
+    // mark that both of these outputs are getting caught.
+
+    // If we cannot find a GVN, this means that the input to the PHINode is
+    // not included in the region we are trying to analyze, meaning, that if
+    // it was outlined, we would be adding an extra input.  We ignore this
+    // case for now, and so ignore the region.
+
+    // TODO: Adapt to the extra input from the PHINode.
+    PHINode *PN = dyn_cast<PHINode>(Output);
+    if (PN && !BBSet.contains(PN->getParent())) {
+      for (unsigned i = 0; i < PN->getNumIncomingValues(); ++i) {
+        Value *V = PN->getIncomingValue(i);
+        if (!C.getGVN(V).hasValue()) {
+          Region.IgnoreRegion = true;
+          return;
+        }
+        if (isa<Constant>(V))
+          continue;
+        Instruction *I = dyn_cast<Instruction>(V);
+        if (I && !BBSet.contains(I->getParent()))
+          continue;
+        GlobalValues.push_back(C.getGVN(V).getValue());
+      }
+    } else
+      GlobalValues.push_back(C.getGVN(Output).getValue());
 
     unsigned ArgumentSize = Group.ArgumentTypes.size();
+    // If the output is combined in a PHINode, we make sure to skip over it.
+    if (PHIWrapped.contains(Output))
+      continue;
 
     for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
       if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
         continue;
@@ -829,7 +903,8 @@
       AggArgsUsed.insert(Jdx);
       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
       Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
-      Region.GVNStores.push_back(GlobalValue);
+      for (unsigned GlobalValue : GlobalValues)
+        GVNStored.insert(GlobalValue);
       break;
     }
@@ -843,13 +918,16 @@
           std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
       Region.AggArgToExtracted.insert(
           std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
-      Region.GVNStores.push_back(GlobalValue);
+      for (unsigned GlobalValue : GlobalValues)
+        GVNStored.insert(GlobalValue);
     }
 
-    stable_sort(Region.GVNStores);
     OriginalIndex++;
     TypeIndex++;
   }
+
+  for (unsigned GVN : GVNStored)
+    Region.GVNStores.push_back(GVN);
+  stable_sort(Region.GVNStores);
 }
 
 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
@@ -997,6 +1075,7 @@
 static void replaceArgumentUses(OutlinableRegion &Region,
                                 DenseMap<Value *, BasicBlock *> &OutputBBs,
+                                const DenseMap<Value *, Value *> &OutputMappings,
                                 bool FirstFunction = false) {
   OutlinableGroup &Group = *Region.Parent;
   assert(Region.ExtractedFunction && "Region has no extracted function?");
@@ -1061,6 +1140,154 @@
       OutputBB->getInstList().push_back(NewI);
       LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
                         << *OutputBB << "\n");
+
+      // If this is storing a PHINode, we must make sure it is included in the
+      // overall function.
+      StoreInst *SI = static_cast<StoreInst *>(NewI);
+      if (!isa<PHINode>(SI->getValueOperand()))
+        continue;
+      PHINode *PN = cast<PHINode>(SI->getValueOperand());
+      // If it has a value, it was not split by the code extractor, which
+      // is what we are looking for.
+      if (Region.Candidate->getGVN(PN).hasValue())
+        continue;
+
+      // Find if a PHIBlock exists for this return value already.  If it is
+      // the first time we are analyzing this, we will not, so we record it.
+      PHIVBBIt = Group.PHIBlocks.find(RetVal);
+      bool Inserted = false;
+      if (FirstFunction) {
+        BasicBlock *PHIBlock = PN->getParent();
+        std::tie(PHIVBBIt, Inserted) =
+            Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
+      }
+
+      Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
+
+      // If we did not find a block, we create one, and insert it into the
+      // overall function and record it.
+      if (PHIVBBIt == Group.PHIBlocks.end()) {
+        BasicBlock *PHIBlock;
+        PHIBlock = BasicBlock::Create(I->getContext(), "phi_block",
+                                      OutputBB->getParent());
+        std::tie(PHIVBBIt, Inserted) =
+            Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
+
+        // We find the predecessors in the overall function, and make sure
+        // we now branch to this new block.
+        for (BasicBlock *Pred : predecessors(OutputBB)) {
+          Instruction *Term = Pred->getTerminator();
+          BranchInst *BI = static_cast<BranchInst *>(Term);
+          for (unsigned Succ = 0; Succ < BI->getNumSuccessors(); Succ++) {
+            if (BI->getSuccessor(Succ) != OutputBB)
+              continue;
+            BI->setSuccessor(Succ, PHIBlock);
+          }
+        }
+
+        BranchInst::Create(OutputBB, PHIBlock);
+      }
+      BasicBlock *OverallPhiBlock = PHIVBBIt->second;
+
+      // If this is the first function, we do not need to worry about merging
+      // this with any other block in the overall outlined function.
+      if (FirstFunction)
+        continue;
+
+      // For our PHINode, we find the combined canonical numbering, and
+      // attempt to find a matching PHINode in the overall PHIBlock.  If we
+      // cannot, we copy the PHINode and move it into this new block.
+      DenseSet<unsigned> PNGVNs;
+      for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
+        Value *IVal = PN->getIncomingValue(Idx);
+        if (Argument *A = dyn_cast<Argument>(IVal)) {
+          CallInst *CI = cast<CallInst>(Region.ExtractedFunction->user_back());
+          unsigned ArgNum = A->getArgNo();
+          IVal = CI->getArgOperand(ArgNum);
+        }
+        IVal = findOutputMapping(OutputMappings, IVal);
+        Optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
+        assert(GVN.hasValue() && "No GVN for incoming value");
+        Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
+        PNGVNs.insert(*CanonNum);
+      }
+
+      bool Found = false;
+      OutlinableRegion *First = Group.Regions[0];
+      for (Instruction &Inst : *OverallPhiBlock) {
+        if (!isa<PHINode>(Inst))
+          continue;
+        bool GVNMatches = true;
+        PHINode *CurrPN = static_cast<PHINode *>(&Inst);
+        for (unsigned Idx = 0; Idx < CurrPN->getNumIncomingValues(); Idx++) {
+          Value *IVal = CurrPN->getIncomingValue(Idx);
+          OutlinableRegion *First = Group.Regions[0];
+          if (Argument *A = dyn_cast<Argument>(IVal)) {
+            CallInst *CI = cast<CallInst>(First->Call);
+            unsigned ArgNum = A->getArgNo();
+            IVal = CI->getArgOperand(ArgNum);
+          }
+          IVal = findOutputMapping(OutputMappings, IVal);
+          Optional<unsigned> GVN = First->Candidate->getGVN(IVal);
+          assert(GVN.hasValue() && "No GVN for incoming value");
+          Optional<unsigned> CanonNum =
+              First->Candidate->getCanonicalNum(*GVN);
+          if (!PNGVNs.contains(*CanonNum)) {
+            GVNMatches = false;
+            break;
+          }
+        }
+        if (GVNMatches) {
+          PN->replaceAllUsesWith(CurrPN);
+          Found = true;
+          break;
+        }
+      }
+
+      if (!Found) {
+        PHINode *NewPN = static_cast<PHINode *>(PN->clone());
+        NewPN->insertBefore(&*OverallPhiBlock->begin());
+        for (unsigned Idx = 0; Idx < NewPN->getNumIncomingValues(); Idx++) {
+          Value *IVal = NewPN->getIncomingValue(Idx);
+          BasicBlock *IBlock = NewPN->getIncomingBlock(Idx);
+          if (Argument *A = dyn_cast<Argument>(IVal)) {
+            unsigned ArgNum = A->getArgNo();
+            BasicBlock *In = NewPN->getIncomingBlock(Idx);
+            Instruction *Inst = In->getFirstNonPHI();
+            Optional<unsigned> GVN = Region.Candidate->getGVN(Inst);
+            assert(GVN.hasValue() && "No GVN for incoming value");
+            Optional<unsigned> CanonNum =
+                Region.Candidate->getCanonicalNum(*GVN);
+            Optional<unsigned> FirstGVN =
+                First->Candidate->fromCanonicalNum(*CanonNum);
+            Optional<Value *> TempVal = First->Candidate->fromGVN(*FirstGVN);
+            Value *Val = Group.OutlinedFunction->getArg(ArgNum);
+            NewPN->setIncomingValue(Idx, Val);
+            NewPN->setIncomingBlock(Idx,
+                                    cast<Instruction>(*TempVal)->getParent());
+            continue;
+          }
+
+          IVal = findOutputMapping(OutputMappings, IVal);
+          Optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
+          assert(GVN.hasValue() && "No GVN for incoming value");
+          Optional<unsigned> CanonNum =
+              Region.Candidate->getCanonicalNum(*GVN);
+          Optional<unsigned> FirstGVN =
+              First->Candidate->fromCanonicalNum(*CanonNum);
+          Optional<Value *> Val = First->Candidate->fromGVN(*FirstGVN);
+          NewPN->setIncomingValue(Idx, *Val);
+          if (!isa<Instruction>(*Val)) {
+            Instruction *FirstI = &IBlock->front();
+            Optional<unsigned> GVN2 = Region.Candidate->getGVN(FirstI);
+            assert(GVN2.hasValue() && "No GVN for incoming value");
+            Optional<unsigned> CanonNum2 =
                Region.Candidate->getCanonicalNum(*GVN2);
+            Optional<unsigned> FirstGVN2 =
+                First->Candidate->fromCanonicalNum(*CanonNum2);
+            Val = First->Candidate->fromGVN(*FirstGVN2);
+          }
+          NewPN->setIncomingBlock(Idx, cast<Instruction>(*Val)->getParent());
+        }
+
+        PN->replaceAllUsesWith(NewPN);
+      }
   }
 
   // If we added an edge for basic blocks without a predecessor, we remove it
@@ -1224,6 +1451,12 @@
     ExcludeBBs.insert(VBPair.second);
   for (std::pair<Value *, BasicBlock *> &VBPair : OutputBBs)
     ExcludeBBs.insert(VBPair.second);
+  // Add PHIBlocks to exclusionary zones since it is possible they may not be
+  // the same across the extracted functions.
+  for (std::pair<Value *, BasicBlock *> &VBPair : Region.PHIBlocks)
+    ExcludeBBs.insert(VBPair.second);
+  for (std::pair<Value *, BasicBlock *> &VBPair : Region.Parent->PHIBlocks)
+    ExcludeBBs.insert(VBPair.second);
 
   std::vector<Instruction *> ExtractedFunctionInsts =
       collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
@@ -1244,6 +1477,28 @@
     // If we have found one of the stored values for output, replace the value
     // with the corresponding one from the overall function.
     if (GVN.hasValue() && ValuesToFind.erase(GVN.getValue())) {
+      Value *NewV = OverallFunctionInsts[Idx];
+      PHINode *OverallPN = nullptr;
+      // If there is a PHINode following the instruction, it means it is not
+      // really an output, but the following PHINode is.  We make sure to
+      // update the uses of the PHINode as well to ensure that our output
+      // blocks in the overall function are consistent.
+      for (User *U : NewV->users())
+        if (PHINode *PN = dyn_cast<PHINode>(U))
+          OverallPN = PN;
+
+      for (User *U : V->users()) {
+        if (PHINode *PN = dyn_cast<PHINode>(U)) {
+          if (!OverallPN) {
+            BasicBlock *Old = static_cast<Instruction *>(V)->getParent();
+            BasicBlock *New = static_cast<Instruction *>(NewV)->getParent();
+            PN->replaceIncomingBlockWith(Old, New);
+            continue;
+          }
+          PN->replaceAllUsesWith(OverallPN);
+        }
+      }
+
       V->replaceAllUsesWith(OverallFunctionInsts[Idx]);
       if (ValuesToFind.size() == 0)
         break;
@@ -1253,6 +1508,13 @@
       break;
   }
 
+  CallInst *CI = cast<CallInst>(Region.ExtractedFunction->user_back());
+  for (Value *V : CI->args()) {
+    Optional<unsigned> GVN = Region.Candidate->getGVN(V);
+    if (GVN.hasValue())
+      ValuesToFind.erase(*GVN);
+  }
+
   assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
 
   // If the size of the block is 0, then there are no stores, and we do not
@@ -1401,10 +1663,13 @@
 /// set of stores needed for the different functions.
 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
 /// once outlining is complete.
+/// \param [in] OutputMappings - The mapping of output values to their
+/// original values from before the region was extracted.
 static void fillOverallFunction(
     Module &M, OutlinableGroup &CurrentGroup,
     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
-    std::vector<Function *> &FuncsToRemove) {
+    std::vector<Function *> &FuncsToRemove,
+    const DenseMap<Value *, Value *> &OutputMappings) {
   OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
 
   // Move first extracted function's instructions into new function.
@@ -1432,7 +1697,7 @@
   }
 
   CurrentOS->OutputBlockNum = 0;
-  replaceArgumentUses(*CurrentOS, NewBBs, true);
+  replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
   replaceConstants(*CurrentOS);
 
   // If a new basic block has no new stores, we can erase it from the module.
@@ -1483,7 +1748,8 @@ OutlinableRegion *CurrentOS; - fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove); + fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove, + OutputMappings); for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) { CurrentOS = CurrentGroup.Regions[Idx]; @@ -1502,7 +1768,7 @@ CurrentGroup.OutlinedFunction); NewBBs.insert(std::make_pair(VtoBB.first, NewBB)); } - replaceArgumentUses(*CurrentOS, NewBBs); + replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings); alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs, CurrentGroup.EndBBs, OutputMappings, OutputStoreBBs); Index: llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll @@ -0,0 +1,165 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -ir-sim-branches -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Here we have multiple exits, but the different sources, same outputs are +; needed, this checks that they are compressed, and moved into the appropriate +; output blocks. 
+ +define void @outline_outputs1() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_6 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_6 +block_6: + %diff = phi i32 [%aval, %block_4], [%a2val, %block_5] + ret void +} + +define void @outline_outputs2() #0 { +entry: + %output = alloca i32, align 4 + %result = alloca i32, align 4 + %output2 = alloca i32, align 4 + %result2 = alloca i32, align 4 + %a = alloca i32, align 4 + %b = alloca i32, align 4 + br label %block_2 +block_1: + %a2 = alloca i32, align 4 + %b2 = alloca i32, align 4 + br label %block_2 +block_2: + %a2val = load i32, i32* %a + %b2val = load i32, i32* %b + %add2 = add i32 2, %a2val + %mul2 = mul i32 2, %b2val + br label %block_5 +block_3: + %aval = load i32, i32* %a + %bval = load i32, i32* %b + %add = add i32 2, %aval + %mul = mul i32 2, %bval + br label %block_4 +block_4: + store i32 %add, i32* %output, align 4 + store i32 %mul, i32* %result, align 4 + br label %block_6 +block_5: + store i32 %add2, i32* %output, align 4 + store i32 %mul2, i32* %result, align 4 + br label %block_6 +block_6: + %diff = phi i32 [%aval, %block_4], [%a2val, %block_5] + ret void +} + +; CHECK-LABEL: @outline_outputs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIFF_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca 
i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DIFF_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[DIFF_CE_LOC]]) +; CHECK-NEXT: [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_outputs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DIFF_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BLOCK_2]] +; CHECK: block_2: +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[DIFF_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[DIFF_CE_LOC]]) +; CHECK-NEXT: [[DIFF_CE_RELOAD:%.*]] = 
load i32, i32* [[DIFF_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BLOCK_6:%.*]] +; CHECK: block_6: +; CHECK-NEXT: [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ] +; CHECK-NEXT: ret void +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]] +; CHECK: block_6.exitStub: +; CHECK-NEXT: store i32 [[DIFF_CE:%.*]], i32* [[TMP4:%.*]], align 4 +; CHECK-NEXT: ret void +; CHECK: block_2_to_outline: +; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 2, [[A2VAL]] +; CHECK-NEXT: [[MUL2:%.*]] = mul i32 2, [[B2VAL]] +; CHECK-NEXT: br label [[BLOCK_5:%.*]] +; CHECK: block_3: +; CHECK-NEXT: [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 2, [[AVAL]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 2, [[BVAL]] +; CHECK-NEXT: br label [[BLOCK_4:%.*]] +; CHECK: block_4: +; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: store i32 [[MUL]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[BLOCK_6_SPLIT:%.*]] +; CHECK: block_5: +; CHECK-NEXT: store i32 [[ADD2]], i32* [[TMP2]], align 4 +; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4 +; CHECK-NEXT: br label [[BLOCK_6_SPLIT]] +; CHECK: block_6.split: +; CHECK-NEXT: [[DIFF_CE]] = phi i32 [ [[AVAL]], [[BLOCK_4]] ], [ [[A2VAL]], [[BLOCK_5]] ] +; CHECK-NEXT: br label [[BLOCK_6_EXITSTUB:%.*]] +; Index: llvm/test/Transforms/IROutliner/outlining-multiple-exits-diff-outputs.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-multiple-exits-diff-outputs.ll +++ llvm/test/Transforms/IROutliner/outlining-multiple-exits-diff-outputs.ll @@ -174,18 +174,18 @@ ; ; ; CHECK: define 
internal i1 @outlined_ir_func_0( -; CHECK: newFuncRoot: +; CHECK-NEXT: newFuncRoot: ; CHECK-NEXT: br label [[BLOCK_2_TO_OUTLINE:%.*]] ; CHECK: block_6.exitStub: -; CHECK-NEXT: switch i32 [[TMP8:%.*]], label [[FINAL_BLOCK_1:%.*]] [ -; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]] -; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]] -; CHECK-NEXT: ] -; CHECK: block_7.exitStub: -; CHECK-NEXT: switch i32 [[TMP8]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: switch i32 [[TMP8:%.*]], label [[FINAL_BLOCK_0:%.*]] [ ; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]] ; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]] ; CHECK-NEXT: ] +; CHECK: block_7.exitStub: +; CHECK-NEXT: switch i32 [[TMP8]], label [[FINAL_BLOCK_1:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_1:%.*]] +; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_1:%.*]] +; CHECK-NEXT: ] ; CHECK: block_2_to_outline: ; CHECK-NEXT: [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4 ; CHECK-NEXT: [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4 @@ -207,23 +207,23 @@ ; CHECK-NEXT: store i32 [[MUL2]], i32* [[TMP3]], align 4 ; CHECK-NEXT: br label [[BLOCK_7_EXITSTUB:%.*]] ; CHECK: output_block_0_0: -; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4:%.*]], align 4 -; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5:%.*]], align 4 -; CHECK-NEXT: br label [[FINAL_BLOCK_0]] -; CHECK: output_block_0_1: ; CHECK-NEXT: store i32 [[AVAL]], i32* [[TMP6:%.*]], align 4 ; CHECK-NEXT: store i32 [[BVAL]], i32* [[TMP7:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_0_1: +; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4:%.*]], align 4 +; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5:%.*]], align 4 ; CHECK-NEXT: br label [[FINAL_BLOCK_1]] ; CHECK: output_block_1_0: -; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4]], align 4 -; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5]], align 4 -; CHECK-NEXT: br label [[FINAL_BLOCK_0]] -; CHECK: output_block_1_1: ; CHECK-NEXT: store i32 [[ADD]], i32* [[TMP6]], align 4 ; 
CHECK-NEXT: store i32 [[MUL]], i32* [[TMP7]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_1_1: +; CHECK-NEXT: store i32 [[A2VAL]], i32* [[TMP4]], align 4 +; CHECK-NEXT: store i32 [[B2VAL]], i32* [[TMP5]], align 4 ; CHECK-NEXT: br label [[FINAL_BLOCK_1]] ; CHECK: final_block_0: -; CHECK-NEXT: ret i1 false -; CHECK: final_block_1: ; CHECK-NEXT: ret i1 true +; CHECK: final_block_1: +; CHECK-NEXT: ret i1 false ;