Index: llvm/lib/Transforms/IPO/IROutliner.cpp
===================================================================
--- llvm/lib/Transforms/IPO/IROutliner.cpp
+++ llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -53,6 +53,11 @@
   /// The return block for the overall function.
   BasicBlock *EndBB = nullptr;
 
+  /// A set containing the different GVN store sets needed. Each array contains
+  /// a sorted list of the different values that need to be stored into output
+  /// registers.
+  DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
+
   /// Flag for whether the \ref ArgumentTypes have been defined after the
   /// extraction of the first region.
   bool InputTypesSet = false;
@@ -67,6 +72,13 @@
   /// \param [in,out] NotSame contains the global value numbers where the
   /// constant is not always the same, and must be passed in as an argument.
   void findSameConstants(DenseSet<unsigned> &NotSame);
+
+  /// For the regions, look at each set of GVN stores needed and account for
+  /// each combination. Add an argument to the argument types if there is
+  /// more than one combination.
+  ///
+  /// \param [in] M - The module we are outlining from.
+  void collectGVNStoreSets(Module &M);
 };
 
 /// Move the contents of \p SourceBB to before the last instruction of \p
@@ -265,6 +277,17 @@
     collectRegionsConstants(*Region, GVNToConstant, NotSame);
 }
 
+void OutlinableGroup::collectGVNStoreSets(Module &M) {
+  for (OutlinableRegion *OS : Regions)
+    OutputGVNCombinations.insert(OS->GVNStores);
+
+  // We are adding an extracted argument to decide between which output path
+  // to use in the basic block. It is used in a switch statement and only
+  // needs to be an integer.
+  if (OutputGVNCombinations.size() > 1)
+    ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
+}
+
 Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
                                      unsigned FunctionNameSuffix) {
   assert(!Group.OutlinedFunction && "Function is already defined!");
@@ -653,7 +676,7 @@
   for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
 
     if (AggArgIdx == AggFunc->arg_size() - 1 &&
-        Group.ArgumentTypes.size() > Group.NumAggregateInputs) {
+        Group.OutputGVNCombinations.size() > 1) {
       // If we are on the last argument, and we need to differentiate between
       // output blocks, add an integer to the argument list to determine
       // what block to take
@@ -800,6 +823,50 @@
   }
 }
 
+/// Search the existing output blocks for one that already performs the same
+/// stores as \p OutputBB. The comparison ignores the branch that terminates
+/// each existing block; \p OutputBB is expected not to have one yet.
+///
+/// \param [in] OutputBB - The block we are looking for a duplicate of.
+/// \param [in] OutputStoreBBs - The existing output blocks.
+/// \returns the index of the matching output block, or None if there is none.
+Optional<unsigned>
+findDuplicateOutputBlock(BasicBlock *OutputBB,
+                         ArrayRef<BasicBlock *> OutputStoreBBs) {
+
+  bool WrongInst = false;
+  bool WrongSize = false;
+  unsigned MatchingNum = 0;
+  for (BasicBlock *CompBB : OutputStoreBBs) {
+    WrongInst = false;
+    if (CompBB->size() - 1 != OutputBB->size()) {
+      WrongSize = true;
+      MatchingNum++;
+      continue;
+    }
+    // Counts agree once CompBB's branch is ignored; compare in lockstep.
+    WrongSize = false;
+    BasicBlock::iterator NIt = OutputBB->begin();
+    for (Instruction &I : *CompBB) {
+      if (isa<BranchInst>(I))
+        continue;
+
+      if (!I.isIdenticalTo(&(*NIt))) {
+        WrongInst = true;
+        break;
+      }
+
+      NIt++;
+    }
+    if (!WrongInst && !WrongSize)
+      return MatchingNum;
+
+    MatchingNum++;
+  }
+
+  return None;
+}
+
 /// For the outlined section, move needed the StoreInsts for the output
 /// registers into their own block. Then, determine if there is a duplicate
 /// output block already created.
@@ -872,6 +939,34 @@
   }
 
   assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
+
+  // If the size of the block is 0, then there are no stores, and we do not
+  // need to save this block.
+  if (OutputBB->size() == 0) {
+    Region.OutputBlockNum = -1;
+    OutputBB->eraseFromParent();
+    return;
+  }
+
+  Optional<unsigned> MatchingBB =
+      findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+  // Only read MatchingBB once we know a duplicate was actually found.
+  if (MatchingBB.hasValue()) {
+    LLVM_DEBUG(dbgs() << "Set output block for region in function"
+                      << Region.ExtractedFunction << " to "
+                      << MatchingBB.getValue());
+    Region.OutputBlockNum = MatchingBB.getValue();
+    OutputBB->eraseFromParent();
+    return;
+  }
+
+  Region.OutputBlockNum = OutputStoreBBs.size();
+
+  LLVM_DEBUG(dbgs() << "Create output block for region in"
+                    << Region.ExtractedFunction << " to "
+                    << *OutputBB);
+  OutputStoreBBs.push_back(OutputBB);
+  BranchInst::Create(EndBB, OutputBB);
 }
 
 /// Create the switch statement for outlined function to differentiate between
@@ -886,27 +981,46 @@
 /// \param [in,out] OutputStoreBBs - The existing output blocks.
void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB, ArrayRef OutputStoreBBs) { - Function *AggFunc = OG.OutlinedFunction; - // Create a final block - BasicBlock *ReturnBlock = - BasicBlock::Create(M.getContext(), "final_block", AggFunc); - Instruction *Term = EndBB->getTerminator(); - Term->moveBefore(*ReturnBlock, ReturnBlock->end()); - // Put the switch statement in the old end basic block for the function with - // a fall through to the new return block - LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for " - << OutputStoreBBs.size() << "\n"); - SwitchInst *SwitchI = - SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1), ReturnBlock, - OutputStoreBBs.size(), EndBB); - - unsigned Idx = 0; - for (BasicBlock *BB : OutputStoreBBs) { - SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), - BB); - Term = BB->getTerminator(); - Term->setSuccessor(0, ReturnBlock); - Idx++; + // We only need the switch statement if there is more than one store + // combination.
+ if (OG.OutputGVNCombinations.size() > 1) { + Function *AggFunc = OG.OutlinedFunction; + // Create a final block + BasicBlock *ReturnBlock = + BasicBlock::Create(M.getContext(), "final_block", AggFunc); + Instruction *Term = EndBB->getTerminator(); + Term->moveBefore(*ReturnBlock, ReturnBlock->end()); + // Put the switch statement in the old end basic block for the function with + // a fall through to the new return block + LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for " + << OutputStoreBBs.size() << "\n"); + SwitchInst *SwitchI = + SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1), + ReturnBlock, OutputStoreBBs.size(), EndBB); + + unsigned Idx = 0; + for (BasicBlock *BB : OutputStoreBBs) { + SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), + BB); + Term = BB->getTerminator(); + Term->setSuccessor(0, ReturnBlock); + Idx++; + } + return; + } + + // If there need to be stores, move them from the output block to the end + // block to save on branching instructions + if (OutputStoreBBs.size() == 1) { + LLVM_DEBUG(dbgs() << "Move store instructions to the end block in " + << *OG.OutlinedFunction << "\n"); + BasicBlock *OutputBlock = OutputStoreBBs[0]; + Instruction *Term = OutputBlock->getTerminator(); + Term->eraseFromParent(); + Term = EndBB->getTerminator(); + moveBBContents(*OutputBlock, *EndBB); + Term->moveBefore(*EndBB, EndBB->end()); + OutputBlock->eraseFromParent(); } return; @@ -950,11 +1064,16 @@ replaceArgumentUses(*CurrentOS, NewBB); replaceConstants(*CurrentOS); - if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs) { + // If the new basic block has no new stores, we can erase it from the module. + // If it does, we create a branch instruction to the last basic block from the + // new one.
+ if (NewBB->size() == 0) { + CurrentOS->OutputBlockNum = -1; + NewBB->eraseFromParent(); + } else { BranchInst::Create(CurrentGroup.EndBB, NewBB); OutputStoreBBs.push_back(NewBB); - } else - NewBB->eraseFromParent(); + } // Replace the call to the extracted function with the outlined function. CurrentOS->Call = replaceCalledFunction(M, *CurrentOS); @@ -984,23 +1103,16 @@ CurrentGroup.OutlinedFunction); replaceArgumentUses(*CurrentOS, NewBB); - if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs) { - alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB, - CurrentGroup.EndBB, OutputMappings, - OutputStoreBBs); - BranchInst::Create(CurrentGroup.EndBB, NewBB); - CurrentOS->OutputBlockNum = OutputStoreBBs.size(); - OutputStoreBBs.push_back(NewBB); - } else - NewBB->eraseFromParent(); + alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB, + CurrentGroup.EndBB, OutputMappings, + OutputStoreBBs); CurrentOS->Call = replaceCalledFunction(M, *CurrentOS); FuncsToRemove.push_back(CurrentOS->ExtractedFunction); } // Create a switch statement to handle the different output schemes. - if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs) - createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs); + createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs); OutlinedFunctionNum++; } @@ -1215,11 +1327,7 @@ if (CurrentGroup.Regions.empty()) continue; - // We are adding an extracted argument to decide between which output path - // to use in the basic block. It is used in a switch statement and only - // needs to be an integer. 
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs) - CurrentGroup.ArgumentTypes.push_back(Type::getInt32Ty(M.getContext())); + CurrentGroup.collectGVNStoreSets(M); // Create functions out of all the sections, and mark them as outlined OutlinedRegions.clear(); Index: llvm/test/Transforms/IROutliner/extraction.ll =================================================================== --- llvm/test/Transforms/IROutliner/extraction.ll +++ llvm/test/Transforms/IROutliner/extraction.ll @@ -63,7 +63,7 @@ ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) ; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) @@ -105,7 +105,7 @@ ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 1) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) ; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) Index: llvm/test/Transforms/IROutliner/illegal-assumes.ll 
=================================================================== --- llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, i1* [[D]], i1* [[DL_LOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, i1* [[D]], i1* [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] @@ -51,7 +51,7 @@ ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, i1* [[D]], i1* [[DL_LOC]], i32 1) +; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, i1* [[D]], i1* [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) Index: llvm/test/Transforms/IROutliner/illegal-memcpy.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memcpy.ll +++ llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -14,14 +14,14 @@ ; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* 
[[A_LOC]], i8* [[B_LOC]]) ; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 ; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) ; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) ; CHECK-NEXT: ret i8 [[RET_RELOAD]] @@ -43,14 +43,14 @@ ; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 1) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) ; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 ; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 1) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) ; CHECK-NEXT: 
[[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) ; CHECK-NEXT: ret i8 [[RET_RELOAD]] Index: llvm/test/Transforms/IROutliner/illegal-memmove.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memmove.ll +++ llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -14,14 +14,14 @@ ; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) ; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 ; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) ; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) ; CHECK-NEXT: ret i8 [[RET_RELOAD]] @@ -43,14 +43,14 @@ ; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* 
[[B_LOC]], i32 1) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) ; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 ; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 1) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) ; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) ; CHECK-NEXT: ret i8 [[RET_RELOAD]] Index: llvm/test/Transforms/IROutliner/illegal-vaarg.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) ; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) @@ -56,7 +56,7 @@ ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[LT_CAST:%.*]] 
= bitcast i8** [[AP1_LOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]], i32 1) +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) ; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) Index: llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll +++ llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i32 2, i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_0(i32 2, i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) ; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) @@ -34,7 +34,7 @@ ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST4]]) ; CHECK-NEXT: [[LT_CAST5:%.*]] = bitcast i32* [[DOTLOC2]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST5]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD_RELOAD]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[OUTPUT2]], i32* [[ADD2_LOC]], i32* [[DOTLOC2]], i32 1) +; CHECK-NEXT: 
call void @outlined_ir_func_0(i32 [[ADD_RELOAD]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[OUTPUT2]], i32* [[ADD2_LOC]], i32* [[DOTLOC2]]) ; CHECK-NEXT: [[ADD2_RELOAD:%.*]] = load i32, i32* [[ADD2_LOC]], align 4 ; CHECK-NEXT: [[DOTRELOAD3:%.*]] = load i32, i32* [[DOTLOC2]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST4]]) Index: llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll +++ llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) ; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) @@ -58,7 +58,7 @@ ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) ; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 1) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) ; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 ; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 ; CHECK-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) @@ -83,25 +83,16 @@ ret void } -; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]], i32* [[ARG4:%.*]], i32 [[ARG5:%.*]]) #1 { +; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]], i32* [[ARG4:%.*]]) #1 { ; CHECK: entry_after_outline.exitStub: -; CHECK-NEXT: switch i32 [[ARG5]], label [[BLOCK:%.*]] [ -; CHECK-NEXT: i32 0, label %[[BLOCK_0:.*]] -; CHECK-NEXT: i32 1, label %[[BLOCK_1:.*]] +; CHECK-NEXT: store i32 [[ADD:%.*]], i32* [[ARG3]], align 4 +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[ARG4]], align 4 ; CHECK: entry_to_outline: ; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 ; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG1]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[ADD]] = add i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG2]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARG2]], align 4 - -; CHECK: [[BLOCK_0]]: -; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG3]], align 4 -; CHECK-NEXT: store i32 [[TMP2]], i32* [[ARG4]], align 4 - -; CHECK: [[BLOCK_1]]: -; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG3]], align 4 -; CHECK-NEXT: store i32 [[TMP2]], i32* [[ARG4]], align 4 +; CHECK-NEXT: [[TMP2]] = load i32, i32* [[ARG2]], align 4