Index: llvm/include/llvm/Transforms/IPO/IROutliner.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -142,6 +142,12 @@
   /// function has been extracted, the start and end of the BasicBlock
   /// containing the called function.
   void reattachCandidate();
+
+  /// Get the size of the code removed from the region.
+  ///
+  /// \param [in] TTI - The TargetTransformInfo for the parent function.
+  /// \returns the code size of the region
+  unsigned getBenefit(TargetTransformInfo &TTI);
 };
 
 /// This class is a pass that identifies similarity in a Module, extracts
@@ -199,6 +205,28 @@
   bool findAddInputsOutputs(Module &M, OutlinableRegion &Region,
                             DenseSet &NotSame);
 
+  /// Find the number of instructions that will be removed by extracting the
+  /// OutlinableRegions in \p CurrentGroup.
+  ///
+  /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
+  /// analyzed.
+  /// \returns the number of outlined instructions across all regions.
+  unsigned findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
+
+  /// Find the number of instructions that will be added by reloading outputs.
+  ///
+  /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
+  /// analyzed.
+  /// \returns the number of added reload instructions across all regions.
+  unsigned findCostOutputReloads(OutlinableGroup &CurrentGroup);
+
+  /// Find the cost and the benefit of \p CurrentGroup and save it back to
+  /// \p CurrentGroup.
+  ///
+  /// \param [in] M - The module being analyzed
+  /// \param [in,out] CurrentGroup - The overall outlined section
+  void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
+
   /// Update the output mapping based on the load instruction, and the outputs
   /// of the extracted function.
   ///
@@ -227,6 +255,11 @@
                                     std::vector &FuncsToRemove,
                                     unsigned &OutlinedFunctionNum);
 
+  /// If false, we do not worry if the cost is greater than the benefit. This
+  /// is for debugging and testing, so that we can test small cases to ensure
+  /// that the outlining is being done correctly.
+  bool CostModel = true;
+
   /// The set of outlined Instructions, identified by their location in the
   /// sequential ordering of instructions in a Module.
   DenseSet Outlined;
Index: llvm/lib/Transforms/IPO/IROutliner.cpp
===================================================================
--- llvm/lib/Transforms/IPO/IROutliner.cpp
+++ llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -30,6 +30,13 @@
 using namespace llvm;
 using namespace IRSimilarity;
 
+// This is a debug option to test small pieces of code to ensure that outlining
+// works correctly.
+static cl::opt<bool> NoCostModel(
+    "ir-outlining-no-cost", cl::init(false), cl::ReallyHidden,
+    cl::desc("Debug option to outline greedily, without restriction that "
+             "calculated benefit outweighs cost"));
+
 /// The OutlinableGroup holds all the overarching information for outlining
 /// a set of regions that are structurally similar to one another, such as the
 /// types of the overall function, the output blocks, the sets of stores needed
@@ -63,6 +70,13 @@
   /// index in ArgumentTypes is an output argument.
   unsigned NumAggregateInputs = 0;
 
+  /// The number of instructions that will be outlined by extracting \ref
+  /// Regions.
+  unsigned Benefit = 0;
+  /// The number of added instructions needed for the outlining of the \ref
+  /// Regions.
+  unsigned Cost = 0;
+
   /// For the \ref Regions, we look at every Value. If it is a constant,
   /// we check whether it is the same in Region.
   ///
@@ -205,6 +219,40 @@
   return false;
 }
 
+unsigned OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
+  unsigned Benefit = 0;
+
+  // Estimate the benefit of outlining a specific section of the program. We
+  // mostly delegate this task to the TargetTransformInfo so that if the target
+  // has specific changes, we can have a more accurate estimate.
+
+  // However, getInstructionCost delegates the code size calculation for
+  // arithmetic instructions to getArithmeticInstrCost in
+  // llvm/Analysis/TargetTransformInfoImpl.h, where it always estimates the
+  // code size for a division and remainder instruction to be equal to 4, and
+  // everything else to 1. This is not an accurate representation of the
+  // division instruction for targets that have a native division instruction.
+  // To be overly conservative, we only add 1 to the number of instructions for
+  // each division instruction.
+  for (Instruction &I : *StartBB) {
+    switch (I.getOpcode()) {
+    case Instruction::FDiv:
+    case Instruction::FRem:
+    case Instruction::SDiv:
+    case Instruction::SRem:
+    case Instruction::UDiv:
+    case Instruction::URem:
+      Benefit += 1;
+      break;
+    default:
+      Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+      break;
+    }
+  }
+
+  return Benefit;
+}
+
 /// Find whether \p Region matches the global value numbering to Constant
 /// mapping found so far.
 ///
@@ -1162,6 +1210,150 @@
   }
 }
 
+unsigned IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
+  unsigned RegionBenefit = 0;
+  for (OutlinableRegion *Region : CurrentGroup.Regions) {
+    TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
+    // Estimate how much code size outlining this region will remove.
+    unsigned OutlinedRegionBenefit = Region->getBenefit(TTI);
+    RegionBenefit += OutlinedRegionBenefit;
+    LLVM_DEBUG(dbgs() << "Adding: " << OutlinedRegionBenefit
+                      << " saved instructions to overall benefit.\n");
+  }
+
+  return RegionBenefit;
+}
+
+unsigned IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
+  unsigned OverallCost = 0;
+  for (OutlinableRegion *Region : CurrentGroup.Regions) {
+    TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
+
+    // Each output incurs a load after the call, so we add that to the cost.
+    for (unsigned OutputGVN : Region->GVNStores) {
+      Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
+      assert(OV.hasValue() && "Could not find value for GVN?");
+      Value *V = OV.getValue();
+      unsigned LoadCost =
+          TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
+                              TargetTransformInfo::TCK_CodeSize);
+
+      LLVM_DEBUG(dbgs() << "Adding: " << LoadCost
+                        << " instructions to cost for output of type "
+                        << *V->getType() << "\n");
+      OverallCost += LoadCost;
+    }
+  }
+
+  return OverallCost;
+}
+
+/// Find the extra instructions needed to handle any output values for the
+/// region.
+///
+/// \param [in] M - The Module to outline from.
+/// \param [in] CurrentGroup - The collection of OutlinableRegions to analyze.
+/// \param [in] TTI - The TargetTransformInfo used to collect information for
+/// new instruction costs.
+/// \returns the additional cost to handle the outputs.
+static unsigned findCostForOutputBlocks(Module &M,
+                                        OutlinableGroup &CurrentGroup,
+                                        TargetTransformInfo &TTI) {
+  unsigned OutputCost = 0;
+
+  for (const ArrayRef<unsigned> &OutputUse :
+       CurrentGroup.OutputGVNCombinations) {
+    IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
+    for (unsigned GVN : OutputUse) {
+      Optional<Value *> OV = Candidate.fromGVN(GVN);
+      assert(OV.hasValue() && "Could not find value for GVN?");
+      Value *V = OV.getValue();
+      unsigned StoreCost =
+          TTI.getMemoryOpCost(Instruction::Store, V->getType(), Align(1), 0,
+                              TargetTransformInfo::TCK_CodeSize);
+
+      // An instruction cost is added for each store set that needs to occur for
+      // various output combinations inside the function, plus a branch to
+      // return to the exit block.
+      LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
+                        << " instructions to cost for output of type "
+                        << *V->getType() << "\n");
+      OutputCost += StoreCost;
+    }
+
+    unsigned BranchCost =
+        TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
+    LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
+                      << " a branch instruction\n");
+    OutputCost += BranchCost;
+  }
+
+  // If there is more than one output scheme, we must have a comparison and
+  // branch for each different item in the switch statement.
+  if (CurrentGroup.OutputGVNCombinations.size() > 1) {
+    unsigned ComparisonCost = TTI.getCmpSelInstrCost(
+        Instruction::ICmp, Type::getInt32Ty(M.getContext()),
+        Type::getInt32Ty(M.getContext()), TargetTransformInfo::TCK_CodeSize);
+    unsigned BranchCost =
+        TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
+
+    unsigned DifferentBlocks = CurrentGroup.OutputGVNCombinations.size();
+    unsigned TotalCost = (ComparisonCost + BranchCost) * DifferentBlocks;
+
+    LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
+                      << " instructions for each switch case for each different"
+                      << " output path in a function\n");
+    OutputCost += TotalCost;
+  }
+
+  return OutputCost;
+}
+
+void IROutliner::findCostBenefit(Module &M, OutlinableGroup &CurrentGroup) {
+  unsigned RegionBenefit = findBenefitFromAllRegions(CurrentGroup);
+  CurrentGroup.Benefit += RegionBenefit;
+  LLVM_DEBUG(dbgs() << "Current Benefit: " << CurrentGroup.Benefit << "\n");
+
+  unsigned OutputReloadCost = findCostOutputReloads(CurrentGroup);
+  CurrentGroup.Cost += OutputReloadCost;
+  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+  unsigned AverageRegionBenefit = RegionBenefit / CurrentGroup.Regions.size();
+  unsigned OverallArgumentNum = CurrentGroup.ArgumentTypes.size();
+  unsigned NumRegions = CurrentGroup.Regions.size();
+  TargetTransformInfo &TTI =
+      getTTI(*CurrentGroup.Regions[0]->Candidate->getFunction());
+
+  // We add one region to the cost once, to account for the instructions added
+  // inside of the newly created function.
+  LLVM_DEBUG(dbgs() << "Adding: " << AverageRegionBenefit
+                    << " instructions to cost for body of new function.\n");
+  CurrentGroup.Cost += AverageRegionBenefit;
+  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+  // For each argument, we must add an instruction for loading the argument
+  // out of the register and into a value inside of the newly outlined function.
+  LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
+                    << " instructions to cost for each argument in the new"
+                    << " function.\n");
+  CurrentGroup.Cost += 2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic;
+  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+  // Each argument needs to either be loaded into a register or onto the stack.
+  // Some arguments will only be loaded into the stack once the argument
+  // registers are filled.
+  LLVM_DEBUG(dbgs() << "Adding: " << OverallArgumentNum
+                    << " instructions to cost for each argument in the new"
+                    << " function " << NumRegions << " times for the "
+                    << "needed argument handling at the call site.\n");
+  CurrentGroup.Cost +=
+      2 * OverallArgumentNum * TargetTransformInfo::TCC_Basic * NumRegions;
+  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+
+  CurrentGroup.Cost += findCostForOutputBlocks(M, CurrentGroup, TTI);
+  LLVM_DEBUG(dbgs() << "Current Cost: " << CurrentGroup.Cost << "\n");
+}
+
 void IROutliner::updateOutputMapping(OutlinableRegion &Region,
                                      ArrayRef Outputs, LoadInst *LI) {
   // For and load instructions following the call
@@ -1319,6 +1511,19 @@
 
     CurrentGroup.collectGVNStoreSets(M);
 
+    if (CostModel)
+      findCostBenefit(M, CurrentGroup);
+
+    // Skip unprofitable groups under the cost model; reattach the candidates.
+    if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
+      for (OutlinableRegion *OS : CurrentGroup.Regions)
+        OS->reattachCandidate();
+      continue;
+    }
+
+    LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
+                      << " and benefit " << CurrentGroup.Benefit << "\n");
+
     // Create functions out of all the sections, and mark them as outlined.
     std::vector OutlinedRegions;
     for (OutlinableRegion *OS : OutlinableRegions) {
@@ -1348,7 +1553,11 @@
   return OutlinedFunctionNum;
 }
 
-bool IROutliner::run(Module &M) { return doOutline(M) > 0; }
+bool IROutliner::run(Module &M) {
+  CostModel = !NoCostModel;
+
+  return doOutline(M) > 0;
+}
 
 // Pass Manager Boilerplate
 class IROutlinerLegacyPass : public ModulePass {
Index: llvm/test/Transforms/IROutliner/extraction.ll
===================================================================
--- llvm/test/Transforms/IROutliner/extraction.ll
+++ llvm/test/Transforms/IROutliner/extraction.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
 
 ; This test makes sure we are extracting the found similarity sections
 ; correctly at the call site.
Index: llvm/test/Transforms/IROutliner/illegal-allocas.ll
===================================================================
--- llvm/test/Transforms/IROutliner/illegal-allocas.ll
+++ llvm/test/Transforms/IROutliner/illegal-allocas.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
 
 ; Show that we do not extract allocas, as outlining allocas may cause
 ; inconsistencies with the CodeExtractor's algorithm.
Index: llvm/test/Transforms/IROutliner/illegal-assumes.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test ensures that we do not include llvm.assumes. There are exceptions ; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now. Index: llvm/test/Transforms/IROutliner/illegal-branches.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-branches.ll +++ llvm/test/Transforms/IROutliner/illegal-branches.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; Show that we do not extract sections with branches as it would require extra ; label and control flow checking. Index: llvm/test/Transforms/IROutliner/illegal-callbr.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-callbr.ll +++ llvm/test/Transforms/IROutliner/illegal-callbr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that we do not outline callbr instruction since as we do not ; outline any control flow change instructions. 
Index: llvm/test/Transforms/IROutliner/illegal-calls.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-calls.ll +++ llvm/test/Transforms/IROutliner/illegal-calls.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that we do not outline calls. Special calls, such as ; indirect or nameless calls require extra handling to ensure that there Index: llvm/test/Transforms/IROutliner/illegal-catchpad.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-catchpad.ll +++ llvm/test/Transforms/IROutliner/illegal-catchpad.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that catchpad instructions are not outlined even if they ; in a similar section. Dealing with exception handling inside of an outlined Index: llvm/test/Transforms/IROutliner/illegal-cleanup.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-cleanup.ll +++ llvm/test/Transforms/IROutliner/illegal-cleanup.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that cleanuppad instructions are not outlined even if they ; in a similar section. 
Dealing with exception handling inside of an outlined Index: llvm/test/Transforms/IROutliner/illegal-frozen.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-frozen.ll +++ llvm/test/Transforms/IROutliner/illegal-frozen.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; Show that we do not extract freeze instructions, since extra handling is ; required to mark any outputs used with freeze. Index: llvm/test/Transforms/IROutliner/illegal-gep.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-gep.ll +++ llvm/test/Transforms/IROutliner/illegal-gep.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks to make sure that we do not outline getelementptr ; instructions since we must make extra checks on the final operands. Index: llvm/test/Transforms/IROutliner/illegal-invoke.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-invoke.ll +++ llvm/test/Transforms/IROutliner/illegal-invoke.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that invoke instructions are not outlined even if they ; in a similar section. 
Outlining does not currently handle control flow Index: llvm/test/Transforms/IROutliner/illegal-landingpad.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-landingpad.ll +++ llvm/test/Transforms/IROutliner/illegal-landingpad.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that landingpad instructions are not outlined even if they ; in a similar section. Dealing with exception handling inside of an outlined Index: llvm/test/Transforms/IROutliner/illegal-memcpy.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memcpy.ll +++ llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that we do not outline memcpy intrinsics since it may require ; extra address space checks. Index: llvm/test/Transforms/IROutliner/illegal-memmove.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memmove.ll +++ llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that we do not outline memcpy intrinsics since it may require ; extra address space checks. 
Index: llvm/test/Transforms/IROutliner/illegal-memset.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memset.ll +++ llvm/test/Transforms/IROutliner/illegal-memset.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test checks that we do not outline memset intrinsics since it requires ; extra address space checks. Index: llvm/test/Transforms/IROutliner/illegal-phi-nodes.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-phi-nodes.ll +++ llvm/test/Transforms/IROutliner/illegal-phi-nodes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; Show that we do not extract phi nodes as it would require extra label and ; control flow checking. Index: llvm/test/Transforms/IROutliner/illegal-vaarg.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test ensures that we do not outline vararg instructions or intrinsics, as ; they may cause inconsistencies when outlining. 
Index: llvm/test/Transforms/IROutliner/legal-debug.ll =================================================================== --- llvm/test/Transforms/IROutliner/legal-debug.ll +++ llvm/test/Transforms/IROutliner/legal-debug.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test looks ahecks that debug info is extracted along with the other ; instructions. Index: llvm/test/Transforms/IROutliner/outlining-address-taken.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-address-taken.ll +++ llvm/test/Transforms/IROutliner/outlining-address-taken.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test shows that we do not outline from basic blocks with their address ; taken. Index: llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll +++ llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test looks at instances of constants in the different regions. 
If there ; is a register in the same place as a constant in a similar region of code, we Index: llvm/test/Transforms/IROutliner/outlining-cost-model.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-cost-model.ll @@ -0,0 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s -check-prefix=NOCOST + +; This test checks that we have different results from when the cost model +; is on versus when it is off. That is, if the number of instructions needed to +; handle the arguments is greater than the number of instructions being removed, +; we do not outline. + +define void @function1() #0 { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: ret void +; +; NOCOST-LABEL: @function1( +; NOCOST-NEXT: entry: +; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]]) +; NOCOST-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %add = add i32 %0, %1 + %mul = mul i32 %0, %1 + %sub = sub i32 %0, %1 + %div = sdiv i32 %0, %1 + %add2 = add i32 %0, %1 + %mul2 = mul i32 %0, %1 + %sub2 = sub i32 %0, %1 + %div2 = sdiv i32 %0, %1 + ret void +} + +define void @function2() #0 { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: ret void +; +; NOCOST-LABEL: @function2( +; NOCOST-NEXT: entry: +; 
NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]]) +; NOCOST-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %add = add i32 %0, %1 + %mul = mul i32 %0, %1 + %sub = sub i32 %0, %1 + %div = sdiv i32 %0, %1 + %add2 = add i32 %0, %1 + %mul2 = mul i32 %0, %1 + %sub2 = sub i32 %0, %1 + %div2 = sdiv i32 %0, %1 + ret void +} + +define void @function3() #0 { +; CHECK-LABEL: @function3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP2]], [[ADD]] +; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4 +; CHECK-NEXT: ret void +; +; NOCOST-LABEL: @function3( +; NOCOST-NEXT: entry: +; NOCOST-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* +; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; NOCOST-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* +; 
NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; NOCOST-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) +; NOCOST-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 +; NOCOST-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 +; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; NOCOST-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; NOCOST-NEXT: call void @outlined_ir_func_2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; NOCOST-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %add = add i32 %0, %1 + store i32 %add, i32* %output, align 4 + %2 = load i32, i32* %output, align 4 + %3 = load i32, i32* %output, align 4 + %mul = mul i32 %2, %add + store i32 %mul, i32* %result, align 4 + ret void +} + +define void @function4() #0 { +; CHECK-LABEL: @function4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP2]], [[ADD]] +; CHECK-NEXT: store i32 [[MUL]], i32* [[RESULT]], align 4 +; CHECK-NEXT: ret void +; +; 
NOCOST-LABEL: @function4( +; NOCOST-NEXT: entry: +; NOCOST-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[A:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[B:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4 +; NOCOST-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* +; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; NOCOST-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* +; NOCOST-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; NOCOST-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) +; NOCOST-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 +; NOCOST-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 +; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; NOCOST-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; NOCOST-NEXT: call void @outlined_ir_func_2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; NOCOST-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %output = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + %0 = load i32, i32* %a, align 4 + %1 = load i32, i32* %b, align 4 + %add = add i32 %0, %1 + store i32 %add, i32* %output, align 4 + %2 = load i32, i32* %output, align 4 + %mul = mul i32 %2, %add + store i32 %mul, i32* %result, align 4 + ret void +} Index: llvm/test/Transforms/IROutliner/outlining-debug-statements.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-debug-statements.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost
< %s | FileCheck %s + +; This test makes sure that we do not include debug statements in outlined +; functions. + +define void @outline_dbg1() { +; CHECK-LABEL: @outline_dbg1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + call void @llvm.dbg.value(metadata i64 0, metadata !14, metadata !DIExpression()), !dbg !14 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +define void @outline_dbg2() { +; CHECK-LABEL: @outline_dbg2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 + +!0 = !DIFile(filename:
"foo.c", directory: "/tmp") +!1 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!5 = distinct !DICompileUnit(language: DW_LANG_C, file: !0, producer: "My Compiler", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !6, retainedTypes: !6, globals: !7) +!6 = !{} +!7 = !{} +!11 = distinct !DISubprogram(name: "func_5", scope: !0, file: !0, line: 117, type: !12, isLocal: true, isDefinition: true, scopeLine: 118, isOptimized: false, unit: !5, retainedNodes: !6) +!12 = !DISubroutineType(types: !13) +!13 = !{} +!14 = !DILocalVariable(name: "p_6", arg: 1, scope: !11, line: 117, type: !1) Index: llvm/test/Transforms/IROutliner/outlining-different-constants.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-different-constants.ll +++ llvm/test/Transforms/IROutliner/outlining-different-constants.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost< %s | FileCheck %s ; This test looks at the constants in the regions, and if it they are the ; differents it elevates the constants to arguments. Index: llvm/test/Transforms/IROutliner/outlining-different-globals.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-different-globals.ll +++ llvm/test/Transforms/IROutliner/outlining-different-globals.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test looks at the globals in the regions, and makes sure they are not ; outlined if they are different values.
Index: llvm/test/Transforms/IROutliner/outlining-different-output-blocks.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-different-output-blocks.ll +++ llvm/test/Transforms/IROutliner/outlining-different-output-blocks.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -iroutliner < %s | FileCheck %s +; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; These functions are constructed slightly differently so that they require ; different output blocks for the values used outside of the region. We are Index: llvm/test/Transforms/IROutliner/outlining-different-structure.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-different-structure.ll +++ llvm/test/Transforms/IROutliner/outlining-different-structure.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This is a negative case to show that when we have the same set of ; instructions, but in a different order, they are not outlined in the same way. Index: llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll +++ llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -iroutliner < %s | FileCheck %s +; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test tests that inputs that are replaced with the output of an outlined ; function is still recognized as the same value. 
Index: llvm/test/Transforms/IROutliner/outlining-same-constants.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-same-constants.ll +++ llvm/test/Transforms/IROutliner/outlining-same-constants.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test looks at the constants in the regions, and if it they are the ; same it outlines them as constants rather than elevating them to arguments. Index: llvm/test/Transforms/IROutliner/outlining-same-globals.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-same-globals.ll +++ llvm/test/Transforms/IROutliner/outlining-same-globals.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s @global1 = global i32 1, align 4 @global2 = global i32 2, align 4 Index: llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll +++ llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -iroutliner < %s | FileCheck %s +; RUN: opt -S -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; These functions are constructed slightly differently so that they require ; the same output blocks for the values used outside of the region. We are