Index: llvm/include/llvm/Transforms/IPO/IROutliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/IROutliner.h +++ llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -154,6 +154,13 @@ pruneIncompatibleRegions(std::vector &CandidateVec, OutlinableGroup &CurrentGroup); + /// Identify the needed extracted inputs in a section, and add to the overall + /// function if needed. + /// + /// \param [in] M - The module to outline from. + /// \param [in,out] Region - The region to be extracted + void findAddInputsOutputs(Module &M, OutlinableRegion &Region); + /// Extract \p Region into its own function. /// /// \param [in] Region - The region to be extracted into its own function. @@ -177,8 +184,7 @@ /// Custom InstVisitor to classify different instructions for whether it can /// be analyzed for similarity. This is needed as there may be instruction we /// can identify as having similarity, but are more complicated to outline. - struct InstructionAllowed - : public InstVisitor { + struct InstructionAllowed : public InstVisitor { InstructionAllowed() {} // TODO: Determine a scheme to resolve when the label is similar enough. @@ -198,13 +204,9 @@ // DebugInfo should be included in the regions, but should not be // analyzed for similarity as it has no bearing on the outcome of the // program. - bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { - return true; - } + bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } // TODO: Handle GetElementPtrInsts - bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { - return false; - } + bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { return false; } // TODO: Handle specific intrinsics individually from those that can be // handled. bool IntrinsicInst(IntrinsicInst &II) { return false; } @@ -221,9 +223,7 @@ bool visitCallBrInst(CallBrInst &CBI) { return false; } // TODO: Handle interblock similarity. bool visitTerminator(Instruction &I) { return false; } - bool visitInstruction(Instruction &I) { - return true; - } + bool visitInstruction(Instruction &I) { return true; } }; /// A InstVisitor used to exclude certain instructions from being outlined. Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -41,6 +41,13 @@ /// Flag for whether we should not consider this group of OutlinableRegions /// for extraction. bool IgnoreGroup = false; + + /// For the \ref Regions, we look at every Value. If it is a constant, + /// we check whether it is the same in Region. + /// + /// \param [in,out] NotSame contains the global value numbers where the + /// constant is not always the same, and must be passed in as an argument. + void findSameConstants(DenseSet &NotSame); }; /// Move the contents of \p SourceBB to before the last instruction of \p @@ -144,6 +151,198 @@ CandidateSplit = false; } +/// Find whether \p V matches the Constants previously found for the \p GVN. +/// +/// \param V - The value to check for consistency. +/// \param GVN - The global value number assigned to \p V. +/// \param GVNToConstant - The mapping of global value number to Constants. +/// \returns true if the Value matches the Constant mapped to by V and false if +/// it \p V is a Constant but does not match. +/// \returns None if \p V is not a Constant. +static Optional +constantMatches(Value *V, unsigned GVN, + DenseMap &GVNToConstant) { + // See if we have a constants + Constant *CST = dyn_cast(V); + if (!CST) + return None; + + // Holds a mapping from a global value number to a Constant. + DenseMap::iterator GVNToConstantIt; + bool Inserted; + + // If we have a constant, try to make a new entry in the GVNToConstant. + std::tie(GVNToConstantIt, Inserted) = + GVNToConstant.insert(std::make_pair(GVN, CST)); + // If it was found and is not equal, it is not the same. We do not + // handle this case yet, and exit early. + if (Inserted || (GVNToConstantIt->second == CST)) + return true; + + return false; +} + +/// Find whether \p Region matches the global value numbering to Constant mapping +/// found so far. +/// +/// \param Region - The OutlinableRegion we are checking for constants +/// \param NotSame - The set of global value numbers that do not have the same +/// constant in each region. +/// \returns true if all Constants are the same in every use of a Constant in \p +/// Region and false if not +static bool +collectRegionsConstants(OutlinableRegion &Region, + DenseMap &GVNToConstant, + DenseSet &NotSame) { + IRSimilarityCandidate &C = *Region.Candidate; + for (IRInstructionData &ID : C) { + + // Iterate over the operands in an instruction. If the global value number, + // assigned by the IRSimilarityCandidate, has been seen before, we check if + // the the number has been found to be not the same value in each instance. + for (Value *V : ID.OperVals) { + Optional GVNOpt = C.getGVN(V); + assert(GVNOpt.hasValue() && "Expected a GVN for operand?"); + unsigned GVN = GVNOpt.getValue(); + + // If this global value has been found to not be the same, it could have + // just been a register, check that it is not a constant value. + if (NotSame.find(GVN) != NotSame.end()) { + if (isa(V)) + return false; + continue; + } + + // If it has been the same so far, we check the value for if the + // associated Constant value match the previous instances of the same + // global value number. If the global value does not map to a Constant, + // it is considered to not be the same value. + Optional ConstantMatches = constantMatches(V, GVN, GVNToConstant); + if (ConstantMatches.hasValue()) { + if (ConstantMatches.getValue()) + continue; + else + return false; + } + + // While this value is a register, it might not have been previously, + // make sure we don't already have a constant mapped to this global value + // number. + if (GVNToConstant.find(GVN) != GVNToConstant.end()) + return false; + + NotSame.insert(GVN); + } + } + + return true; +} + +void OutlinableGroup::findSameConstants(DenseSet &NotSame) { + DenseMap GVNToConstant; + + for (OutlinableRegion *Region : Regions) + if (!collectRegionsConstants(*Region, GVNToConstant, NotSame)) { + IgnoreGroup = true; + return; + } +} + +/// Find the GVN for the inputs that have been found by the CodeExtractor, +/// excluding the ones that will be removed by llvm.assumes as these will be +/// removed by the CodeExtractor. +/// +/// \param [in] C - The IRSimilarityCandidate containing the region we are +/// analyzing. +/// \param [in] CurrentInputs - The set of inputs found by the +/// CodeExtractor. +/// \param [out] CurrentInputNumbers - The global value numbers for the extracted +/// arguments. +static void mapInputsToGVNs(IRSimilarityCandidate &C, + SetVector &CurrentInputs, + std::vector &EndInputNumbers) { + // Get the global value number for each input. + for (Value *Input : CurrentInputs) { + assert(Input && "Have a nullptr as an input"); + assert(C.getGVN(Input).hasValue() && + "Could not find a numbering for the given input"); + EndInputNumbers.push_back(C.getGVN(Input).getValue()); + } +} + +/// Find the input GVNs and the output values for a region of Instructions. +/// Using the code extractor, we collect the inputs to the extracted function. +/// +/// The \p Region can be identifed as needing to be ignored in this function. +/// It should be checked whether it should be ignored after a call to this +/// function. +/// +/// \param [in,out] Region - The region of code to be analyzed. +/// \param [out] Inputs - The global value numbers for the extracted arguments. +/// \param [out] ArgInputs - The values of the inputs to the extracted function. +static void getCodeExtractorArguments(OutlinableRegion &Region, + std::vector &InputGVNs, + SetVector &ArgInputs) { + IRSimilarityCandidate &C = *Region.Candidate; + + // OverallInputs are the inputs to the region found by the CodeExtractor, + // SinkCands and HoistCands are used by the CodeExtractor to find sunken + // allocas of values whose lifetimes are contained completely within the + // outlined region. Outputs are values used outside of the outlined region + // found by the CodeExtractor. + SetVector OverallInputs, SinkCands, HoistCands, Outputs; + + // Use the code extractor to get the inputs and outputs, without sunken + // allocas or removing llvm.assumes. + CodeExtractor *CE = Region.CE; + CE->findInputsOutputs(OverallInputs, Outputs, SinkCands); + assert(Region.StartBB && "Region must have a start BasicBlock!"); + Function *OrigF = Region.StartBB->getParent(); + CodeExtractorAnalysisCache CEAC(*OrigF); + BasicBlock *Dummy = nullptr; + + // The region may be ineligible due to VarArgs in the parent function. In this + // case we ignore the region. + if (!CE->isEligible()) { + Region.IgnoreRegion = true; + return; + } + + // Find if any values are going to be sunk into the function when extracted + CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy); + CE->findInputsOutputs(ArgInputs, Outputs, SinkCands); + + // TODO: Support regions with output values. Outputs add an extra layer of + // resolution that adds too much complexity at this stage. + if (Outputs.size() > 0) { + Region.IgnoreRegion = true; + return; + } + + // TODO: Support regions with sunken allocas: values whose lifetimes are + // contained completely within the outlined region. These are not guaranteed + // to be the same in every region, so we must elevate them all to arguments + // when they appear. If these values are not equal, it means there is some + // Input in OverallInputs that was removed for ArgInputs. + if (ArgInputs.size() != OverallInputs.size()) { + Region.IgnoreRegion = true; + return; + } + + mapInputsToGVNs(C, OverallInputs, InputGVNs); +} + +void IROutliner::findAddInputsOutputs( + Module &M, OutlinableRegion &Region) { + std::vector Inputs; + SetVector ArgInputs; + + getCodeExtractorArguments(Region, Inputs, ArgInputs); + + if (Region.IgnoreRegion) + return; +} + void IROutliner::pruneIncompatibleRegions( std::vector &CandidateVec, OutlinableGroup &CurrentGroup) { @@ -273,6 +472,7 @@ RHS[0].getLength() * RHS.size(); }); + DenseSet NotSame; // Iterate over the possible sets of similarity. for (SimilarityGroup &CandidateVec : SimilarityCandidates) { OutlinableGroup CurrentGroup; @@ -286,7 +486,18 @@ if (CurrentGroup.Regions.size() < 2) continue; - // Create a CodeExtractor for each outlinable region. + // Determine if there are any values that are the same constant throughout + // each section in the set. + NotSame.clear(); + CurrentGroup.findSameConstants(NotSame); + + if (CurrentGroup.IgnoreGroup) + continue; + + // Create a CodeExtractor for each outlinable region. Identify inputs and + // outputs for each section using the code extractor and create the argument + // types for the Aggregate Outlining Function. + std::vector OutlinedRegions; for (OutlinableRegion *OS : CurrentGroup.Regions) { // Break the outlinable region out of its parent BasicBlock into its own // BasicBlocks (see function implementation). @@ -295,10 +506,17 @@ OS->CE = new (OutlinerAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); + findAddInputsOutputs(M, *OS); + if (!OS->IgnoreRegion) + OutlinedRegions.push_back(OS); + else + OS->reattachCandidate(); } - // Create functions out of all the sections, and mark them as outlined - std::vector OutlinedRegions; + CurrentGroup.Regions = std::move(OutlinedRegions); + + // Create functions out of all the sections, and mark them as outlined. + OutlinedRegions.clear(); for (OutlinableRegion *OS : CurrentGroup.Regions) { OutlinedFunctionNum++; bool FunctionOutlined = extractSection(*OS); Index: llvm/test/Transforms/IROutliner/extraction.ll =================================================================== --- llvm/test/Transforms/IROutliner/extraction.ll +++ llvm/test/Transforms/IROutliner/extraction.ll @@ -48,26 +48,24 @@ ret void } +; There are potential ouptuts in this sections, but we do not extract sections +; with outputs right now, since they cannot be consolidated. define void @extract_outs1() #0 { ; CHECK-LABEL: @extract_outs1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) -; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4 -; CHECK-NEXT: call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: call void @extract_outs1.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]]) ; CHECK-NEXT: ret void ; entry: @@ -88,25 +86,23 @@ ret void } +; There are potential ouptuts in this sections, but we do not extract sections +; with outputs right now, since they cannot be consolidated. define void @extract_outs2() #0 { ; CHECK-LABEL: @extract_outs2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) -; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: call void @extract_outs2.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]]) ; CHECK-NEXT: ret void ; entry: Index: llvm/test/Transforms/IROutliner/illegal-assumes.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -7,19 +7,15 @@ define void @outline_assumes() { ; CHECK-LABEL: @outline_assumes( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outline_assumes.outlined.5(i1* [[D]], i1* [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] +; CHECK-NEXT: store i1 true, i1* [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, i1* [[D]], align 1 +; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL]], [[DL]] ; CHECK-NEXT: call void @outline_assumes.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) ; CHECK-NEXT: call void @outline_assumes.outlined.1(i32* [[A]], i32* [[B]], i32* [[C]]) ; CHECK-NEXT: ret void ; @@ -44,18 +40,14 @@ define void @outline_assumes2() { ; CHECK-LABEL: @outline_assumes2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outline_assumes2.outlined.6(i1* [[D]], i1* [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: store i1 false, i1* [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, i1* [[D]], align 1 ; CHECK-NEXT: call void @outline_assumes2.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) ; CHECK-NEXT: call void @outline_assumes2.outlined.2(i32* [[A]], i32* [[B]], i32* [[C]]) ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/IROutliner/illegal-memcpy.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memcpy.ll +++ llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -9,22 +9,12 @@ define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function1.outlined.1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function1.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s @@ -38,22 +28,12 @@ define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function2.outlined.2(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function2.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s Index: llvm/test/Transforms/IROutliner/illegal-memmove.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memmove.ll +++ llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -9,22 +9,12 @@ define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function1.outlined.1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function1.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s @@ -38,22 +28,12 @@ define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function2.outlined.2(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function2.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s Index: llvm/test/Transforms/IROutliner/illegal-vaarg.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -11,20 +11,17 @@ define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { ; CHECK-LABEL: @func1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AP1_LOC:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 ; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @func1.outlined(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) -; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 -; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]]) -; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1]]) ; CHECK-NEXT: store i32 [[TMP0]], i32* [[C]], align 4 ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 ; CHECK-NEXT: ret i32 [[TMP]] @@ -49,20 +46,17 @@ define i32 @func2(i32 %a, double %b, i8* %v, ...) nounwind { ; CHECK-LABEL: @func2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AP1_LOC:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 ; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @func2.outlined(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) -; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 -; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]]) -; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1]]) ; CHECK-NEXT: store i32 [[TMP0]], i32* [[C]], align 4 ; CHECK-NEXT: [[AP2:%.*]] = bitcast i8** [[AP]] to i8* ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 Index: llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at instances of constants in the different regions. If there +; is a register in the same place as a constant in a similar region of code, we +; do not outline those regions. + +; The first function tests that we do not outline with the register is +; seen first, and the second function checks that we do not outline when the +; constant is seen first. + +define void @function_registers_first(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: @function_registers_first( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[TMP0:%.*]], i32* [[A]], align 4 +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[B]], align 4 +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[C]], align 4 +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 %0, i32* %a, align 4 + store i32 %1, i32* %b, align 4 + store i32 %2, i32* %c, align 4 + ret void +next: + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + ret void +} + +define void @function_with_constants_first() { +; CHECK-LABEL: @function_with_constants_first( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 2, [[AL]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[BL]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 4, [[CL]] +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[AL]] +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], [[BL]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP2]], [[CL]] +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = add i32 2, %al + %1 = add i32 3, %bl + %2 = add i32 4, %cl + ret void +next: + %3 = add i32 %0, %al + %4 = add i32 %1, %bl + %5 = add i32 %2, %cl + ret void +} Index: llvm/test/Transforms/IROutliner/outlining-different-constants.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-different-constants.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at the constants in the regions, and if it they are the +; differents it does not outline them as they cannot be consolidated into the +; the same function. + +define void @outline_constants1() { +; CHECK-LABEL: @outline_constants1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 3, i32* [[A]], align 4 +; CHECK-NEXT: store i32 4, i32* [[B]], align 4 +; CHECK-NEXT: store i32 5, i32* [[C]], align 4 +; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 3, i32* %a, align 4 + store i32 4, i32* %b, align 4 + store i32 5, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_constants2() { +; CHECK-LABEL: @outline_constants2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 Index: llvm/test/Transforms/IROutliner/outlining-different-globals.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-different-globals.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at the globals in the regions, and makes sure they are not +; outlined if they are different values. + +@global1 = global i32 1, align 4 +@global2 = global i32 2, align 4 +@global3 = global i32 3, align 4 +@global4 = global i32 4, align 4 + +define void @outline_globals1() { +; CHECK-LABEL: @outline_globals1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @global1, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global2, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @global1 + %1 = load i32, i32* @global2 + %2 = add i32 %0, %1 + ret void +} + +define void @outline_globals2() { +; CHECK-LABEL: @outline_globals2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @global3, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global4, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @global3 + %1 = load i32, i32* @global4 + %2 = add i32 %0, %1 + ret void +} Index: llvm/test/Transforms/IROutliner/outlining-different-structure.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-different-structure.ll +++ llvm/test/Transforms/IROutliner/outlining-different-structure.ll @@ -60,9 +60,3 @@ ; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 - -; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) -; CHECK: entry_to_outline: -; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 -; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 -; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 Index: llvm/test/Transforms/IROutliner/outlining-same-constants.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-same-constants.ll +++ llvm/test/Transforms/IROutliner/outlining-same-constants.ll @@ -56,12 +56,3 @@ ; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 - -; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) -; CHECK: entry_to_outline: -; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 -; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 -; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 -; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 -; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 -; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4