Index: llvm/include/llvm/Transforms/IPO/IROutliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/IROutliner.h +++ llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -151,6 +151,14 @@ pruneIncompatibleRegions(std::vector &CandidateVec, OutlinableGroup &CurrentGroup); + /// Identify the needed extracted inputs in a section, and add to the overall + /// function if needed. + /// + /// \param [in] M - The module to outline from. + /// \param [in,out] Region - The region to be extracted. + /// \returns true if the region can be extracted, and false if not. + bool findAddInputsOutputs(Module &M, OutlinableRegion &Region); + /// Extract \p Region into its own function. /// /// \param [in] Region - The region to be extracted into its own function. Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -37,6 +37,15 @@ struct OutlinableGroup { /// The sections that could be outlined std::vector Regions; + + /// For the \ref Regions, we look at every Value. If it is a constant, + /// we check whether it is the same in Region. + /// + /// \param [in,out] NotSame contains the global value numbers where the + /// constant is not always the same, and must be passed in as an argument. + /// \returns false if there is an inconsistency that would prevent the + /// section from being outlined, and true if it can be outlined. + bool findSameConstants(DenseSet &NotSame); }; /// Move the contents of \p SourceBB to before the last instruction of \p @@ -140,6 +149,176 @@ CandidateSplit = false; } +/// Find whether \p CST matches the Constants previously found for the \p GVN. +/// +/// \param CST - The Constant to check for consistency. +/// \param GVN - The global value number assigned to \p CST. +/// \param GVNToConstant - The mapping of global value number to Constants. 
+/// \returns true if \p CST matches the Constant mapped to by \p GVN and false if +/// \p CST does not match the previously mapped Constant. +bool constantMatches(Constant *CST, unsigned GVN, + DenseMap &GVNToConstant) { + // Holds a mapping from a global value number to a Constant. + DenseMap::iterator GVNToConstantIt; + bool Inserted; + + // If we have a constant, try to make a new entry in the GVNToConstant. + std::tie(GVNToConstantIt, Inserted) = + GVNToConstant.insert(std::make_pair(GVN, CST)); + // If it was found and is not equal, it is not the same. We do not + // handle this case yet, and exit early. + if (Inserted || (GVNToConstantIt->second == CST)) return true; + + return false; +} + +/// Find whether \p Region matches the global value numbering to Constant mapping +/// found so far. +/// +/// \param Region - The OutlinableRegion we are checking for constants +/// \param NotSame - The set of global value numbers that do not have the same +/// constant in each region. +/// \returns true if all Constants are the same in every use of a Constant in \p +/// Region and false if not +static bool +collectRegionsConstants(OutlinableRegion &Region, + DenseMap &GVNToConstant, + DenseSet &NotSame) { + IRSimilarityCandidate &IRSC = *Region.Candidate; + for (IRInstructionData &ID : IRSC) { + + // Iterate over the operands in an instruction. If the global value number, + // assigned by the IRSimilarityCandidate, has been seen before, we check if + // the number has been found to be not the same value in each instance. + for (Value *V : ID.OperVals) { + unsigned GVN = *IRSC.getGVN(V); + + // If this global value has been found to not be the same, it could have + // just been a register, check that it is not a constant value. + if (NotSame.find(GVN) != NotSame.end()) { + if (isa(V)) + return false; + continue; + } + + // If it has been the same so far, we check the value for if the + // associated Constant value match the previous instances of the same + // global value number. 
If the global value does not map to a Constant, + // it is considered to not be the same value. + if (Constant *C = dyn_cast(V)) { + if (!constantMatches(C, GVN, GVNToConstant)) + return false; + continue; + } + + // While this value is a register, it might not have been previously, + // make sure we don't already have a constant mapped to this global value + // number. + if (GVNToConstant.find(GVN) != GVNToConstant.end()) + return false; + + NotSame.insert(GVN); + } + } + + return true; +} + +bool OutlinableGroup::findSameConstants(DenseSet &NotSame) { + DenseMap GVNToConstant; + + return all_of(Regions, [&GVNToConstant, &NotSame](OutlinableRegion *Region) { + return collectRegionsConstants(*Region, GVNToConstant, NotSame); + }); +} + +/// Find the GVN for the inputs that have been found by the CodeExtractor, +/// excluding the ones that will be removed by llvm.assumes as these will be +/// removed by the CodeExtractor. +/// +/// \param [in] IRSC - The IRSimilarityCandidate containing the region we are +/// analyzing. +/// \param [in] CurrentInputs - The set of inputs found by the +/// CodeExtractor. +/// \param [out] EndInputNumbers - The global value numbers for the extracted +/// arguments. +static void mapInputsToGVNs(IRSimilarityCandidate &IRSC, + SetVector &CurrentInputs, + std::vector &EndInputNumbers) { + // Get the global value number for each input. + for (Value *Input : CurrentInputs) { + assert(Input && "Have a nullptr as an input"); + assert(IRSC.getGVN(Input).hasValue() && + "Could not find a numbering for the given input"); + EndInputNumbers.push_back(IRSC.getGVN(Input).getValue()); + } +} + +/// Find the input GVNs and the output values for a region of Instructions. +/// Using the code extractor, we collect the inputs to the extracted function. +/// +/// The \p Region can be identified as needing to be ignored in this function. +/// It should be checked whether it should be ignored after a call to this +/// function. 
+/// +/// \param [in,out] Region - The region of code to be analyzed. +/// \param [out] InputGVNs - The global value numbers for the extracted arguments. +/// \param [out] ArgInputs - The values of the inputs to the extracted function. +/// \returns true if the region can be extracted, and false if not. +static bool getCodeExtractorArguments(OutlinableRegion &Region, + std::vector &InputGVNs, + SetVector &ArgInputs) { + IRSimilarityCandidate &IRSC = *Region.Candidate; + + // OverallInputs are the inputs to the region found by the CodeExtractor, + // SinkCands and HoistCands are used by the CodeExtractor to find sunken + // allocas of values whose lifetimes are contained completely within the + // outlined region. Outputs are values used outside of the outlined region + // found by the CodeExtractor. + SetVector OverallInputs, SinkCands, HoistCands, Outputs; + + // Use the code extractor to get the inputs and outputs, without sunken + // allocas or removing llvm.assumes. + CodeExtractor *CE = Region.CE; + CE->findInputsOutputs(OverallInputs, Outputs, SinkCands); + assert(Region.StartBB && "Region must have a start BasicBlock!"); + Function *OrigF = Region.StartBB->getParent(); + CodeExtractorAnalysisCache CEAC(*OrigF); + + // The region may be ineligible due to VarArgs in the parent function. In this + // case we ignore the region. + if (!CE->isEligible()) + return false; + + // Find if any values are going to be sunk into the function when extracted + BasicBlock *Dummy = nullptr; + CE->findAllocas(CEAC, SinkCands, HoistCands, Dummy); + CE->findInputsOutputs(ArgInputs, Outputs, SinkCands); + + // TODO: Support regions with output values. Outputs add an extra layer of + // resolution that adds too much complexity at this stage. + + // TODO: Support regions with sunken allocas: values whose lifetimes are + // contained completely within the outlined region. 
These are not guaranteed + // to be the same in every region, so we must elevate them all to arguments + // when they appear. If these values are not equal, it means there is some + // Input in OverallInputs that was removed for ArgInputs. + if (Outputs.size() > 0 || ArgInputs.size() != OverallInputs.size()) + return false; + + mapInputsToGVNs(IRSC, OverallInputs, InputGVNs); + + return true; +} + +bool IROutliner::findAddInputsOutputs( + Module &M, OutlinableRegion &Region) { + std::vector Inputs; + SetVector ArgInputs; + + return getCodeExtractorArguments(Region, Inputs, ArgInputs); +} + void IROutliner::pruneIncompatibleRegions( std::vector &CandidateVec, OutlinableGroup &CurrentGroup) { @@ -282,7 +461,16 @@ if (CurrentGroup.Regions.size() < 2) continue; - // Create a CodeExtractor for each outlinable region. + // Determine if there are any values that are the same constant throughout + // each section in the set. + DenseSet NotSame; + if (!CurrentGroup.findSameConstants(NotSame)) + continue; + + // Create a CodeExtractor for each outlinable region. Identify inputs and + // outputs for each section using the code extractor and create the argument + // types for the Aggregate Outlining Function. + std::vector OutlinableRegions; for (OutlinableRegion *OS : CurrentGroup.Regions) { // Break the outlinable region out of its parent BasicBlock into its own // BasicBlocks (see function implementation). @@ -291,11 +479,18 @@ OS->CE = new (CodeExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, false, "outlined"); + // We can only outline sections that can be extracted by the CodeExtractor + // and do not require extra handling of the arguments, such as requiring + // extra arguments for constants, or different numbers of outputs. 
+ if (findAddInputsOutputs(M, *OS)) + OutlinableRegions.push_back(OS); + else + OS->reattachCandidate(); } - // Create functions out of all the sections, and mark them as outlined + // Create functions out of all the sections, and mark them as outlined. std::vector OutlinedRegions; - for (OutlinableRegion *OS : CurrentGroup.Regions) { + for (OutlinableRegion *OS : OutlinableRegions) { OutlinedFunctionNum++; bool FunctionOutlined = extractSection(*OS); if (FunctionOutlined) { Index: llvm/test/Transforms/IROutliner/extraction.ll =================================================================== --- llvm/test/Transforms/IROutliner/extraction.ll +++ llvm/test/Transforms/IROutliner/extraction.ll @@ -48,26 +48,24 @@ ret void } +; There are potential outputs in this section, but we do not extract sections +; with outputs right now, since they cannot be consolidated. define void @extract_outs1() #0 { ; CHECK-LABEL: @extract_outs1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) -; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: [[TMP0:%.*]] = load i32, 
i32* [[OUTPUT]], align 4 -; CHECK-NEXT: call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: call void @extract_outs1.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]]) ; CHECK-NEXT: ret void ; entry: @@ -88,25 +86,23 @@ ret void } +; There are potential outputs in this section, but we do not extract sections +; with outputs right now, since they cannot be consolidated. define void @extract_outs2() #0 { ; CHECK-LABEL: @extract_outs2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]]) -; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4 -; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: call void 
@extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]]) +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: store i32 [[ADD]], i32* [[OUTPUT]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[OUTPUT]], align 4 +; CHECK-NEXT: call void @extract_outs2.outlined(i32 [[TMP2]], i32 [[ADD]], i32* [[RESULT]]) ; CHECK-NEXT: ret void ; entry: Index: llvm/test/Transforms/IROutliner/illegal-assumes.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -7,19 +7,15 @@ define void @outline_assumes() { ; CHECK-LABEL: @outline_assumes( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outline_assumes.outlined.5(i1* [[D]], i1* [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] +; CHECK-NEXT: store i1 true, i1* [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, i1* [[D]], align 1 +; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL]], [[DL]] ; CHECK-NEXT: call void @outline_assumes.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) ; CHECK-NEXT: call void 
@outline_assumes.outlined.1(i32* [[A]], i32* [[B]], i32* [[C]]) ; CHECK-NEXT: ret void ; @@ -44,18 +40,14 @@ define void @outline_assumes2() { ; CHECK-LABEL: @outline_assumes2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @outline_assumes2.outlined.6(i1* [[D]], i1* [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: store i1 false, i1* [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, i1* [[D]], align 1 ; CHECK-NEXT: call void @outline_assumes2.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) ; CHECK-NEXT: call void @outline_assumes2.outlined.2(i32* [[A]], i32* [[B]], i32* [[C]]) ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/IROutliner/illegal-memcpy.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memcpy.ll +++ llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -9,22 +9,12 @@ define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function1.outlined.1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: 
[[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function1.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s @@ -38,22 +28,12 @@ define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function2.outlined.2(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
[[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function2.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s Index: llvm/test/Transforms/IROutliner/illegal-memmove.ll =================================================================== --- llvm/test/Transforms/IROutliner/illegal-memmove.ll +++ llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -9,22 +9,12 @@ define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function1.outlined.1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function1.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], 
i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s @@ -38,22 +28,12 @@ define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { ; CHECK-LABEL: @function2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) -; CHECK-NEXT: call void @function2.outlined.2(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) -; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 -; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[S:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[D:%.*]], align 1 ; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: call void @function2.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) -; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) -; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[S]], align 1 +; CHECK-NEXT: ret i8 [[RET]] ; entry: %a = load i8, i8* %s Index: llvm/test/Transforms/IROutliner/illegal-vaarg.ll 
=================================================================== --- llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -11,20 +11,17 @@ define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { ; CHECK-LABEL: @func1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AP1_LOC:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 ; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @func1.outlined(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) -; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 -; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]]) -; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1]]) ; CHECK-NEXT: store i32 [[TMP0]], i32* [[C]], align 4 ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 ; CHECK-NEXT: ret i32 [[TMP]] @@ -49,20 +46,17 @@ define i32 @func2(i32 %a, double %b, i8* %v, ...) 
nounwind { ; CHECK-LABEL: @func2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AP1_LOC:%.*]] = alloca i8*, align 8 ; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 ; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @func2.outlined(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) -; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) ; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 -; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]]) -; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1]]) ; CHECK-NEXT: store i32 [[TMP0]], i32* [[C]], align 4 ; CHECK-NEXT: [[AP2:%.*]] = bitcast i8** [[AP]] to i8* ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 Index: llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-constants-vs-registers.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at instances of constants in the different regions. 
If there +; is a register in the same place as a constant in a similar region of code, we +; do not outline those regions. + +; The first function tests that we do not outline when the register is +; seen first, and the second function checks that we do not outline when the +; constant is seen first. + +define void @function_registers_first(i32 %0, i32 %1, i32 %2) { +; CHECK-LABEL: @function_registers_first( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[TMP0:%.*]], i32* [[A]], align 4 +; CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[B]], align 4 +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[C]], align 4 +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 %0, i32* %a, align 4 + store i32 %1, i32* %b, align 4 + store i32 %2, i32* %c, align 4 + ret void +next: + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + ret void +} + +define void @function_with_constants_first() { +; CHECK-LABEL: @function_with_constants_first( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = add i32 2, [[AL]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[BL]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 4, [[CL]] +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], [[AL]] +; CHECK-NEXT: [[TMP4:%.*]] = add i32 
[[TMP1]], [[BL]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP2]], [[CL]] +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + %0 = add i32 2, %al + %1 = add i32 3, %bl + %2 = add i32 4, %cl + ret void +next: + %3 = add i32 %0, %al + %4 = add i32 %1, %bl + %5 = add i32 %2, %cl + ret void +} Index: llvm/test/Transforms/IROutliner/outlining-different-constants.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-different-constants.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at the constants in the regions, and if they are +; different it does not outline them as they cannot be consolidated into +; the same function. + +define void @outline_constants1() { +; CHECK-LABEL: @outline_constants1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 3, i32* [[A]], align 4 +; CHECK-NEXT: store i32 4, i32* [[B]], align 4 +; CHECK-NEXT: store i32 5, i32* [[C]], align 4 +; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 3, i32* %a, align 4 + store i32 4, i32* %b, align 4 + store i32 5, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_constants2() { +; CHECK-LABEL: @outline_constants2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; 
CHECK-NEXT: store i32 2, i32* [[A]], align 4 +; CHECK-NEXT: store i32 3, i32* [[B]], align 4 +; CHECK-NEXT: store i32 4, i32* [[C]], align 4 +; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 Index: llvm/test/Transforms/IROutliner/outlining-different-globals.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/outlining-different-globals.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test looks at the globals in the regions, and makes sure they are not +; outlined if they are different values. 
+ +@global1 = global i32 1, align 4 +@global2 = global i32 2, align 4 +@global3 = global i32 3, align 4 +@global4 = global i32 4, align 4 + +define void @outline_globals1() { +; CHECK-LABEL: @outline_globals1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @global1, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global2, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @global1 + %1 = load i32, i32* @global2 + %2 = add i32 %0, %1 + ret void +} + +define void @outline_globals2() { +; CHECK-LABEL: @outline_globals2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @global3, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global4, align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret void +; +entry: + %0 = load i32, i32* @global3 + %1 = load i32, i32* @global4 + %2 = add i32 %0, %1 + ret void +} Index: llvm/test/Transforms/IROutliner/outlining-different-structure.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-different-structure.ll +++ llvm/test/Transforms/IROutliner/outlining-different-structure.ll @@ -60,9 +60,3 @@ ; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 - -; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) -; CHECK: entry_to_outline: -; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 -; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 -; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 Index: llvm/test/Transforms/IROutliner/outlining-same-constants.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-same-constants.ll +++ 
llvm/test/Transforms/IROutliner/outlining-same-constants.ll @@ -56,12 +56,3 @@ ; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 - -; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) -; CHECK: entry_to_outline: -; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 -; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 -; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 -; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 -; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 -; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4