Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -47,18 +47,13 @@ /// ///@{ - /// @see The ScalarMap and PHIOpMap member. - using ScalarAllocaMapTy = DenseMap, AssertingVH>; - /// @brief Simple vector of instructions to store escape users. using EscapeUserVectorTy = SmallVector; /// @brief Map type to resolve escaping users for scalar instructions. /// /// @see The EscapeMap member. - using EscapeUsersAllocaMapTy = - DenseMap, EscapeUserVectorTy>>; + using EscapeUsersAllocaMapTy = DenseMap; ///@} @@ -70,8 +65,8 @@ /// @param LI The loop info for the current function /// @param SE The scalar evolution info for the current function /// @param DT The dominator tree of this function. - /// @param ScalarMap Map from scalars to their demoted location. - /// @param PHIOpMap Map from PHIs to their demoted operand location. + /// @param LCPHIs Map from loop carried PHI nodes to "floating" copies. + /// @param LoopDepth The loop depth of the block being copied. /// @param EscapeMap Map from scalars to their escape users and locations. /// @param GlobalMap A mapping from llvm::Values used in the original scop /// region to a new set of llvm::Values. Each reference to @@ -79,9 +74,9 @@ /// with the new value it is mapped to. /// @param ExprBuilder An expression builder to generate new access functions. BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE, - DominatorTree &DT, ScalarAllocaMapTy &ScalarMap, - ScalarAllocaMapTy &PHIOpMap, EscapeUsersAllocaMapTy &EscapeMap, - ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder = nullptr); + DominatorTree &DT, ValueMapT &LCPHIs, int &LoopDepth, + EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap, + IslExprBuilder *ExprBuilder = nullptr); /// @brief Copy the basic block. /// @@ -89,66 +84,22 @@ /// with references to new values, as defined by GlobalMap. 
/// /// @param Stmt The block statement to code generate. + /// @param ScalarMap The scalar mappings that hold when @p Stmt is entered. /// @param LTS A map from old loops to new induction variables as /// SCEVs. /// @param NewAccesses A map from memory access ids to new ast expressions, /// which may contain new access expressions for certain /// memory accesses. - void copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS, + void copyStmt(ScopStmt &Stmt, ValueMapT &ScalarMap, LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses); - /// @brief Return the scalar alloca for @p ScalarBase - /// - /// If no alloca was mapped to @p ScalarBase a new one is created. - /// - /// @param ScalarBase The demoted scalar value. - /// @param GlobalMap A mapping from Allocas to other memory locations that - /// can be used to replace the original alloca locations - /// with new memory locations, e.g. when passing values to - /// subfunctions while offloading parallel sections. - /// - /// @returns The alloca for @p ScalarBase or a replacement value taken from - /// GlobalMap. - Value *getOrCreateScalarAlloca(Value *ScalarBase); - - /// @brief Return the PHi-node alloca for @p ScalarBase - /// - /// If no alloca was mapped to @p ScalarBase a new one is created. - /// - /// @param ScalarBase The demoted scalar value. - /// - /// @returns The alloca for @p ScalarBase or a replacement value taken from - /// GlobalMap. - Value *getOrCreatePHIAlloca(Value *ScalarBase); - - /// @brief Return the alloca for @p Access - /// - /// If no alloca was mapped for @p Access a new one is created. - /// - /// @param Access The memory access for which to generate the alloca - /// - /// @returns The alloca for @p Access or a replacement value taken from - /// GlobalMap. - Value *getOrCreateAlloca(MemoryAccess &Access); - - /// @brief Return the alloca for @p Array - /// - /// If no alloca was mapped for @p Array a new one is created. 
- /// - /// @param Array The array for which to generate the alloca - /// - /// @returns The alloca for @p Array or a replacement value taken from - /// GlobalMap. - Value *getOrCreateAlloca(const ScopArrayInfo *Array); - /// @brief Finalize the code generation for the SCoP @p S. /// - /// This will initialize and finalize the scalar variables we demoted during - /// the code generation. + /// @param ScalarMap The scalar mappings that hold after @p S. /// - /// @see createScalarInitialization(Scop &) - /// @see createScalarFinalization(Region &) - void finalizeSCoP(Scop &S); + /// @see createScalarFinalization() + /// @see createExitPHINodeMerges () + void finalizeSCoP(Scop &S, ValueMapT &ScalarMap); /// @brief An empty destructor virtual ~BlockGenerator(){}; @@ -167,134 +118,11 @@ /// @brief The entry block of the current function. BasicBlock *EntryBB; - /// @brief Maps to resolve scalar dependences for PHI operands and scalars. - /// - /// When translating code that contains scalar dependences as they result from - /// inter-block scalar dependences (including the use of data carrying - /// PHI nodes), we do not directly regenerate in-register SSA code, but - /// instead allocate some stack memory through which these scalar values are - /// passed. Only a later pass of -mem2reg will then (re)introduce in-register - /// computations. - /// - /// To keep track of the memory location(s) used to store the data computed by - /// a given SSA instruction, we use the maps 'ScalarMap' and 'PHIOpMap'. Each - /// maps a given scalar value to a junk of stack allocated memory. - /// - /// 'ScalarMap' is used for normal scalar dependences that go from a scalar - /// definition to its use. Such dependences are lowered by directly writing - /// the value an instruction computes into the corresponding chunk of memory - /// and reading it back from this chunk of memory right before every use of - /// this original scalar value. 
The memory locations in 'ScalarMap' end with - /// '.s2a'. - /// - /// 'PHIOpMap' is used to model PHI nodes. For each PHI nodes we introduce, - /// besides the memory in 'ScalarMap', a second chunk of memory into which we - /// write at the end of each basic block preceeding the PHI instruction the - /// value passed through this basic block. At the place where the PHI node is - /// executed, we replace the PHI node with a load from the corresponding - /// memory location in the 'PHIOpMap' table. The memory locations in - /// 'PHIOpMap' end with '.phiops'. - /// - /// The ScopArrayInfo objects of accesses that belong to a PHI node may have - /// identical base pointers, even though they refer to two different memory - /// locations, the normal '.s2a' locations and the special '.phiops' - /// locations. For historic reasons we keep such accesses in two maps - /// 'ScalarMap' and 'PHIOpMap', index by the BasePointer. An alternative - /// implemenation, could use a single map that uses the ScopArrayInfo object - /// as index. - /// - /// Example: - /// - /// Input C Code - /// ============ - /// - /// S1: x1 = ... - /// for (i=0...N) { - /// S2: x2 = phi(x1, add) - /// S3: add = x2 + 42; - /// } - /// S4: print(x1) - /// print(x2) - /// print(add) - /// - /// - /// Unmodified IR IR After expansion - /// ============= ================== - /// - /// S1: x1 = ... S1: x1 = ... - /// x1.s2a = s1 - /// x2.phiops = s1 - /// | | - /// | <--<--<--<--< | <--<--<--<--< - /// | / \ | / \ . - /// V V \ V V \ . - /// S2: x2 = phi (x1, add) | S2: x2 = x2.phiops | - /// | x2.s2a = x2 | - /// | | - /// S3: add = x2 + 42 | S3: add = x2 + 42 | - /// | add.s2a = add | - /// | x2.phiops = add | - /// | \ / | \ / - /// | \ / | \ / - /// | >-->-->-->--> | >-->-->-->--> - /// V V - /// - /// S4: x1 = x1.s2a - /// S4: ... = x1 ... = x1 - /// x2 = x2.s2a - /// ... = x2 ... = x2 - /// add = add.s2a - /// ... = add ... 
= add - /// - /// ScalarMap = { x1 -> x1.s2a, x2 -> x2.s2a, add -> add.s2a } - /// PHIOpMap = { x2 -> x2.phiops } - /// - /// - /// ??? Why does a PHI-node require two memory chunks ??? - /// - /// One may wonder why a PHI node requires two memory chunks and not just - /// all data is stored in a single location. The following example tries - /// to store all data in .s2a and drops the .phiops location: - /// - /// S1: x1 = ... - /// x1.s2a = s1 - /// x2.s2a = s1 // use .s2a instead of .phiops - /// | - /// | <--<--<--<--< - /// | / \ . - /// V V \ . - /// S2: x2 = x2.s2a | // value is same as above, but read - /// | // from .s2a - /// | - /// x2.s2a = x2 | // store into .s2a as normal - /// | - /// S3: add = x2 + 42 | - /// add.s2a = add | - /// x2.s2a = add | // use s2a instead of .phiops - /// | \ / // !!! This is wrong, as x2.s2a now - /// | >-->-->-->--> // contains add instead of x2. - /// V - /// - /// S4: x1 = x1.s2a - /// ... = x1 - /// x2 = x2.s2a // !!! We now read 'add' instead of - /// ... = x2 // 'x2' - /// add = add.s2a - /// ... = add - /// - /// As visible in the example, the SSA value of the PHI node may still be - /// needed _after_ the basic block, which could conceptually branch to the - /// PHI node, has been run and has overwritten the PHI's old value. Hence, a - /// single memory location is not enough to code-generate a PHI node. - /// - ///{ - /// - /// @brief Memory locations used for the special PHI node modeling. - ScalarAllocaMapTy &PHIOpMap; - - /// @brief Memory locations used to model scalar dependences. - ScalarAllocaMapTy &ScalarMap; - ///} + /// @brief Map from loop carried PHI nodes to "floating" copies. + ValueMapT &LCPHIs; + + /// @brief The loop depth of the currently copied block in the new schedule. + int &LoopDepth; /// @brief Map from instructions to their escape users as well as the alloca. EscapeUsersAllocaMapTy &EscapeMap; @@ -304,6 +132,23 @@ /// code generation. 
ValueMapT &GlobalMap; + /// @brief Copy interesting mappings from @p BBMap to @p ScalarMap. + /// + /// @param Stmt The statement to code generate. + /// @param BB The basic block to code generate. + /// @param ScalarMap Will be filled with mappings that hold after @p BB. + /// @param BBMap A mapping from old values to their new values + /// (for values recalculated within this basic block). + /// @param LTS A map from old loops to new induction variables as SCEVs. + /// + /// Not all copied instructions need to be merged (using PHI nodes) at join + /// points but only those that might be used later on. To this end this + /// function copies the mappings from @p BBMap to the @p ScalarMap for which a + /// scalar memory access in @p Stmt exists. + void generateScalarMappings(ScopStmt &Stmt, BasicBlock *BB, + ValueMapT &ScalarMap, ValueMapT &BBMap, + LoopToScevMapT &LTS); + /// @brief Split @p BB to create a new one we can use to clone @p BB in. BasicBlock *splitBB(BasicBlock *BB); @@ -339,40 +184,6 @@ ValueMapT &BBMap, LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses); - /// @brief Return the alloca for @p ScalarBase in @p Map. - /// - /// If no alloca was mapped to @p ScalarBase in @p Map a new one is created - /// and named after @p ScalarBase with the suffix @p NameExt. - /// - /// @param ScalarBase The demoted scalar value. - /// @param Map The map we should look for a mapped alloca value. - /// @param NameExt The suffix we add to the name of a new created alloca. - /// - /// @returns The alloca for @p ScalarBase. - Value *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map, - const char *NameExt); - - /// @brief Generate reload of scalars demoted to memory and needed by @p Stmt. - /// - /// @param Stmt The statement we generate code for. - /// @param BBMap A mapping from old values to their new values in this block. - void generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap); - - /// @brief Generate the scalar stores for the given statement. 
- /// - /// After the statement @p Stmt was copied all inner-SCoP scalar dependences - /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to - /// be demoted to memory. - /// - /// @param Stmt The statement we generate code for. - /// @param LTS A mapping from loops virtual canonical induction - /// variable to their new values - /// (for values recalculated in the new ScoP, but not - /// within this basic block) - /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, - ValueMapT &BBMap); - /// @brief Handle users of @p Inst outside the SCoP. /// /// @param R The current SCoP region. @@ -391,11 +202,6 @@ /// @param S The scop for which to find the outside users. void findOutsideUsers(Scop &S); - /// @brief Initialize the memory of demoted scalars. - /// - /// @param S The scop for which to generate the scalar initializers. - void createScalarInitialization(Scop &S); - /// @brief Create exit PHI node merges for PHI nodes with more than two edges /// from inside the scop. /// @@ -405,19 +211,14 @@ /// PHI node from inside the optimized version of the scop. To do so ScopInfo /// models the possible incoming values as write accesses of the ScopStmts. /// - /// This function creates corresponding code to reload the computed outgoing - /// value from the stack slot it has been stored into and to pass it on to the - /// PHI node in the original exit block. - /// - /// @param S The scop for which to generate the exiting PHI nodes. - void createExitPHINodeMerges(Scop &S); + /// @param S The scop for which to generate the exiting PHI nodes. + /// @param ScalarMap The scalar mappings that hold after @p S. + void createExitPHINodeMerges(Scop &S, ValueMapT &ScalarMap); - /// @brief Promote the values of demoted scalars after the SCoP. + /// @brief Merge scalars escaping the SCoP with their original counterpart. 
/// - /// If a scalar value was used outside the SCoP we need to promote the value - /// stored in the memory cell allocated for that scalar and combine it with - /// the original value in the non-optimized SCoP. - void createScalarFinalization(Region &R); + /// @param ScalarMap The scalar mappings that hold after @p S. + void createScalarFinalization(Region &R, ValueMapT &ScalarMap); /// @brief Try to synthesize a new value /// @@ -468,7 +269,7 @@ /// o The new value, if available. /// o NULL, if no value is found. Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, - LoopToScevMapT <S, Loop *L) const; + LoopToScevMapT <S, Loop *L); void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, LoopToScevMapT <S); @@ -502,16 +303,25 @@ LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); + /// @brief Create a loop carried PHI for the value @p V. + /// + /// @param V A (possibly) loop carried value. + /// @param BBMap A mapping from old values to their new values + /// (for values recalculated within this basic block). + /// + /// @returns The new loop carried PHI for @p V. + Value *createLoopCarriedPHI(Value *V, ValueMapT &BBMap); + /// @brief Copy a single PHI instruction. /// - /// The implementation in the BlockGenerator is trivial, however it allows - /// subclasses to handle PHIs different. + /// @param Stmt The statement to code generate. + /// @param PHI The PHI that should be copied. + /// @param BBMap A mapping from old values to their new values + /// @param LTS A map from old loops to new induction variables as SCEVs. /// - /// @returns The nullptr as the BlockGenerator does not copy PHIs. - virtual Value *copyPHIInstruction(ScopStmt &, PHINode *, ValueMapT &, - LoopToScevMapT &) { - return nullptr; - } + /// @returns A nullptr as the BlockGenerator does not copy PHIs (in-place). + virtual Value *copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI, + ValueMapT &BBMap, LoopToScevMapT <S); /// @brief Copy a single Instruction. 
/// @@ -532,10 +342,15 @@ /// @param NewAccesses A map from memory access ids to new ast expressions, /// which may contain new access expressions for certain /// memory accesses. + /// @param ForceCopy Flag to indicate a copy is supposed to be made (if + /// possible). If this flag is not set synthesizable + /// instructions will not be copied as they are generated + /// on demand. void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, - LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses); + LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses, + bool ForceCopy = false); - /// @brief Helper to get the newest version of @p ScalarValue. + /// @brief Helper to determine if @p Inst can be synthesized in @p Stmt. /// /// @param ScalarValue The original value needed. /// @param R The current SCoP region. @@ -583,11 +398,11 @@ /// which may contain new access expressions for certain /// memory accesses. static void generate(BlockGenerator &BlockGen, ScopStmt &Stmt, - std::vector &VLTS, + ValueMapT &ScalarMap, std::vector &VLTS, __isl_keep isl_map *Schedule, __isl_keep isl_id_to_ast_expr *NewAccesses) { VectorBlockGenerator Generator(BlockGen, VLTS, Schedule); - Generator.copyStmt(Stmt, NewAccesses); + Generator.copyStmt(Stmt, ScalarMap, NewAccesses); } private: @@ -714,18 +529,6 @@ bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap); - /// @brief Generate vector loads for scalars. - /// - /// @param Stmt The scop statement for which to generate the loads. - /// @param VectorBlockMap A map that will be updated to relate the original - /// values with the newly generated vector loads. - void generateScalarVectorLoads(ScopStmt &Stmt, ValueMapT &VectorBlockMap); - - /// @brief Verify absence of scalar stores. - /// - /// @param Stmt The scop statement to check for scalar stores. 
- void verifyNoScalarStores(ScopStmt &Stmt); - /// @param NewAccesses A map from memory access ids to new ast expressions, /// which may contain new access expressions for certain /// memory accesses. @@ -736,7 +539,8 @@ /// @param NewAccesses A map from memory access ids to new ast expressions, /// which may contain new access expressions for certain /// memory accesses. - void copyStmt(ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses); + void copyStmt(ScopStmt &Stmt, ValueMapT &ScalarMap, + __isl_keep isl_id_to_ast_expr *NewAccesses); }; /// @brief Generator for new versions of polyhedral region statements. @@ -756,8 +560,9 @@ /// GlobalMap. /// /// @param Stmt The statement to code generate. + /// @param ScalarMap The scalar mappings that hold when @p Stmt is entered. /// @param LTS A map from old loops to new induction variables as SCEVs. - void copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, + void copyStmt(ScopStmt &Stmt, ValueMapT &ScalarMap, LoopToScevMapT <S, __isl_keep isl_id_to_ast_expr *IdToAstExp); private: @@ -766,11 +571,17 @@ /// @brief The "BBMaps" for the whole region (one for each block). DenseMap RegionMaps; + DenseMap ScalarMaps; /// @brief Mapping to remember PHI nodes that still need incoming values. using PHINodePairTy = std::pair; DenseMap> IncompletePHINodeMap; + /// @brief Create merge PHI nodes in @p BB for predecessors inside @p R. + /// + /// @returns The PHI mappings are stored in @p MergeScalarMap. + void createMergePHIs(BasicBlock *BB, Region &R, ValueMapT &MergeScalarMap); + /// @brief Repair the dominance tree after we created a copy block for @p BB. /// /// @returns The immediate dominator in the DT for @p BBCopy if in the region. @@ -782,24 +593,12 @@ /// @param PHI The original PHI we copy. /// @param PHICopy The copy of @p PHI. /// @param IncomingBB An incoming block of @p PHI. + /// @param BBMap A mapping from old values to their new values /// @param LTS A map from old loops to new induction variables as /// SCEVs. 
void addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, PHINode *PHICopy, - BasicBlock *IncomingBB, LoopToScevMapT <S); - - /// @brief Generate the scalar stores for the given statement. - /// - /// After the statement @p Stmt was copied all inner-SCoP scalar dependences - /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to - /// be demoted to memory. - /// - /// @param Stmt The statement we generate code for. - /// @param LTS A mapping from loops virtual canonical induction variable to - /// their new values (for values recalculated in the new ScoP, - /// but not within this basic block) - /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, - ValueMapT &BBMAp) override; + BasicBlock *IncomingBB, ValueMapT &BBMap, + LoopToScevMapT <S); /// @brief Copy a single PHI instruction. /// Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -32,9 +32,9 @@ IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, Scop &S) - : S(S), Builder(Builder), Annotator(Annotator), + : S(S), Builder(Builder), Annotator(Annotator), LoopDepth(0), ExprBuilder(S, Builder, IDToValue, ValueMap, DL, SE, DT, LI), - BlockGen(Builder, LI, SE, DT, ScalarMap, PHIOpMap, EscapeMap, ValueMap, + BlockGen(Builder, LI, SE, DT, LCPHIs, LoopDepth, EscapeMap, ValueMap, &ExprBuilder), RegionGen(BlockGen), P(P), DL(DL), LI(LI), SE(SE), DT(DT) {} @@ -46,40 +46,41 @@ /// @brief Preload all memory loads that are invariant. bool preloadInvariantLoads(); + /// @brief Map PHIs in the SCoP entry block to their initial values. + void createPHIInitialization(); + /// @brief Finalize code generation for the SCoP @p S. 
/// /// @see BlockGenerator::finalizeSCoP(Scop &S) - void finalizeSCoP(Scop &S) { BlockGen.finalizeSCoP(S); } + void finalizeSCoP(Scop &S) { BlockGen.finalizeSCoP(S, ScalarMap); } IslExprBuilder &getExprBuilder() { return ExprBuilder; } - /// @brief Get the associated block generator. - /// - /// @return A referecne to the associated block generator. - BlockGenerator &getBlockGenerator() { return BlockGen; } - protected: Scop &S; PollyIRBuilder &Builder; ScopAnnotator &Annotator; - IslExprBuilder ExprBuilder; - - /// @brief Maps used by the block and region generator to demote scalars. + /// @brief Maps used to resolve inter-block scalar uses. /// ///@{ - /// @brief See BlockGenerator::ScalarMap. - BlockGenerator::ScalarAllocaMapTy ScalarMap; + /// @brief Map from loop carried PHI nodes to "floating" copies. + ValueMapT LCPHIs; - /// @brief See BlockGenerator::PhiOpMap. - BlockGenerator::ScalarAllocaMapTy PHIOpMap; + /// @brief Currently valid scalar mappings. + ValueMapT ScalarMap; /// @brief See BlockGenerator::EscapeMap. BlockGenerator::EscapeUsersAllocaMapTy EscapeMap; ///@} + /// @brief The current loop depth. + int LoopDepth; + + IslExprBuilder ExprBuilder; + /// @brief The generator used to copy a basic block. BlockGenerator BlockGen; Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -14,6 +14,8 @@ #ifndef POLLY_SUPPORT_IRHELPER_H #define POLLY_SUPPORT_IRHELPER_H +#include "polly/CodeGen/IRBuilder.h" + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/ValueHandle.h" @@ -164,5 +166,17 @@ /// otherwise return false. bool canSynthesize(const llvm::Value *V, const llvm::LoopInfo *LI, llvm::ScalarEvolution *SE, const llvm::Region *R); + +/// @brief Use @p Builder to create merge PHIs. +/// +/// @param Builder The IRBuilder to create the PHIs. +/// @param BBs The incoming blocks (ordered!). 
+/// @param Maps The scalar mappings in the incoming blocks (ordered!). +/// +/// @returns The merged mappings in @p MergeMap. +void createMergePHIs(PollyIRBuilder &Builder, + const llvm::ArrayRef &BBs, + const llvm::ArrayRef &Maps, + ValueMapT &MergeMap); } #endif Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -50,13 +50,12 @@ BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, - ScalarAllocaMapTy &ScalarMap, - ScalarAllocaMapTy &PHIOpMap, + ValueMapT &LCPHIs, int &LoopDepth, EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder) : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT), - EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap), + EntryBB(nullptr), LCPHIs(LCPHIs), LoopDepth(LoopDepth), EscapeMap(EscapeMap), GlobalMap(GlobalMap) {} Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, @@ -93,7 +92,7 @@ } Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, - LoopToScevMapT <S, Loop *L) const { + LoopToScevMapT <S, Loop *L) { // Constants that do not reference any named value can always remain // unchanged. Handle them early to avoid expensive map lookups. We do not take // the fast-path for external constants which are referenced through globals @@ -127,10 +126,26 @@ return Old; // A scop-constant value defined by an instruction executed outside the scop. - if (const Instruction *Inst = dyn_cast(Old)) + if (const Instruction *Inst = dyn_cast(Old)) { if (!Stmt.getParent()->getRegion().contains(Inst->getParent())) return Old; + // If the value is a scalar read that has not yet been mapped to a new value + // we either (1) generate dead code that is not constraint by dependences or + // (2) we reversed the textual order of a scalar dependence in a loop. 
In + // the first case we can safely use undef, in the second we introduce a loop + // carried PHI node that will be later rewired correctly. + for (auto *MA : Stmt) { + if (MA->getAccessValue() != Old) + continue; + + if (!L) + return UndefValue::get(Old->getType()); + + return createLoopCarriedPHI(Old, BBMap); + } + } + // The scalar dependence is neither available nor SCEVCodegenable. llvm_unreachable("Unexpected scalar dependence in region!"); return nullptr; @@ -206,7 +221,7 @@ Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap, LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses) { - if (Value *PreloadLoad = GlobalMap.lookup(Load)) + if (Value *PreloadLoad = BBMap.lookup(Load)) return PreloadLoad; auto *Pointer = Load->getPointerOperand(); @@ -238,6 +253,30 @@ Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment()); } +Value *BlockGenerator::createLoopCarriedPHI(Value *V, ValueMapT &BBMap) { + auto *PHI = PHINode::Create(V->getType(), 2, V->getName() + ".polly.lc"); + + assert(LCPHIs.count(V) == 0); + LCPHIs[V] = PHI; + BBMap[V] = PHI; + + return PHI; +} + +Value *BlockGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI, + ValueMapT &BBMap, + LoopToScevMapT &LTS) { + if (!LI.isLoopHeader(PHI->getParent())) + return nullptr; + + if (LoopDepth == 0 || canSyntheziseInStmt(Stmt, PHI)) + return nullptr; + + createLoopCarriedPHI(PHI, BBMap); + + return nullptr; +} + bool BlockGenerator::canSyntheziseInStmt(ScopStmt &Stmt, Instruction *Inst) { Loop *L = getLoopForInst(Inst); return (Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) && @@ -246,14 +285,15 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, LoopToScevMapT &LTS, - isl_id_to_ast_expr *NewAccesses) { + isl_id_to_ast_expr *NewAccesses, + bool ForceCopy) { // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. 
if (Inst->isTerminator()) return; // Synthesizable statements will be generated on-demand. - if (canSyntheziseInStmt(Stmt, Inst)) + if (!ForceCopy && canSyntheziseInStmt(Stmt, Inst)) return; if (auto *Load = dyn_cast<LoadInst>(Inst)) { @@ -282,15 +322,22 @@ copyInstScalar(Stmt, Inst, BBMap, LTS); } -void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS, +void BlockGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &ScalarMap, + LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses) { assert(Stmt.isBlockStmt() && "Only block statements can be copied by the block generator"); - ValueMapT BBMap; + // Initialize the block-internal mappings with all mappings that hold when this + // block is entered. + ValueMapT BBMap = ScalarMap; BasicBlock *BB = Stmt.getBasicBlock(); copyBB(Stmt, BB, BBMap, LTS, NewAccesses); + + // Copy scalar mappings from the BBMap back to the ScalarMap if they + // might be needed after this block. + generateScalarMappings(Stmt, BB, ScalarMap, BBMap, LTS); } BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) { @@ -305,13 +352,9 @@ isl_id_to_ast_expr *NewAccesses) { BasicBlock *CopyBB = splitBB(BB); Builder.SetInsertPoint(&CopyBB->front()); - generateScalarLoads(Stmt, BBMap); copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses); - // After a basic block was copied store all scalars that escape this block in - // their alloca. - generateScalarStores(Stmt, LTS, BBMap); return CopyBB; } @@ -324,48 +367,6 @@ copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses); } -Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase, - ScalarAllocaMapTy &Map, - const char *NameExt) { - // If no alloca was found create one and insert it in the entry block. 
- if (!Map.count(ScalarBase)) { - auto *Ty = ScalarBase->getType(); - auto NewAddr = new AllocaInst(Ty, ScalarBase->getName() + NameExt); - EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); - NewAddr->insertBefore(&*EntryBB->getFirstInsertionPt()); - Map[ScalarBase] = NewAddr; - } - - auto Addr = Map[ScalarBase]; - - if (GlobalMap.count(Addr)) - return GlobalMap[Addr]; - - return Addr; -} - -Value *BlockGenerator::getOrCreateAlloca(MemoryAccess &Access) { - if (Access.isPHIKind()) - return getOrCreatePHIAlloca(Access.getBaseAddr()); - else - return getOrCreateScalarAlloca(Access.getBaseAddr()); -} - -Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) { - if (Array->isPHIKind()) - return getOrCreatePHIAlloca(Array->getBasePtr()); - else - return getOrCreateScalarAlloca(Array->getBasePtr()); -} - -Value *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase) { - return getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); -} - -Value *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase) { - return getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); -} - void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, Value *Address) { // If there are escape users we get the alloca for this instruction and put it @@ -392,152 +393,66 @@ if (EscapeUsers.empty()) return; - // Get or create an escape alloca for this instruction. - auto *ScalarAddr = Address ? Address : getOrCreateScalarAlloca(Inst); - // Remember that this instruction has escape uses and the escape alloca. 
- EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); -} - -void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) { - for (MemoryAccess *MA : Stmt) { - if (MA->isArrayKind() || MA->isWrite()) - continue; - - auto *Address = getOrCreateAlloca(*MA); - BBMap[MA->getBaseAddr()] = - Builder.CreateLoad(Address, Address->getName() + ".reload"); - } -} - -Value *BlockGenerator::getNewScalarValue(Value *ScalarValue, const Region &R, - ScopStmt &Stmt, LoopToScevMapT <S, - ValueMapT &BBMap) { - // If the value we want to store is an instruction we might have demoted it - // in order to make it accessible here. In such a case a reload is - // necessary. If it is no instruction it will always be a value that - // dominates the current point and we can just use it. In total there are 4 - // options: - // (1) The value is no instruction ==> use the value. - // (2) The value is an instruction that was split out of the region prior to - // code generation ==> use the instruction as it dominates the region. - // (3) The value is an instruction: - // (a) The value was defined in the current block, thus a copy is in - // the BBMap ==> use the mapped value. - // (b) The value was defined in a previous block, thus we demoted it - // earlier ==> use the reloaded value. 
- Instruction *ScalarValueInst = dyn_cast(ScalarValue); - if (!ScalarValueInst) - return ScalarValue; - - if (!R.contains(ScalarValueInst)) { - if (Value *ScalarValueCopy = GlobalMap.lookup(ScalarValueInst)) - return /* Case (3a) */ ScalarValueCopy; - else - return /* Case 2 */ ScalarValue; - } - - if (Value *ScalarValueCopy = BBMap.lookup(ScalarValueInst)) - return /* Case (3a) */ ScalarValueCopy; - - if ((Stmt.isBlockStmt() && - Stmt.getBasicBlock() == ScalarValueInst->getParent()) || - (Stmt.isRegionStmt() && Stmt.getRegion()->contains(ScalarValueInst))) { - auto SynthesizedValue = trySynthesizeNewValue( - Stmt, ScalarValueInst, BBMap, LTS, getLoopForInst(ScalarValueInst)); - - if (SynthesizedValue) - return SynthesizedValue; - } - - // Case (3b) - Value *Address = getOrCreateScalarAlloca(ScalarValueInst); - ScalarValue = Builder.CreateLoad(Address, Address->getName() + ".reload"); - - return ScalarValue; + EscapeMap[Inst] = std::move(EscapeUsers); } -void BlockGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, - ValueMapT &BBMap) { - const Region &R = Stmt.getParent()->getRegion(); +void BlockGenerator::generateScalarMappings(ScopStmt &Stmt, BasicBlock *BB, + ValueMapT &ScalarMap, + ValueMapT &BBMap, + LoopToScevMapT <S) { - assert(Stmt.isBlockStmt() && "Region statements need to use the " - "generateScalarStores() function in the " - "RegionGenerator"); + Scop &S = *Stmt.getParent(); + for (auto *MA : Stmt) { - for (MemoryAccess *MA : Stmt) { if (MA->isArrayKind() || MA->isRead()) continue; - Value *Val = MA->getAccessValue(); - auto *Address = getOrCreateAlloca(*MA); - - Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap); - Builder.CreateStore(Val, Address); - } -} - -void BlockGenerator::createScalarInitialization(Scop &S) { - Region &R = S.getRegion(); - BasicBlock *ExitBB = R.getExit(); - - // The split block __just before__ the region and optimized region. 
- BasicBlock *SplitBB = R.getEnteringBlock(); - BranchInst *SplitBBTerm = cast(SplitBB->getTerminator()); - assert(SplitBBTerm->getNumSuccessors() == 2 && "Bad region entering block!"); - - // Get the start block of the __optimized__ region. - BasicBlock *StartBB = SplitBBTerm->getSuccessor(0); - if (StartBB == R.getEntry()) - StartBB = SplitBBTerm->getSuccessor(1); - - Builder.SetInsertPoint(StartBB->getTerminator()); + // For each scalar defined in this statement that has a MemoryAccess + // we make sure the access value was actually copied. + auto *AccessValue = MA->getAccessValue(); + if (auto *AccessValueInst = dyn_cast(AccessValue)) { + if (AccessValueInst->getParent() == BB && + canSyntheziseInStmt(Stmt, AccessValueInst)) + copyInstruction(Stmt, AccessValueInst, BBMap, LTS, nullptr, + /* Force */ true); + } - for (auto &Pair : S.arrays()) { - auto &Array = Pair.second; - if (Array->getNumberOfDimensions() != 0) + // If the scalar was not defined in this block (only for region statements) + // we can skip it for now and only proceed when the block containing the + // access instruction is copied. + if (BB != MA->getAccessInstruction()->getParent()) continue; - if (Array->isPHIKind()) { - // For PHI nodes, the only values we need to store are the ones that - // reach the PHI node from outside the region. In general there should - // only be one such incoming edge and this edge should enter through - // 'SplitBB'. 
- auto PHI = cast(Array->getBasePtr()); - - for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++) - if (!R.contains(*BI) && *BI != SplitBB) - llvm_unreachable("Incoming edges from outside the scop should always " - "come from SplitBB"); - - int Idx = PHI->getBasicBlockIndex(SplitBB); - if (Idx < 0) - continue; - - Value *ScalarValue = PHI->getIncomingValue(Idx); - Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI)); - continue; + auto *BaseAddr = MA->getBaseAddr(); + auto *NewVal = getNewValue(Stmt, AccessValue, BBMap, LTS, + getLoopForInst(MA->getAccessInstruction())); + + // If the base address has a loop carried PHI it is either a real PHI + // (= MA is PHIKind) or a scalar that was used before we saw a definition. + // In the second case we map both, the PHICopy as well as the BaseAddr to + // the NewValue to allow uses textually before as well as after the scalar + // definition. + if (Value *PHICopy = LCPHIs.lookup(BaseAddr)) { + if (MA->isPHIKind() && LI.isLoopHeader(BB)) + BaseAddr = PHICopy; + else if (NewVal != PHICopy) + ScalarMap[PHICopy] = NewVal; } - auto *Inst = dyn_cast(Array->getBasePtr()); - - if (Inst && R.contains(Inst)) - continue; - - // PHI nodes that are not marked as such in their SAI object are either exit - // PHI nodes we model as common scalars but without initialization, or - // incoming phi nodes that need to be initialized. Check if the first is the - // case for Inst and do not create and initialize memory if so. - if (auto *PHI = dyn_cast_or_null(Inst)) - if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0) - continue; + // For exit PHIs that have been split due to single exit edge creation + // we map the PHI with multiple exits to the new value, not the new one. 
+ if (MA->isExitPHIKind() && !S.hasSingleExitEdge()) { + auto *PHI = cast(BaseAddr); + BaseAddr = PHI->getIncomingValueForBlock(S.getRegion().getExit()); + } - Builder.CreateStore(Array->getBasePtr(), - getOrCreateScalarAlloca(Array->getBasePtr())); + assert(BaseAddr); + ScalarMap[BaseAddr] = NewVal; } } -void BlockGenerator::createScalarFinalization(Region &R) { +void BlockGenerator::createScalarFinalization(Region &R, ValueMapT &ScalarMap) { // The exit block of the __unoptimized__ region. BasicBlock *ExitBB = R.getExitingBlock(); // The merge block __just after__ the region and the optimized region. @@ -553,15 +468,10 @@ // Extract the escaping instruction and the escaping users as well as the // alloca the instruction was demoted to. Instruction *EscapeInst = EscapeMapping.getFirst(); - const auto &EscapeMappingValue = EscapeMapping.getSecond(); - const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second; - Value *ScalarAddr = EscapeMappingValue.first; + const EscapeUserVectorTy &EscapeUsers = EscapeMapping.getSecond(); - // Reload the demoted instruction in the optimized version of the SCoP. - Value *EscapeInstReload = - Builder.CreateLoad(ScalarAddr, EscapeInst->getName() + ".final_reload"); - EscapeInstReload = - Builder.CreateBitOrPointerCast(EscapeInstReload, EscapeInst->getType()); + Value *EscapeInstMapped = ScalarMap[EscapeInst]; + assert(EscapeInstMapped); // Create the merge PHI that merges the optimized and unoptimized version. PHINode *MergePHI = PHINode::Create(EscapeInst->getType(), 2, @@ -569,7 +479,7 @@ MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt()); // Add the respective values to the merge PHI. 
- MergePHI->addIncoming(EscapeInstReload, OptExitBB); + MergePHI->addIncoming(EscapeInstMapped, OptExitBB); MergePHI->addIncoming(EscapeInst, ExitBB); // The information of scalar evolution about the escaping instruction needs @@ -609,7 +519,7 @@ } } -void BlockGenerator::createExitPHINodeMerges(Scop &S) { +void BlockGenerator::createExitPHINodeMerges(Scop &S, ValueMapT &ScalarMap) { if (S.hasSingleExitEdge()) return; @@ -636,24 +546,28 @@ continue; std::string Name = PHI->getName(); - Value *ScalarAddr = getOrCreateScalarAlloca(PHI); - Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload"); - Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType()); Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB); + Value *CopiedValue = ScalarMap.lookup(OriginalValue); + if (!CopiedValue) { + assert(!isa(OriginalValue) || + !R.contains(cast(OriginalValue))); + CopiedValue = OriginalValue; + } + assert(CopiedValue); + auto *MergePHI = PHINode::Create(PHI->getType(), 2, Name + ".ph.merge"); MergePHI->insertBefore(&*MergeBB->getFirstInsertionPt()); - MergePHI->addIncoming(Reload, OptExitBB); + MergePHI->addIncoming(CopiedValue, OptExitBB); MergePHI->addIncoming(OriginalValue, ExitBB); int Idx = PHI->getBasicBlockIndex(MergeBB); PHI->setIncomingValue(Idx, MergePHI); } } -void BlockGenerator::finalizeSCoP(Scop &S) { +void BlockGenerator::finalizeSCoP(Scop &S, ValueMapT &ScalarMap) { findOutsideUsers(S); - createScalarInitialization(S); - createExitPHINodeMerges(S); - createScalarFinalization(S.getRegion()); + createExitPHINodeMerges(S, ScalarMap); + createScalarFinalization(S.getRegion(), ScalarMap); } VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, @@ -773,7 +687,7 @@ void VectorBlockGenerator::generateLoad( ScopStmt &Stmt, LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) { - if (Value *PreloadLoad = GlobalMap.lookup(Load)) { + if (Value *PreloadLoad = 
ScalarMaps[0].lookup(Load)) { VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad, Load->getName() + "_p"); return; @@ -976,38 +890,9 @@ copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps, NewAccesses); } -void VectorBlockGenerator::generateScalarVectorLoads( - ScopStmt &Stmt, ValueMapT &VectorBlockMap) { - for (MemoryAccess *MA : Stmt) { - if (MA->isArrayKind() || MA->isWrite()) - continue; - - auto *Address = getOrCreateAlloca(*MA); - Type *VectorPtrType = getVectorPtrTy(Address, 1); - Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType, - Address->getName() + "_p_vec_p"); - auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload"); - Constant *SplatVector = Constant::getNullValue( - VectorType::get(Builder.getInt32Ty(), getVectorWidth())); - - Value *VectorVal = Builder.CreateShuffleVector( - Val, Val, SplatVector, Address->getName() + "_p_splat"); - VectorBlockMap[MA->getBaseAddr()] = VectorVal; - VectorVal->dump(); - } -} - -void VectorBlockGenerator::verifyNoScalarStores(ScopStmt &Stmt) { - for (MemoryAccess *MA : Stmt) { - if (MA->isArrayKind() || MA->isRead()) - continue; - - llvm_unreachable("Scalar stores not expected in vector loop"); - } -} - void VectorBlockGenerator::copyStmt( - ScopStmt &Stmt, __isl_keep isl_id_to_ast_expr *NewAccesses) { + ScopStmt &Stmt, ValueMapT &ScalarMap, + __isl_keep isl_id_to_ast_expr *NewAccesses) { assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by " "the vector block generator"); @@ -1031,15 +916,12 @@ // in the vector map once (as it is calculating >vectorwidth< values at a // time. Or (if the values are calculated using scalar operations), it // appears once in every dimension of the scalarMap. 
- VectorValueMapT ScalarBlockMap(getVectorWidth()); + VectorValueMapT ScalarBlockMap; + ScalarBlockMap.append(getVectorWidth(), ScalarMap); ValueMapT VectorBlockMap; - generateScalarVectorLoads(Stmt, VectorBlockMap); - for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap, NewAccesses); - - verifyNoScalarStores(Stmt); } BasicBlock *RegionGenerator::repairDominance(BasicBlock *BB, @@ -1054,7 +936,29 @@ return BBCopyIDom; } -void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, +void RegionGenerator::createMergePHIs(BasicBlock *BB, Region &R, + ValueMapT &MergeScalarMap) { + SmallVector PredBBs; + SmallVector PredScalarMaps; + + for (BasicBlock *PredBB : predecessors(BB)) { + if (!R.contains(PredBB)) + continue; + + auto *PredBBCopy = BlockMap[PredBB]; + assert(PredBBCopy && ScalarMaps.count(PredBBCopy)); + + PredBBs.push_back(PredBBCopy); + PredScalarMaps.push_back(&ScalarMaps[PredBBCopy]); + } + + Builder.SetInsertPoint(&*BlockMap[BB]->getFirstInsertionPt()); + + polly::createMergePHIs(Builder, PredBBs, PredScalarMaps, MergeScalarMap); +} + +void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &OuterScalarMap, + LoopToScevMapT <S, isl_id_to_ast_expr *IdToAstExp) { assert(Stmt.isRegionStmt() && "Only region statements can be copied by the region generator"); @@ -1064,6 +968,7 @@ // Forget all old mappings. BlockMap.clear(); RegionMaps.clear(); + ScalarMaps.clear(); IncompletePHINodeMap.clear(); // Collection of all values related to this subregion. 
@@ -1081,7 +986,9 @@ Builder.SetInsertPoint(&EntryBBCopy->front()); ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy]; - generateScalarLoads(Stmt, EntryBBMap); + ValueMapT &EntryScalarMap = ScalarMaps[EntryBBCopy]; + EntryScalarMap.insert(OuterScalarMap.begin(), OuterScalarMap.end()); + EntryBBMap.insert(OuterScalarMap.begin(), OuterScalarMap.end()); for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI) if (!R->contains(*PI)) @@ -1119,25 +1026,34 @@ // loads from the generated entering block (which dominates all blocks of // this subregion) or the maps of the immediate dominator, if part of the // subregion. The latter necessarily includes the former. - ValueMapT *InitBBMap; + ValueMapT *InitBBMap, *InitScalarMap; if (BBCopyIDom) { assert(RegionMaps.count(BBCopyIDom)); InitBBMap = &RegionMaps[BBCopyIDom]; - } else + InitScalarMap = &ScalarMaps[BBCopyIDom]; + } else { InitBBMap = &EntryBBMap; + InitScalarMap = &EntryScalarMap; + } auto Inserted = RegionMaps.insert(std::make_pair(BBCopy, *InitBBMap)); ValueMapT &RegionMap = Inserted.first->second; + ValueMapT &ScalarMap = + ScalarMaps.insert(std::make_pair(BBCopy, *InitScalarMap)).first->second; // Copy the block with the BlockGenerator. Builder.SetInsertPoint(&BBCopy->front()); copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp); + // TODO + generateScalarMappings(Stmt, BB, ScalarMap, RegionMap, LTS); + // In order to remap PHI nodes we store also basic block mappings. BlockMap[BB] = BBCopy; // Add values to incomplete PHI nodes waiting for this block to be copied. for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB]) - addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, LTS); + addOperandToPHI(Stmt, PHINodePair.first, PHINodePair.second, BB, + RegionMap, LTS); IncompletePHINodeMap[BB].clear(); // And continue with new successors inside the region. @@ -1216,67 +1132,29 @@ LTS[L] = SE.getUnknown(LoopPHI); } - // Continue generating code in the exit block. 
- Builder.SetInsertPoint(&*ExitBBCopy->getFirstInsertionPt()); + // Merge values in the non-affine region exit node (except it was split when + // the SCoP CFG structure was changed). + if (!S->hasSingleExitEdge() && + S->getRegion().getExitingBlock() == R->getExitingBlock()) + createMergePHIs(R->getExitingBlock(), *R, OuterScalarMap); + else + createMergePHIs(R->getExit(), *R, OuterScalarMap); + + Builder.SetInsertPoint(ExitBBCopy->getTerminator()); - // Write values visible to other statements. - generateScalarStores(Stmt, LTS, ValueMap); BlockMap.clear(); RegionMaps.clear(); IncompletePHINodeMap.clear(); } -void RegionGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, - ValueMapT &BBMap) { - const Region &R = Stmt.getParent()->getRegion(); - - assert(Stmt.getRegion() && - "Block statements need to use the generateScalarStores() " - "function in the BlockGenerator"); - - for (MemoryAccess *MA : Stmt) { - if (MA->isArrayKind() || MA->isRead()) - continue; - - Instruction *ScalarInst = MA->getAccessInstruction(); - Value *Val = MA->getAccessValue(); - - // In case we add the store into an exiting block, we need to restore the - // position for stores in the exit node. - BasicBlock *SavedInsertBB = Builder.GetInsertBlock(); - auto SavedInsertionPoint = Builder.GetInsertPoint(); - ValueMapT *LocalBBMap = &BBMap; - - // Scalar writes induced by PHIs must be written in the incoming blocks. - if (MA->isPHIKind() || MA->isExitPHIKind()) { - BasicBlock *ExitingBB = ScalarInst->getParent(); - BasicBlock *ExitingBBCopy = BlockMap[ExitingBB]; - Builder.SetInsertPoint(ExitingBBCopy->getTerminator()); - - // For the incoming blocks, use the block's BBMap instead of the one for - // the entire region. - LocalBBMap = &RegionMaps[ExitingBBCopy]; - } - - auto Address = getOrCreateAlloca(*MA); - - Val = getNewScalarValue(Val, R, Stmt, LTS, *LocalBBMap); - Builder.CreateStore(Val, Address); - - // Restore the insertion point if necessary. 
- if (MA->isPHIKind() || MA->isExitPHIKind()) - Builder.SetInsertPoint(SavedInsertBB, SavedInsertionPoint); - } -} - void RegionGenerator::addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, PHINode *PHICopy, BasicBlock *IncomingBB, - LoopToScevMapT <S) { + ValueMapT &BBMap, LoopToScevMapT <S) { Region *StmtR = Stmt.getRegion(); // If the incoming block was not yet copied mark this PHI as incomplete. // Once the block will be copied the incoming value will be added. - BasicBlock *BBCopy = BlockMap[IncomingBB]; + BasicBlock *BBCopy = BlockMap.lookup(IncomingBB); if (!BBCopy) { assert(StmtR->contains(IncomingBB) && "Bad incoming block for PHI in non-affine region"); @@ -1284,37 +1162,48 @@ return; } - Value *OpCopy = nullptr; - if (StmtR->contains(IncomingBB)) { - assert(RegionMaps.count(BBCopy) && - "Incoming PHI block did not have a BBMap"); - ValueMapT &BBCopyMap = RegionMaps[BBCopy]; + if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) + return; - Value *Op = PHI->getIncomingValueForBlock(IncomingBB); + ValueMapT &BBCopyMap = + StmtR->contains(IncomingBB) ? 
RegionMaps[BBCopy] : BBMap; - BasicBlock *OldBlock = Builder.GetInsertBlock(); - auto OldIP = Builder.GetInsertPoint(); - Builder.SetInsertPoint(BBCopy->getTerminator()); - OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForInst(PHI)); - Builder.SetInsertPoint(OldBlock, OldIP); - } else { + Value *Op = PHI->getIncomingValueForBlock(IncomingBB); - if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) - return; - - Value *PHIOpAddr = getOrCreatePHIAlloca(const_cast(PHI)); - OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload", - BlockMap[IncomingBB]->getTerminator()); - } + BasicBlock *OldBlock = Builder.GetInsertBlock(); + auto OldIP = Builder.GetInsertPoint(); + Builder.SetInsertPoint(BBCopy->getTerminator()); + auto *OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForInst(PHI)); + Builder.SetInsertPoint(OldBlock, OldIP); assert(OpCopy && "Incoming PHI value was not copied properly"); assert(BBCopy && "Incoming PHI block was not copied properly"); PHICopy->addIncoming(OpCopy, BBCopy); + + assert(ScalarMaps.count(BBCopy)); + if (ScalarMaps[BBCopy].count(const_cast(PHI))) { + assert(ScalarMaps.count(PHICopy->getParent())); + ScalarMaps[PHICopy->getParent()][const_cast(PHI)] = PHICopy; + } } Value *RegionGenerator::copyPHIInstruction(ScopStmt &Stmt, PHINode *PHI, ValueMapT &BBMap, LoopToScevMapT <S) { + // Check if the PHI node is in the entry of the non-affine region. If so, two + // cases need to be distinguished: + // 1) It is a loop-carried PHI of a loop that is part of the non-affine + // region. If so we treat it as any other PHI in the non-affine region. + // 2) It is not loop-carried or the loop is outside the non-affine region. + // In this case we revert to the BlockGenerator::copyPHIInstruction. 
+ assert(Stmt.isRegionStmt()); + Region *R = Stmt.getRegion(); + if (R->getEntry() == PHI->getParent()) { + Loop *L = LI.getLoopFor(PHI->getParent()); + if (!L || L->getHeader() != PHI->getParent() || !R->contains(L)) + return BlockGenerator::copyPHIInstruction(Stmt, PHI, BBMap, LTS); + } + unsigned NumIncoming = PHI->getNumIncomingValues(); PHINode *PHICopy = Builder.CreatePHI(PHI->getType(), NumIncoming, "polly." + PHI->getName()); @@ -1322,6 +1211,6 @@ BBMap[PHI] = PHICopy; for (unsigned u = 0; u < NumIncoming; u++) - addOperandToPHI(Stmt, PHI, PHICopy, PHI->getIncomingBlock(u), LTS); + addOperandToPHI(Stmt, PHI, PHICopy, PHI->getIncomingBlock(u), BBMap, LTS); return PHICopy; } Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -162,6 +162,7 @@ } else { + NodeBuilder.createPHIInitialization(); NodeBuilder.addParameters(S.getContext()); Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "polly/CodeGen/IslNodeBuilder.h" #include "polly/CodeGen/BlockGenerators.h" #include "polly/CodeGen/CodeGeneration.h" #include "polly/CodeGen/IslAst.h" #include "polly/CodeGen/IslExprBuilder.h" +#include "polly/CodeGen/IslNodeBuilder.h" #include "polly/CodeGen/LoopGenerators.h" #include "polly/CodeGen/Utils.h" #include "polly/Config/config.h" @@ -48,6 +48,41 @@ using namespace polly; using namespace llvm; +void IslNodeBuilder::createPHIInitialization() { + + Region &R = S.getRegion(); + + // The split block __just before__ the region and optimized region. 
+ BasicBlock *SplitBB = R.getEnteringBlock(); + BasicBlock *EntryBB = R.getEntry(); + + for (auto &Pair : S.arrays()) { + auto &Array = Pair.second; + if (Array->getNumberOfDimensions() != 0 || !Array->isPHIKind()) + continue; + + // For PHI nodes, the only values we need to store are the ones that + // reach the PHI node from outside the region. In general there should + // only be one such incoming edge and this edge should enter through + // 'SplitBB'. + auto *PHI = cast(Array->getBasePtr()); + if (PHI->getParent() != EntryBB) + continue; + + for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++) + if (!R.contains(*BI) && *BI != SplitBB) + llvm_unreachable("Incoming edges from outside the scop should always " + "come from SplitBB"); + + int Idx = PHI->getBasicBlockIndex(SplitBB); + if (Idx < 0) + continue; + + Value *ScalarValue = PHI->getIncomingValue(Idx); + ScalarMap[PHI] = ScalarValue; + } +} + __isl_give isl_ast_expr * IslNodeBuilder::getUpperBound(__isl_keep isl_ast_node *For, ICmpInst::Predicate &Predicate) { @@ -178,7 +213,7 @@ ValueMapT &GlobalMap; SetVector &Values; SetVector &SCEVs; - BlockGenerator &BlockGen; + ValueMapT &ScalarMap; }; /// @brief Extract the values and SCEVs needed to generate code for a block. @@ -229,7 +264,21 @@ continue; } - References.Values.insert(References.BlockGen.getOrCreateAlloca(*Access)); + if (!Access->isRead()) + continue; + + // Copy the newest version of the access value if it is a SCoP intern + // instruction and a new version exists. Also copy the access value if it + // is SCoP extern. 
+ auto *AccessValue = Access->getAccessValue(); + if (auto *AccessInst = dyn_cast(AccessValue)) + if (References.R.contains(AccessInst)) { + if (Value *NewAccessInst = References.ScalarMap.lookup(AccessInst)) + References.Values.insert(NewAccessInst); + continue; + } + + References.Values.insert(AccessValue); } return isl_stat_ok; @@ -284,8 +333,8 @@ SetVector &Loops) { SetVector SCEVs; - struct SubtreeReferences References = { - LI, SE, S.getRegion(), ValueMap, Values, SCEVs, getBlockGenerator()}; + struct SubtreeReferences References = {LI, SE, S.getRegion(), ValueMap, + Values, SCEVs, ScalarMap}; for (const auto &I : IDToValue) Values.insert(I.second); @@ -344,7 +393,8 @@ auto *NewAccesses = createNewAccesses(Stmt, User); createSubstitutionsVector(Expr, Stmt, VLTS, IVS, IteratorID); - VectorBlockGenerator::generate(BlockGen, *Stmt, VLTS, S, NewAccesses); + VectorBlockGenerator::generate(BlockGen, *Stmt, ScalarMap, VLTS, S, + NewAccesses); isl_id_to_ast_expr_free(NewAccesses); isl_map_free(S); isl_id_free(Id); @@ -469,14 +519,59 @@ Predicate, &Annotator, Parallel, UseGuardBB); IDToValue[IteratorID] = IV; + auto PreMap = ScalarMap; + auto PreLCPHIs = LCPHIs; + LCPHIs.clear(); + + LoopDepth++; + create(Body); Annotator.popLoop(Parallel); IDToValue.erase(IDToValue.find(IteratorID)); + auto *IVPHI = dyn_cast(IV); + auto *HeaderBB = IVPHI ? 
IVPHI->getParent() : nullptr; + if (HeaderBB) { + Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); + + auto *PreLoopBB = IVPHI->getIncomingBlock(0); + auto *BackedgeBB = IVPHI->getIncomingBlock(1); + for (const auto &PHIMapping : LCPHIs) { + + Value *V = PHIMapping.first; + Value *PHICopyVal = PHIMapping.second; + PHINode *PHICopy = cast(PHICopyVal); + assert(PHICopy && ScalarMap.count(PHICopy)); + + Value *BodyVal = ScalarMap[PHICopy]; + Value *PreVal = PreMap.lookup(V); + + if (!PreVal) + PreVal = UndefValue::get(V->getType()); + + PHICopy->insertBefore(HeaderBB->getFirstNonPHI()); + PHICopy->addIncoming(PreVal, PreLoopBB); + PHICopy->addIncoming(BodyVal, BackedgeBB); + } + } + Builder.SetInsertPoint(&ExitBlock->front()); + if (UseGuardBB) { + auto PredIt = pred_begin(ExitBlock); + auto *LoopExitingBB = *PredIt++; + auto *PreLoopBB = *PredIt++; + assert(PredIt == pred_end(ExitBlock)); + createMergePHIs(Builder, {PreLoopBB, LoopExitingBB}, {&PreMap, &ScalarMap}, + PreMap); + ScalarMap = PreMap; + } + + LoopDepth--; + LCPHIs = PreLCPHIs; + isl_ast_node_free(For); isl_ast_expr_free(Iterator); isl_id_free(IteratorID); @@ -685,15 +780,29 @@ Builder.CreateBr(MergeBB); Builder.SetInsertPoint(&ThenBB->front()); + auto PreMap = ScalarMap; + create(isl_ast_node_if_get_then(If)); + auto *ThenExitBB = Builder.GetInsertBlock(); + auto ThenMap = ScalarMap; + Builder.SetInsertPoint(&ElseBB->front()); + ScalarMap = PreMap; + if (isl_ast_node_if_has_else(If)) create(isl_ast_node_if_get_else(If)); + auto ElseMap = ScalarMap; + auto *ElseExitBB = Builder.GetInsertBlock(); + Builder.SetInsertPoint(&MergeBB->front()); + ScalarMap = PreMap; + createMergePHIs(Builder, {ThenExitBB, ElseExitBB}, {&ThenMap, &ElseMap}, + ScalarMap); + isl_ast_node_free(If); } @@ -772,9 +881,9 @@ createSubstitutions(Expr, Stmt, LTS); if (Stmt->isBlockStmt()) - BlockGen.copyStmt(*Stmt, LTS, NewAccesses); + BlockGen.copyStmt(*Stmt, ScalarMap, LTS, NewAccesses); else - RegionGen.copyStmt(*Stmt, LTS, 
NewAccesses); + RegionGen.copyStmt(*Stmt, ScalarMap, LTS, NewAccesses); isl_id_to_ast_expr_free(NewAccesses); isl_ast_node_free(User); @@ -1041,8 +1150,10 @@ Instruction *MAAccInst = MA->getAccessInstruction(); // TODO: The bitcast here is wrong. In case of floating and non-floating // point values we need to reload the value or convert it. - ValueMap[MAAccInst] = + Value *CastedVal = Builder.CreateBitOrPointerCast(PreloadVal, MAAccInst->getType()); + ValueMap[MAAccInst] = CastedVal; + ScalarMap[MAAccInst] = CastedVal; } if (SE.isSCEVable(AccInstTy)) { @@ -1052,10 +1163,7 @@ isl_id_free(ParamId); } - BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); - auto *Alloca = new AllocaInst(AccInstTy, AccInst->getName() + ".preload.s2a"); - Alloca->insertBefore(&*EntryBB->getFirstInsertionPt()); - Builder.CreateStore(PreloadVal, Alloca); + ScalarMap[AccInst] = PreloadVal; for (auto *DerivedSAI : SAI->getDerivedSAIs()) { Value *BasePtr = DerivedSAI->getBasePtr(); @@ -1072,14 +1180,6 @@ Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType()); DerivedSAI->setBasePtr(BasePtr); } - - // For scalar derived SAIs we remap the alloca used for the derived value. 
- if (BasePtr == MA->getAccessInstruction()) { - if (DerivedSAI->isPHIKind()) - PHIOpMap[BasePtr] = Alloca; - else - ScalarMap[BasePtr] = Alloca; - } } } @@ -1097,8 +1197,7 @@ if (EscapeUsers.empty()) continue; - EscapeMap[MA->getAccessInstruction()] = - std::make_pair(Alloca, std::move(EscapeUsers)); + EscapeMap[MA->getAccessInstruction()] = std::move(EscapeUsers); } return true; Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -453,3 +453,54 @@ return false; } + +static Value *mergeValues(PollyIRBuilder &Builder, Value *Val, + const ArrayRef &BBs, + const ArrayRef &Maps) { + assert(BBs.size() > 0 && BBs.size() == Maps.size()); + SmallPtrSet Values; + + auto *MergePHI = Builder.CreatePHI(Val->getType(), BBs.size()); + for (unsigned u = 0, e = BBs.size(); u < e; u++) { + Value *IncomingVal = Maps[u]->lookup(Val); + + if (!IncomingVal) + IncomingVal = UndefValue::get(Val->getType()); + + MergePHI->addIncoming(IncomingVal, BBs[u]); + Values.insert(IncomingVal); + + if (!IncomingVal->hasName() || MergePHI->hasName()) + continue; + + MergePHI->setName(IncomingVal->getName() + ".merge"); + } + + if (Values.size() == 1) { + MergePHI->eraseFromParent(); + return *Values.begin(); + } + + return MergePHI; +} + +void polly::createMergePHIs(PollyIRBuilder &Builder, + const ArrayRef &BBs, + const ArrayRef &Maps, + ValueMapT &MergeMap) { + assert(BBs.size() == Maps.size()); + + SmallPtrSet MergedValues; + for (const auto &Map : Maps) { + for (const auto &Item : *Map) { + Value *OriginalValue = Item.first; + if (!isa(OriginalValue)) + continue; + if (!MergedValues.insert(OriginalValue).second) + continue; + + auto *MergedVal = mergeValues(Builder, OriginalValue, BBs, Maps); + MergeMap[OriginalValue] = MergedVal; + } + } +} Index: test/Isl/CodeGen/MemAccess/update_access_functions.ll =================================================================== --- 
test/Isl/CodeGen/MemAccess/update_access_functions.ll +++ test/Isl/CodeGen/MemAccess/update_access_functions.ll @@ -2,14 +2,15 @@ ; RUN: -polly-import-jscop-postfix=transformed -polly-codegen \ ; RUN: < %s -S | FileCheck %s +; CHECK: %val_p_scalar_.merge = phi double [ undef, %polly.loop_if2 ], [ %val_p_scalar_, %polly.stmt.loop2 +; ; CHECK: polly.stmt.loop2: ; CHECK-NEXT: %polly.access.A = getelementptr double, double* %A, i64 42 ; CHECK-NEXT: %val_p_scalar_ = load double, double* %polly.access.A ; CHECK: polly.stmt.loop3: -; CHECK-NEXT: %val.s2a.reload = load double, double* %val.s2a ; CHECK-NEXT: %polly.access.A20 = getelementptr double, double* %A, i64 42 -; CHECK-NEXT: store double %val.s2a.reload, double* %polly.access.A20 +; CHECK-NEXT: store double %val_p_scalar_.merge, double* %polly.access.A20 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll =================================================================== --- test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll +++ test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll @@ -7,8 +7,8 @@ ; for (int i = 1; i < 1000; i++) ; A[i] += /* split bb */ A[0]; ; } -; A[0] tmp (unused) A -; CHECK: %polly.par.userContext = alloca { float, float*, float* } +; A[0] A +; CHECK: %polly.par.userContext = alloca { float, float* } ; ; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds ; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load Index: test/Isl/CodeGen/OpenMP/single_loop_with_param.ll =================================================================== --- test/Isl/CodeGen/OpenMP/single_loop_with_param.ll +++ test/Isl/CodeGen/OpenMP/single_loop_with_param.ll @@ -1,6 +1,5 @@ -; RUN: opt %loadPolly -polly-parallel \ -; RUN: -polly-parallel-force -polly-codegen -S -verify-dom-info < %s \ -; RUN: | FileCheck %s -check-prefix=IR +; 
RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S \ +; RUN: -verify-dom-info < %s | FileCheck %s ; #define N 1024 ; float A[N]; @@ -12,12 +11,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -; Ensure the scalars are initialized before the OpenMP code is launched. +; CHECK-LABEL: polly.parallel.for: +; CHECK-NEXT: %0 = bitcast { float }* %polly.par.userContext to i8* +; CHECK-NEXT: call void @llvm.lifetime.start(i64 4, i8* %0) +; CHECK-NEXT: %polly.subfn.storeaddr.alpha = getelementptr inbounds { float }, { float }* %polly.par.userContext, i32 0, i32 0 +; CHECK-NEXT: store float %alpha, float* %polly.subfn.storeaddr.alpha -; IR-LABEL: polly.start: -; IR-NEXT: store float %alpha, float* %alpha.s2a - -; IR: GOMP_parallel_loop_runtime_start +; CHECK: GOMP_parallel_loop_runtime_start @A = common global [1024 x float] zeroinitializer, align 16 Index: test/Isl/CodeGen/entry_with_trivial_phi_other_bb.ll =================================================================== --- test/Isl/CodeGen/entry_with_trivial_phi_other_bb.ll +++ test/Isl/CodeGen/entry_with_trivial_phi_other_bb.ll @@ -11,9 +11,6 @@ entry: br label %entry.split -; CHECK-LABEL: polly.start: -; CHECK: store float %a, float* %b.phiops - entry.split: %b = phi float [ %a, %entry ] %cmp2 = icmp slt i64 %n, 5 @@ -29,6 +26,10 @@ store float %b, float* %arrayidx, align 4 br label %for.inc +; CHECK-LABEL: polly.stmt.for.body: +; CHECK-NEXT: %scevgep = getelementptr float, float* %A, i64 %polly.indvar +; CHECK-NEXT: store float %a, float* %scevgep, + for.inc: ; preds = %for.body %add = add nuw nsw i64 %i.0, 1 br label %for.cond Index: test/Isl/CodeGen/invariant_load_escaping.ll =================================================================== --- test/Isl/CodeGen/invariant_load_escaping.ll +++ test/Isl/CodeGen/invariant_load_escaping.ll @@ -18,18 +18,14 @@ ; CHECK: 
polly.preload.begin: ; CHECK: %polly.access.B = getelementptr i32, i32* %B, i64 0 ; CHECK: %polly.access.B.load = load i32, i32* %polly.access.B -; CHECK: store i32 %polly.access.B.load, i32* %tmp.preload.s2a ; ; CHECK: polly.merge_new_and_old: -; CHECK: %tmp.merge = phi i32 [ %tmp.final_reload, %polly.exiting ], [ %tmp, %do.cond ] +; CHECK: %tmp.merge = phi i32 [ %polly.access.B.load, %polly.exiting ], [ %tmp, %do.cond ] ; CHECK: br label %do.end ; ; CHECK: do.end: ; CHECK: ret i32 %tmp.merge ; -; CHECK: polly.loop_exit: -; CHECK: %tmp.final_reload = load i32, i32* %tmp.preload.s2a -; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i32 @f(i32* %A, i32* %B) { Index: test/Isl/CodeGen/invariant_load_scalar_escape_alloca_sharing.ll =================================================================== --- test/Isl/CodeGen/invariant_load_scalar_escape_alloca_sharing.ll +++ test/Isl/CodeGen/invariant_load_scalar_escape_alloca_sharing.ll @@ -1,21 +1,16 @@ ; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s ; -; Verify the preloaded %0 is stored and communicated in the same alloca. 
-; -; CHECK-NOT: alloca -; CHECK: %dec3.s2a = alloca i32 -; CHECK-NOT: alloca -; CHECK: %dec3.in.phiops = alloca i32 -; CHECK-NOT: alloca -; CHECK: %.preload.s2a = alloca i32 ; CHECK-NOT: alloca ; -; CHECK: %ncol.load = load i32, i32* @ncol -; CHECK-NEXT: store i32 %ncol.load, i32* %.preload.s2a +; CHECK-LABEL: polly.preload.begin: +; CHECK-NEXT: %ncol.load = load i32, i32* @ncol +; +; CHECK-LABEL: polly.merge: +; CHECK-NEXT: %ncol.load.merge = phi i32 [ %ncol.load, %polly.stmt.while.body.lr.ph ], [ undef, %polly.else ] ; -; CHECK: polly.stmt.while.body.lr.ph: -; CHECK-NEXT: %.preload.s2a.reload = load i32, i32* %.preload.s2a -; CHECK-NEXT: store i32 %.preload.s2a.reload, i32* %dec3.in.phiops +; CHECK-LABEL: polly.loop_header: +; CHECK-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.while.cond.backedge ] +; CHECK-NEXT: %dec3.in.polly.lc = phi i32 [ %ncol.load.merge, %polly.loop_preheader ], [ %p_dec3, %polly.stmt.while.cond.backedge ] ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/large-numbers-in-boundary-context.ll =================================================================== --- test/Isl/CodeGen/large-numbers-in-boundary-context.ll +++ test/Isl/CodeGen/large-numbers-in-boundary-context.ll @@ -4,7 +4,11 @@ ; we will check that we use an appropriaty typed constant, here with 65 bits. ; An alternative would be to bail out early but that would not be as easy. 
; -; CHECK: %13 = icmp sge i65 %12, -9223372036854775809 +; CHECK: 9223372036854775806 +; CHECK: %[[r0:[0-9]*]] = sext i32 %tmp to i64 +; CHECK: %[[r1:[0-9]*]] = add nsw i64 %indvar, %[[r0]] +; CHECK: %[[r2:[0-9]*]] = sext i64 %[[r1]] to i65 +; CHECK: %[[r3:[0-9]*]] = icmp sge i65 %[[r2]], -9223372036854775809 ; ; CHECK: polly.start ; Index: test/Isl/CodeGen/non-affine-dominance-generated-entering.ll =================================================================== --- test/Isl/CodeGen/non-affine-dominance-generated-entering.ll +++ test/Isl/CodeGen/non-affine-dominance-generated-entering.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s ; ; llvm.org/PR25439 ; Scalar reloads in the generated entering block were not recognized as @@ -6,15 +6,9 @@ ; resulted in values defined in there (here: %cond used in subregionB_entry) not ; being copied. We check whether it is reusing the reloaded scalar. ; -; CHECK-LABEL: polly.stmt.subregionB_entry.exit: -; CHECK: store i1 %polly.cond, i1* %cond.s2a +; FIXME: SSA-Codegen does not need to place any PHI nodes here, thus we just check +; that the code is valid. ; -; CHECK-LABEL: polly.stmt.subregionB_entry.entry: -; CHECK: %cond.s2a.reload = load i1, i1* %cond.s2a -; -; CHECK-LABEL: polly.stmt.subregionB_entry: -; CHECK: br i1 %cond.s2a.reload - define void @func(i32* %A) { entry: br label %subregionA_entry Index: test/Isl/CodeGen/non-affine-exit-node-dominance.ll =================================================================== --- test/Isl/CodeGen/non-affine-exit-node-dominance.ll +++ test/Isl/CodeGen/non-affine-exit-node-dominance.ll @@ -4,14 +4,13 @@ ; The dominance of the generated non-affine subregion block was based on the ; scop's merge block, therefore resulted in an invalid DominanceTree. ; It resulted in some values as assumed to be unusable in the actual generated -; exit block. 
Here we check whether the value %escaping is taken from the -; generated block. +; exit block. +; +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: %escaping.merge = phi i32 [ %p_escaping, %polly.exiting ], [ %escaping, %subregion_exit.region_exiting ] ; ; CHECK-LABEL: polly.stmt.subregion_entry: ; CHECK: %p_escaping = select i1 undef, i32 undef, i32 undef -; -; CHECK-LABEL: polly.stmt.polly.merge_new_and_old.exit: -; CHECK: store i32 %p_escaping, i32* %escaping.s2a define i32 @func() { entry: Index: test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll @@ -1,5 +1,4 @@ -; RUN: opt %loadPolly -polly-codegen \ -; RUN: -S < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -9,13 +8,13 @@ ; CHECK: %p_tmp8 = fcmp olt double 1.400000e+01, %p_tmp7 ; CHECK: br i1 %p_tmp8, label %polly.stmt.bb9, label %polly.stmt.bb10 -; CHECK: polly.stmt.bb9: ; preds = %polly.stmt.bb3 -; CHECK: store double 1.000000e+00, double* %tmp12.phiops -; CHECK: br label %polly.stmt.bb11.exit - -; CHECK: polly.stmt.bb10: ; preds = %polly.stmt.bb3 -; CHECK: store double 2.000000e+00, double* %tmp12.phiops -; CHECK: br label %polly.stmt.bb11.exit +; CHECK: polly.stmt.bb11.exit: +; CHECK: %0 = phi double [ 2.000000e+00, %polly.stmt.bb10 ], [ 1.000000e+00, %polly.stmt.bb9 ] +; CHECK: br label %polly.stmt.bb11 +; +; CHECK: polly.stmt.bb11: +; CHECK: store double %0, double* %arg11 +; CHECK: br label %polly.exiting define void @hoge(i32 %arg, [1024 x double]* %arg1) { Index: test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll @@ -16,23 +16,23 @@ ; 
CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00 -; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: br i1 branch1: br i1 %cond1, label %branch2, label %backedge ; CHECK-LABEL: polly.stmt.branch1: -; CHECK-NEXT: store float %p_val1, float* %merge.phiops ; CHECK-NEXT: br i1 branch2: br label %backedge ; CHECK-LABEL: polly.stmt.branch2: -; CHECK-NEXT: store float %p_val2, float* %merge.phiops ; CHECK-NEXT: br label +; CHECK-LABEL: polly.stmt.backedge.exit: +; CHECK-NEXT: %p_val2.merge = phi float [ %p_val2, %polly.stmt.branch2 ], [ %p_val1, %polly.stmt.branch1 ], [ %p_val0, %polly.stmt.loop ] + backedge: %merge = phi float [%val0, %loop], [%val1, %branch1], [%val2, %branch2] %indvar.next = add i64 %indvar, 1 @@ -40,6 +40,9 @@ %cmp = icmp sle i64 %indvar.next, 100 br i1 %cmp, label %loop, label %exit +; CHECK-LABEL: polly.stmt.backedge: +; CHECK-NEXT: store float %p_val2.merge, float* %A, !alias.scope !0, !noalias !2 + exit: ret void } Index: test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll @@ -14,7 +14,6 @@ ; CHECK-LABEL: polly.stmt.loop: ; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 -; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: br i1 ; The interesting instruction here is %val2, which does not dominate the exit of @@ -27,16 +26,17 @@ ; CHECK-LABEL: polly.stmt.branch1: ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00 -; CHECK-NEXT: store float %p_val1, float* %merge.phiops ; CHECK-NEXT: br i1 branch2: br label %backedge ; CHECK-LABEL: polly.stmt.branch2: -; CHECK-NEXT: store float %p_val2, float* %merge.phiops ; CHECK-NEXT: 
br label +; CHECK-LABEL: polly.stmt.backedge.exit: +; CHECK-NEXT: %p_val2.merge = phi float [ %p_val2, %polly.stmt.branch2 ], [ %p_val1, %polly.stmt.branch1 ], [ %p_val0, %polly.stmt.loop ] + backedge: %merge = phi float [%val0, %loop], [%val1, %branch1], [%val2, %branch2] %indvar.next = add i64 %indvar, 1 @@ -44,6 +44,9 @@ %cmp = icmp sle i64 %indvar.next, 100 br i1 %cmp, label %loop, label %exit +; CHECK-LABEL: polly.stmt.backedge: +; CHECK-NEXT: store float %p_val2.merge, float* %A, !alias.scope !0, !noalias !2 + exit: ret void } Index: test/Isl/CodeGen/non-affine-region-exit-phi-incoming-synthesize.ll =================================================================== --- test/Isl/CodeGen/non-affine-region-exit-phi-incoming-synthesize.ll +++ test/Isl/CodeGen/non-affine-region-exit-phi-incoming-synthesize.ll @@ -6,16 +6,15 @@ ; subregion_if, but reused for the incoming value of subregion_exit, although it ; is not dominated by subregion_if. ; -; CHECK-LABEL: polly.stmt.subregion_entry: -; CHECK: %[[R0:[0-9]*]] = add i32 %n, -2 -; CHECK: store i32 %[[R0]], i32* %retval.s2a +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK: %retval.ph.merge = phi i32 [ %p_add1, %polly.exiting ], [ %add, %subregion_exit.region_exiting ] ; -; CHECK-LABEL: polly.stmt.subregion_if: -; CHECK: %[[R1:[0-9]*]] = add i32 %n, -2 -; CHECK: store i32 %[[R1]], i32* %retval.s2a +; CHECK-LABEL: subregion_exit: +; CHECK-NEXT: %retval = phi i32 [ %retval.ph.merge, %polly.merge_new_and_old ] +; CHECK-NEXT: ret i32 %retval ; -; CHECK-LABEL: polly.stmt.polly.merge_new_and_old.exit: -; CHECK: load i32, i32* %retval.s2a +; CHECK-LABEL: polly.stmt.subregion_entry: +; CHECK: %p_add1 = add nsw i32 %n, -2 define i32 @func(i32 %n){ entry: Index: test/Isl/CodeGen/non-affine-region-implicit-store.ll =================================================================== --- test/Isl/CodeGen/non-affine-region-implicit-store.ll +++ test/Isl/CodeGen/non-affine-region-implicit-store.ll @@ -7,14 +7,12 @@ ; the 
control flow of the generated code. This would cause that any value for ; implicit stores is assumed to be not from the scop. ; -; This checks that the stored value is indeed from the generated code. +; CHECK-LABEL: do.body: +; CHECK-NEXT: %a = phi i32 [ %a.ph, %polly.split_new_and_old ] ; -; CHECK-LABEL: polly.stmt.do.body.entry: -; CHECK: a.phiops.reload = load i32, i32* %a.phiops +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: %a.merge = phi i32 [ %a.ph, %polly.exiting ], [ %a, %end_a.region_exiting ] ; -; CHECK-LABEL: polly.stmt.polly.merge_new_and_old.exit: -; CHECK: store i32 %polly.a, i32* %a.s2a - define void @func() { entry: br label %while.body Index: test/Isl/CodeGen/non-affine-synthesized-in-branch.ll =================================================================== --- test/Isl/CodeGen/non-affine-synthesized-in-branch.ll +++ test/Isl/CodeGen/non-affine-synthesized-in-branch.ll @@ -4,10 +4,17 @@ ; %synthgep caused %gep to be synthesized in subregion_if which was reused for ; %retval in subregion_exit, even though it is not dominating subregion_exit. 
; -; CHECK-LABEL: polly.stmt.polly.merge_new_and_old.exit: -; CHECK: %scevgep[[R1:[0-9]*]] = getelementptr %struct.hoge, %struct.hoge* %arg, i64 0, i32 2 -; CHECK: store double* %scevgep[[R1]], double** %gep.s2a -; CHECK: br label +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: %gep.merge = phi double* [ %p_gep, %polly.exiting ], [ %gep, %subregion_exit.region_exiting ] +; +; CHECK-LABEL: subregion_exit: +; CHECK-NEXT: %retval = load double, double* %gep.merge +; CHECK-NEXT: ret double %retval +; +; CHECK-LABEL: polly.stmt.subregion_entry: +; CHECK-NEXT: %p_cond = fcmp +; CHECK-NEXT: %p_gep = getelementptr inbounds %struct.hoge, %struct.hoge* %arg, i64 0, i32 2 + %struct.hoge = type { double, double, double } Index: test/Isl/CodeGen/non_affine_float_compare.ll =================================================================== --- test/Isl/CodeGen/non_affine_float_compare.ll +++ test/Isl/CodeGen/non_affine_float_compare.ll @@ -26,6 +26,8 @@ ; CHECK: br label %polly.stmt.bb12.[[R]] ; CHECK: polly.stmt.bb12.[[R]]: +; CHECK: %polly.indvar_next = add nsw i64 %polly.indvar, 1 +; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar, 1022 ; CHECK: br label %polly.stmt.bb12 ; CHECK: polly.stmt.bb12: @@ -33,8 +35,6 @@ ; CHECK: %tmp10b_p_scalar_ = load float, float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp11b = fadd float %tmp10b_p_scalar_, 1.000000e+00 ; CHECK: store float %p_tmp11b, float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2 -; CHECK: %polly.indvar_next = add nsw i64 %polly.indvar, 1 -; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar, 1022 ; CHECK: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/out-of-scop-phi-node-use.ll =================================================================== --- test/Isl/CodeGen/out-of-scop-phi-node-use.ll +++ test/Isl/CodeGen/out-of-scop-phi-node-use.ll @@ -4,13 +4,10 @@ 
target triple = "x86_64-unknown-linux-gnu" ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %_s.sroa.343.0.ph5161118.ph.merge = phi i32 [ %_s.sroa.343.0.ph5161118.ph.final_reload, %polly.exiting ], [ %_s.sroa.343.0.ph516.lcssa2357, %for.cond.981.region_exiting ] +; CHECK-NEXT: %_s.sroa.343.0.ph5161118.ph.merge = phi i32 [ undef, %polly.exiting ], [ %_s.sroa.343.0.ph516.lcssa2357, %for.cond.981.region_exiting ] ; CHECK-LABEL: for.cond.981: -; CHECK-NEXT: %_s.sroa.343.0.ph5161118 = phi i32 [ undef, %for.cond ], [ %_s.sroa.343.0.ph5161118.ph.merge, %polly.merge_new_and_old ] - -; CHECK-LABEL: polly.exiting: -; CHECK-NEXT: %_s.sroa.343.0.ph5161118.ph.final_reload = load i32, i32* %_s.sroa.343.0.ph5161118.s2a +; CHECK-NEXT: %_s.sroa.343.0.ph5161118 = phi i32 [ undef, %for.cond ], [ %_s.sroa.343.0.ph5161118.ph.merge, %polly.merge_new_and_old ] ; Function Attrs: nounwind uwtable define void @lzmaDecode() #0 { Index: test/Isl/CodeGen/phi-defined-before-scop.ll =================================================================== --- test/Isl/CodeGen/phi-defined-before-scop.ll +++ test/Isl/CodeGen/phi-defined-before-scop.ll @@ -1,11 +1,10 @@ ; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %tmp7.ph.merge = phi %struct.wibble* [ %tmp7.ph.final_reload, %polly.exiting ], [ %tmp7.ph, %bb6.region_exiting ] +; CHECK-NEXT: %tmp7.ph.merge = phi %struct.wibble* [ %tmp2.merge, %polly.exiting ], [ %tmp7.ph, %bb6.region_exiting ] -; CHECK-LABEL: polly.stmt.bb3: -; CHECK-NEXT: %tmp2.s2a.reload = load %struct.wibble*, %struct.wibble** %tmp2.s2a -; CHECK-NEXT: store %struct.wibble* %tmp2, %struct.wibble** %tmp7.s2a +; CHECK-LABEL: polly.stmt.bb6.region_exiting: +; CHECK-NEXT: %tmp2.merge = phi %struct.wibble* [ %tmp2, %polly.stmt.bb3 ], [ undef, %polly.stmt.bb5 ] target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -15,7 +14,7 @@ @global = external global %struct.blam*, align 8 ; Function Attrs: nounwind 
uwtable -define void @wobble() #0 { +define void @wobble(i1 %b) #0 { bb: br label %bb1 @@ -26,7 +25,7 @@ bb3: ; preds = %bb1 %tmp4 = getelementptr inbounds %struct.blam, %struct.blam* %tmp, i64 0, i32 1 - br i1 false, label %bb6, label %bb5 + br i1 %b, label %bb6, label %bb5 bb5: ; preds = %bb3 br label %bb6 Index: test/Isl/CodeGen/phi-in-non-affine-subregion-entry.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi-in-non-affine-subregion-entry.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s +; +; CHECK: if.end41.region_exiting: +; CHECK: %ic.sroa.3.0.ph = phi i64 [ 0, %if.end22 ], [ undef, %if.then33 ] +; CHECK: br label %polly.merge_new_and_old +; +; CHECK: polly.merge_new_and_old: +; CHECK: %ic.sroa.3.0.ph.merge = phi i64 [ %0, %polly.exiting ], [ %ic.sroa.3.0.ph, %if.end41.region_exiting ] +; CHECK: br label %if.end41 +; +; CHECK: if.end41: +; CHECK: %ic.sroa.3.0 = phi i64 [ %ic.sroa.3.0.ph.merge, %polly.merge_new_and_old ] +; CHECK: ret void +; +; CHECK: polly.stmt.if.end41.region_exiting.exit: +; CHECK: %0 = phi i64 [ 0, %polly.stmt.if.end22 ], [ undef, %polly.stmt.if.then33 ] + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define void @intcoord1() #0 { +entry: + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %entry + unreachable + +if.end: ; preds = %entry + br i1 false, label %if.then14, label %if.end22 + +if.then14: ; preds = %if.end + %conv17 = fptosi double undef to i32 + %phitmp19 = zext i32 %conv17 to i64 + %phitmp20 = shl nuw i64 %phitmp19, 32 + br label %if.end22 + +if.end22: ; preds = %if.then14, %if.end + %ic.sroa.2.0 = phi i64 [ %phitmp20, %if.then14 ], [ 0, %if.end ] + %or.cond2 = and i1 undef, undef + br i1 %or.cond2, label %if.then33, label %if.end41 + +if.then33: ; preds = %if.end22 + br label %if.end41 + +if.end41: ; preds = %if.then33, %if.end22 + %ic.sroa.3.0 = phi i64 [ undef, 
%if.then33 ], [ 0, %if.end22 ] + ret void +} Index: test/Isl/CodeGen/phi_condition_modeling_1.ll =================================================================== --- test/Isl/CodeGen/phi_condition_modeling_1.ll +++ test/Isl/CodeGen/phi_condition_modeling_1.ll @@ -11,16 +11,13 @@ ; } ; } ; -; CHECK-LABEL: bb: -; CHECK: %tmp.0.phiops = alloca i32 -; CHECK-LABEL: polly.stmt.bb8: -; CHECK: %tmp.0.phiops.reload = load i32, i32* %tmp.0.phiops -; CHECK: store i32 %tmp.0.phiops.reload, i32* -; CHECK-LABEL: polly.stmt.bb7: -; CHECK: store i32 5, i32* %tmp.0.phiops -; CHECK-LABEL: polly.stmt.bb6: -; CHECK: store i32 3, i32* %tmp.0.phiops - +; CHECK: polly.merge: +; CHECK: %4 = phi i32 [ 5, %polly.stmt.bb7 ], [ 3, %polly.stmt.bb6 ] +; CHECK: br label %polly.stmt.bb8 +; +; CHECK: polly.stmt.bb8: +; CHECK: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK: store i32 %4, i32* %scevgep, target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @f(i32* %A, i32 %c, i32 %N) { Index: test/Isl/CodeGen/phi_condition_modeling_2.ll =================================================================== --- test/Isl/CodeGen/phi_condition_modeling_2.ll +++ test/Isl/CodeGen/phi_condition_modeling_2.ll @@ -11,19 +11,28 @@ ; } ; } ; -; CHECK-LABEL: bb: -; CHECK-DAG: %tmp.0.s2a = alloca i32 -; CHECK-DAG: %tmp.0.phiops = alloca i32 -; CHECK-LABEL: polly.stmt.bb8: -; CHECK: %tmp.0.phiops.reload = load i32, i32* %tmp.0.phiops -; CHECK: store i32 %tmp.0.phiops.reload, i32* %tmp.0.s2a + +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: br label %bb11 +; +; CHECK-LABEL: bb11: +; CHECK-NEXT: ret void +; +; CHECK-LABEL: polly.loop_exit: +; TODO %1 is not needed +; CHECK-NEXT: %1 = phi i32 [ undef, %polly.loop_if ], [ %4, %polly.stmt.bb8b ] +; CHECK-NEXT: br label %polly.exiting +; +; CHECK-LABEL: polly.exiting: +; CHECK-NEXT: br label %polly.merge_new_and_old +; +; CHECK-LABEL: polly.merge: +; CHECK-NEXT: %4 = phi i32 [ 5, %polly.stmt.bb7 ], [ 3, %polly.stmt.bb6 ] +; 
CHECK-NEXT: br label %polly.stmt.bb8 +; ; CHECK-LABEL: polly.stmt.bb8b: -; CHECK: %tmp.0.s2a.reload = load i32, i32* %tmp.0.s2a -; CHECK: store i32 %tmp.0.s2a.reload, -; CHECK-LABEL: polly.stmt.bb7: -; CHECK: store i32 5, i32* %tmp.0.phiops -; CHECK-LABEL: polly.stmt.bb6: -; CHECK: store i32 3, i32* %tmp.0.phiops +; CHECK-NEXT: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK-NEXT: store i32 %4, i32* %scevgep, target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/phi_conditional_simple_1.ll =================================================================== --- test/Isl/CodeGen/phi_conditional_simple_1.ll +++ test/Isl/CodeGen/phi_conditional_simple_1.ll @@ -18,18 +18,13 @@ ; AST: Stmt_if_end(c0); ; AST: } ; -; CHECK-LABEL: entry: -; CHECK-NEXT: %phi.phiops = alloca i32 +; CHECK-LABEL: polly.merge: +; CHECK-NEXT: %[[r:[a-zA-Z0-9_.]*]] = phi i32 [ 1, %polly.stmt.if.then ], [ 2, %polly.stmt.if.else ] +; CHECK-NEXT: br label %polly.stmt.if.end + ; CHECK-LABEL: polly.stmt.if.end: -; CHECK-NEXT: %phi.phiops.reload = load i32, i32* %phi.phiops -; CHECK-NEXT: %scevgep -; CHECK-NEXT: store i32 %phi.phiops.reload, i32* -; CHECK-LABEL: polly.stmt.if.then: -; CHECK-NEXT: store i32 1, i32* %phi.phiops -; CHECK-NEXT: br label %polly.merge{{[.]?}} -; CHECK-LABEL: polly.stmt.if.else: -; CHECK-NEXT: store i32 2, i32* %phi.phiops -; CHECK-NEXT: br label %polly.merge{{[.]?}} +; CHECK-NEXT: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK-NEXT: store i32 %[[r]], i32* %scevgep ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/phi_in_exit_early_lnt_failure_2.ll =================================================================== --- test/Isl/CodeGen/phi_in_exit_early_lnt_failure_2.ll +++ test/Isl/CodeGen/phi_in_exit_early_lnt_failure_2.ll @@ -3,18 +3,14 @@ ; This caused an lnt crash at some point, just verify it will run through and ; produce the PHI node in the exit we are looking for. 
; -; CHECK: %eps1.addr.0.s2a = alloca double -; CHECK-NOT: %eps1.addr.0.ph.s2a = alloca double +; CHECK-NOT: alloca double ; ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %eps1.addr.0.ph.merge = phi double [ %eps1.addr.0.ph.final_reload, %polly.exiting ], [ %eps1.addr.0.ph, %if.end.47.region_exiting ] -; -; CHECK-LABEL: polly.start: -; CHECK-NEXT: store double %eps1, double* %eps1.s2a -; -; CHECK-LABEL: polly.exiting: -; CHECK-NEXT: %eps1.addr.0.ph.final_reload = load double, double* %eps1.addr.0.s2a +; CHECK: %eps1.addr.0.ph.merge = phi double [ %eps1.merge, %polly.exiting ], [ %eps1.addr.0.ph, %if.end.47.region_exiting ] ; +; CHECK-LABEL: polly.stmt.if.end.47.region_exiting.exit: +; CHECK-NEXT: %eps1.merge = phi double [ %eps1, %polly.stmt.for.end ], [ %_p_scalar_, %polly.stmt.if.then.46 ] + define void @dbisect(double* %c, double* %b, double %eps1, double* %eps2) { entry: br label %entry.split Index: test/Isl/CodeGen/phi_in_exit_early_lnt_failure_3.ll =================================================================== --- test/Isl/CodeGen/phi_in_exit_early_lnt_failure_3.ll +++ test/Isl/CodeGen/phi_in_exit_early_lnt_failure_3.ll @@ -4,7 +4,7 @@ ; produce the PHI node in the exit we are looking for. 
; ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %n2ptr.2.ph.merge = phi i8* [ %n2ptr.2.ph.final_reload, %polly.exiting ], [ %n2ptr.2.ph, %if.end.45.region_exiting ] +; CHECK-NEXT: %n2ptr.2.ph.merge = phi i8* [ %uglygep.merge, %polly.exiting ], [ %n2ptr.2.ph, %if.end.45.region_exiting ] ; ; CHECK-LABEL: if.end.45: ; CHECK-NEXT: %n2ptr.2 = phi i8* [ %add.ptr25, %entry ], [ %add.ptr25, %while.cond.preheader ], [ %n2ptr.2.ph.merge, %polly.merge_new_and_old ] Index: test/Isl/CodeGen/phi_in_exit_early_lnt_failure_5.ll =================================================================== --- test/Isl/CodeGen/phi_in_exit_early_lnt_failure_5.ll +++ test/Isl/CodeGen/phi_in_exit_early_lnt_failure_5.ll @@ -4,7 +4,7 @@ ; produce the PHI node in the exit we are looking for. ; ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %eps1.addr.0.ph.merge = phi double [ %eps1.addr.0.ph.final_reload, %polly.exiting ], [ %eps1.addr.0.ph, %if.end.47.region_exiting ] +; CHECK-NEXT: %eps1.addr.0.ph.merge = phi double [ %eps1.merge, %polly.exiting ], [ %eps1.addr.0.ph, %if.end.47.region_exiting ] ; CHECK-NEXT: br label %if.end.47 ; ; CHECK-LABEL: if.end.47: Index: test/Isl/CodeGen/phi_loop_carried_float.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float.ll +++ test/Isl/CodeGen/phi_loop_carried_float.ll @@ -7,35 +7,29 @@ ; } ; ; CHECK: bb: -; CHECK-NOT: %tmp7{{[.*]}} = alloca float -; CHECK-DAG: %tmp.0.s2a = alloca float -; CHECK-NOT: %tmp7{{[.*]}} = alloca float -; CHECK-DAG: %tmp.0.phiops = alloca float -; CHECK-NOT: %tmp7{{[.*]}} = alloca float +; CHECK-NOT: alloca -; CHECK-LABEL: exit: -; CHECK-NEXT: ret +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: br label ; CHECK-LABEL: polly.start: ; CHECK-NEXT: sext -; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops +; CHECK-NEXT: br label + +; CHECK-LABEL: polly.loop_exit: +; CHECK-DAG: %tmp.0.polly.lc.merge = phi float [ 0.000000e+00, %polly.loop_if ], [ 
%tmp.0.polly.lc, %polly.merge ] +; CHECK-DAG: %p_tmp7.merge.merge = phi float [ undef, %polly.loop_if ], [ %p_tmp7.merge, %polly.merge ] ; CHECK-LABEL: polly.exiting: ; CHECK-NEXT: br label %polly.merge_new_and_old -; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: -; CHECK-NEXT: %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops -; CHECK: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a +; CHECK-LABEL: polly.loop_header: +; CHECK-NEXT: %polly.indvar = phi i64 +; CHECK-NEXT: %tmp.0.polly.lc = phi float [ 0.000000e+00, %polly.loop_preheader ], [ %p_tmp7.merge, %polly.merge ] -; CHECK-LABEL: polly.stmt.bb4: -; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a -; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 -; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ -; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops +; CHECK-LABEL: polly.merge: +; CHECK-NEXT: %p_tmp7.merge = phi float [ %p_tmp7, %polly.stmt.bb4 ], [ undef, %polly.else ] -; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: -; CHECK-NEXT: %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops -; CHECK: store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/phi_loop_carried_float_2.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_loop_carried_float_2.ll @@ -0,0 +1,51 @@ +; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s +; +; float f(float *A, int N) { +; float tmp = 0; +; int i = 0; +; do { +; tmp += A[i]; +; } while (i++ < N); +; } +; +; CHECK-NOT: alloca + +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-DAG: %tmp7.merge = phi float [ %p_tmp[[r:[0-9]*]].merge, %polly.exiting ], [ %tmp7, %bb8 ] +; CHECK-DAG: %tmp.0.merge = phi float [ %tmp.0.polly.lc.merge, %polly.exiting ], [ %tmp.0, %bb8 ] +; CHECK-NEXT: br 
label %exit +; +; CHECK-LABEL: exit: +; CHECK-NEXT: %add_exit = fadd float %tmp.0.merge, %tmp7.merge +; CHECK-NEXT: ret float %add_exit +; +; CHECK-LABEL: polly.loop_header: +; CHECK-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.bb1 ] +; CHECK-NEXT: %tmp.0.polly.lc = phi float [ 0.000000e+00, %polly.loop_preheader ], [ %p_tmp7, %polly.stmt.bb1 ] +; +; CHECK: %p_tmp7 = fadd float %tmp.0.polly.lc, %tmp6_p_scalar_ + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define float @f(float* %A, i32 %N) { +bb: + %tmp = sext i32 %N to i64 + br label %bb1 + +bb1: ; preds = %bb1, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb1 ], [ 0, %bb ] + %tmp.0 = phi float [ 0.000000e+00, %bb ], [ %tmp7, %bb1 ] + %tmp5 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %tmp6 = load float, float* %tmp5, align 4 + %tmp7 = fadd float %tmp.0, %tmp6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %tmp2 = icmp slt i64 %indvars.iv, %tmp + br i1 %tmp2, label %bb1, label %bb8 + +bb8: ; preds = %bb1 + br label %exit + +exit: + %add_exit = fadd float %tmp.0, %tmp7 + ret float %add_exit +} Index: test/Isl/CodeGen/phi_loop_carried_float_3.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_loop_carried_float_3.ll @@ -0,0 +1,49 @@ +; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s +; +; float f(float *A) { +; float tmp = 0; +; int i = 0; +; do { +; tmp += A[i]; +; } while (i++ < 100); +; } +; +; CHECK-NOT: alloca +; +; CHECK-LABEL: polly.loop_header: +; CHECK-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.bb2 ] +; CHECK-NEXT: %tmp.0.polly.lc = phi float [ 0.000000e+00, %polly.loop_preheader ], [ %p_tmp7, %polly.stmt.bb2 ] +; +; CHECK: %p_tmp7 = fadd float %tmp.0.polly.lc, %tmp6_p_scalar_ + +; CHECK-LABEL: polly.stmt.bb2: +; CHECK-NEXT: %p_tmp7copy = fadd float %tmp.0.polly.lc, %p_tmp7 + + +target 
datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define float @f(float* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb2, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb2 ], [ 0, %bb ] + %tmp.0 = phi float [ 0.000000e+00, %bb ], [ %tmp7, %bb2 ] + %tmp5 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %tmp6 = load float, float* %tmp5, align 4 + %tmp7 = fadd float %tmp.0, %tmp6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %tmp2 = icmp slt i64 %indvars.iv, 100 + br label %bb2 + +bb2: + %tmp7copy = fadd float %tmp.0, %tmp7 + br i1 %tmp2, label %bb1, label %bb8 + +bb8: ; preds = %bb2 + br label %exit + +exit: + %add_exit = fadd float %tmp.0, %tmp7 + ret float %add_exit +} Index: test/Isl/CodeGen/phi_loop_carried_float_4.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_loop_carried_float_4.ll @@ -0,0 +1,180 @@ +; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s +; +; float f() { +; int i = 0; +; float lc_a, lc_b, lc_c; +; float a, b, c; +; a = b = c = 0; +; +; do { +; lc_a = a; +; lc_b = b; +; lc_c = c; +; if (i > 50) +; a += 1; +; else +; b -= 2; +; c += a + b; +; } while (i++ < 100); +; return a + b + c + lc_a + lc_b + lc_c; +; } +; +; CHECK: do.body: +; CHECK-DAG: %a.0 = phi float [ %a.1, %do.cond ], [ 0.000000e+00, %polly.split_new_and_old ] +; CHECK-DAG: %b.0 = phi float [ %b.1, %do.cond ], [ 0.000000e+00, %polly.split_new_and_old ] +; CHECK-DAG: %c.0 = phi float [ %add2, %do.cond ], [ 0.000000e+00, %polly.split_new_and_old ] +; CHECK-DAG: %i.0 = phi i32 [ %inc, %do.cond ], [ 0, %polly.split_new_and_old ] +; CHECK: %cmp = icmp sgt i32 %i.0, 50 +; CHECK: br i1 %cmp, label %if.then, label %if.else +; +; CHECK: if.then: +; CHECK: %add = fadd float %a.0, 1.000000e+00 +; CHECK: br label %if.end +; +; CHECK: if.end: +; CHECK-DAG: %a.1 = phi float [ %add, %if.then ], [ %a.0, 
%if.else ] +; CHECK-DAG: %b.1 = phi float [ %b.0, %if.then ], [ %sub, %if.else ] +; CHECK: %add1 = fadd float %a.1, %b.1 +; CHECK: %add2 = fadd float %c.0, %add1 +; CHECK: br label %do.cond +; +; CHECK: do.cond: +; CHECK: %inc = add nuw nsw i32 %i.0, 1 +; CHECK: %exitcond = icmp ne i32 %inc, 101 +; CHECK: br i1 %exitcond, label %do.body, label %polly.merge_new_and_old +; +; CHECK: polly.merge_new_and_old: +; CHECK-DAG: %add2.merge = phi float [ %p_add2, %polly.exiting ], [ %add2, %do.cond ] +; CHECK-DAG: %c.0.merge = phi float [ %c.0.polly.lc, %polly.exiting ], [ %c.0, %do.cond ] +; CHECK-DAG: %a.0.merge = phi float [ %a.0.polly.lc, %polly.exiting ], [ %a.0, %do.cond ] +; CHECK-DAG: %b.1.merge = phi float [ %p_sub.merge, %polly.exiting ], [ %b.1, %do.cond ] +; CHECK-DAG: %b.0.merge = phi float [ %b.0.polly.lc, %polly.exiting ], [ %b.0, %do.cond ] +; CHECK-DAG: %a.1.merge = phi float [ %a.0.polly.lc.merge, %polly.exiting ], [ %a.1, %do.cond ] +; CHECK: br label %do.end +; +; CHECK: do.end: +; CHECK-DAG: %add2.lcssa = phi float [ %add2.merge, %polly.merge_new_and_old ] +; CHECK-DAG: %b.1.lcssa = phi float [ %b.1.merge, %polly.merge_new_and_old ] +; CHECK-DAG: %a.1.lcssa = phi float [ %a.1.merge, %polly.merge_new_and_old ] +; CHECK-DAG: %c.0.lcssa = phi float [ %c.0.merge, %polly.merge_new_and_old ] +; CHECK-DAG: %b.0.lcssa = phi float [ %b.0.merge, %polly.merge_new_and_old ] +; CHECK-DAG: %a.0.lcssa = phi float [ %a.0.merge, %polly.merge_new_and_old ] +; CHECK: %add4 = fadd float %a.1.lcssa, %b.1.lcssa +; CHECK: %add5 = fadd float %add4, %add2.lcssa +; CHECK: %add6 = fadd float %add5, %a.0.lcssa +; CHECK: %add7 = fadd float %add6, %b.0.lcssa +; CHECK: %add8 = fadd float %add7, %c.0.lcssa +; CHECK: ret float %add8 +; +; CHECK: polly.start: +; CHECK: br label %polly.loop_preheader +; +; CHECK: polly.loop_exit: +; CHECK: br label %polly.exiting +; +; CHECK: polly.exiting: +; CHECK: br label %polly.merge_new_and_old +; +; CHECK: polly.loop_header: +; CHECK-DAG: 
%polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.do.cond ] +; CHECK-DAG: %c.0.polly.lc = phi float [ 0.000000e+00, %polly.loop_preheader ], [ %p_add2, %polly.stmt.do.cond ] +; CHECK-DAG: %a.0.polly.lc = phi float [ 0.000000e+00, %polly.loop_preheader ], [ %a.0.polly.lc.merge, %polly.stmt.do.cond ] +; CHECK-DAG: %b.0.polly.lc = phi float [ 0.000000e+00, %polly.loop_preheader ], [ %p_sub.merge, %polly.stmt.do.cond ] +; CHECK: br label %polly.stmt.do.body +; +; CHECK: polly.stmt.do.body: +; CHECK: %0 = trunc i64 %polly.indvar to i32 +; CHECK: %p_cmp = icmp sgt i32 %0, 50 +; CHECK: br label %polly.cond +; +; CHECK: polly.cond: +; CHECK: %1 = icmp sle i64 %polly.indvar, 50 +; CHECK: br i1 %1, label %polly.then, label %polly.else +; +; CHECK: polly.merge: +; CHECK-DAG: %p_sub.merge = phi float [ %p_sub, %polly.stmt.if.else ], [ %b.0.polly.lc, %polly.stmt.if.then ] +; CHECK-DAG: %a.0.polly.lc.merge = phi float [ %a.0.polly.lc, %polly.stmt.if.else ], [ %p_add, %polly.stmt.if.then ] +; CHECK: br label %polly.stmt.if.end +; +; CHECK: polly.stmt.if.end: +; CHECK: %p_add1 = fadd float %a.0.polly.lc.merge, %p_sub.merge +; CHECK: %p_add2 = fadd float %c.0.polly.lc, %p_add1 +; CHECK: br label %polly.stmt.do.cond +; +; CHECK: polly.stmt.do.cond: +; CHECK: %2 = trunc i64 %polly.indvar to i32 +; CHECK: %3 = add i32 %2, 1 +; CHECK: %p_exitcond = icmp ne i32 %3, 101 +; CHECK: %polly.indvar_next = add nsw i64 %polly.indvar, 1 +; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar, 99 +; CHECK: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit +; +; CHECK: polly.loop_preheader: +; CHECK: br label %polly.loop_header +; +; CHECK: polly.then: +; CHECK: br label %polly.stmt.if.else +; +; CHECK: polly.stmt.if.else: +; CHECK: %p_sub = fadd float %b.0.polly.lc, -2.000000e+00 +; CHECK: br label %polly.merge +; +; CHECK: polly.else: +; CHECK: br label %polly.stmt.if.then +; +; CHECK: polly.stmt.if.then: +; CHECK: %p_add = fadd float 
%a.0.polly.lc, 1.000000e+00 +; CHECK: br label %polly.merge +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define float @f() { +entry: + br label %do.body + +do.body: + %a.0 = phi float [ 0.000000e+00, %entry ], [ %a.1, %do.cond ] + %b.0 = phi float [ 0.000000e+00, %entry ], [ %b.1, %do.cond ] + %c.0 = phi float [ 0.000000e+00, %entry ], [ %add2, %do.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.cond ] + %cmp = icmp sgt i32 %i.0, 50 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %add = fadd float %a.0, 1.000000e+00 + br label %if.end + +if.else: + %sub = fadd float %b.0, -2.000000e+00 + br label %if.end + +if.end: + %a.1 = phi float [ %add, %if.then ], [ %a.0, %if.else ] + %b.1 = phi float [ %b.0, %if.then ], [ %sub, %if.else ] + %add1 = fadd float %a.1, %b.1 + %add2 = fadd float %c.0, %add1 + br label %do.cond + +do.cond: + %inc = add nuw nsw i32 %i.0, 1 + %exitcond = icmp ne i32 %inc, 101 + br i1 %exitcond, label %do.body, label %do.end + +do.end: + %add2.lcssa = phi float [ %add2, %do.cond ] + %b.1.lcssa = phi float [ %b.1, %do.cond ] + %a.1.lcssa = phi float [ %a.1, %do.cond ] + %c.0.lcssa = phi float [ %c.0, %do.cond ] + %b.0.lcssa = phi float [ %b.0, %do.cond ] + %a.0.lcssa = phi float [ %a.0, %do.cond ] + %add4 = fadd float %a.1.lcssa, %b.1.lcssa + %add5 = fadd float %add4, %add2.lcssa + %add6 = fadd float %add5, %a.0.lcssa + %add7 = fadd float %add6, %b.0.lcssa + %add8 = fadd float %add7, %c.0.lcssa + ret float %add8 +} Index: test/Isl/CodeGen/phi_loop_carried_float_5.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_loop_carried_float_5.ll @@ -0,0 +1,83 @@ +; RUN: opt %loadPolly -analyze < %s | FileCheck %s +; +; FIXME: Edit the run line and add checks! 
+; +; XFAIL: * +; +; float f() { +; int i = 0; +; float lc_a; +; float a = 0; +; +; do { +; lc_a = a; +; +; if (i++ > 100) +; break; +; +; if (i > 50) { +; a += 1; +; continue; +; } +; +; if (i > 25) { +; a -= 2; +; continue; +; } +; +; } while (1); +; +; return a + lc_a; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define float @f() { +entry: + br label %do.body + +do.body: ; preds = %do.cond, %entry + %a.0 = phi float [ 0.000000e+00, %entry ], [ %a.1, %do.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %do.cond ] + %inc = add nuw nsw i32 %i.0, 1 + %cmp = icmp sgt i32 %i.0, 100 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %do.body + %a.0.lcssa = phi float [ %a.0, %do.body ] + br label %do.end + +if.end: ; preds = %do.body + %cmp1 = icmp sgt i32 %i.0, 49 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + %add = fadd float %a.0, 1.000000e+00 + br label %do.cond + +if.end3: ; preds = %if.end + %cmp4 = icmp sgt i32 %i.0, 24 + br i1 %cmp4, label %if.then5, label %if.end6 + +if.then5: ; preds = %if.end3 + %sub = fadd float %a.0, -2.000000e+00 + br label %do.cond + +if.end6: ; preds = %if.end3 + br label %do.cond + +do.cond: ; preds = %if.end6, %if.then5, %if.then2 + %a.1 = phi float [ %add, %if.then2 ], [ %sub, %if.then5 ], [ %a.0, %if.end6 ] + br i1 true, label %do.body, label %do.end.loopexit + +do.end.loopexit: ; preds = %do.cond + %a.1.lcssa = phi float [ %a.1, %do.cond ] + %a.0.lcssa1 = phi float [ %a.0, %do.cond ] + br label %do.end + +do.end: ; preds = %do.end.loopexit, %if.then + %a.02 = phi float [ %a.0.lcssa, %if.then ], [ %a.0.lcssa1, %do.end.loopexit ] + %a.2 = phi float [ %a.0.lcssa, %if.then ], [ %a.1.lcssa, %do.end.loopexit ] + %add7 = fadd float %a.2, %a.02 + ret float %add7 +} Index: test/Isl/CodeGen/phi_loop_carried_float_6.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_loop_carried_float_6.ll @@ -0,0 +1,83 @@ +; 
RUN: opt %loadPolly -analyze < %s | FileCheck %s +; +; FIXME: Edit the run line and add checks! +; +; XFAIL: * +; +; float f() { +; int i = 0; +; float lc_a; +; float a = 0; +; +; do { +; lc_a = a; +; +; if (i++ > 100) +; break; +; +; if (i > 50) { +; a += 1; +; continue; +; } +; +; if (i > 25) { +; a -= 2; +; continue; +; } +; +; } while (1); +; +; return a + lc_a; +; } +; + +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: %a.0.merge = phi float [ %a.0.polly.lc, %polly.exiting ], [ %a.0, %do.body ] +; +; CHECK: %a.0.polly.lc = phi float [ 0.000000e+00, %polly.loop_preheader ], [ %a.0.polly.lc.merge, %polly.merge ] +; +; CHECK: %a.0.polly.lc.merge = phi float [ %a.0.polly.lc, %polly.stmt.if.end6 ], [ %p_sub.merge, %polly.merge2 ] +; +; CHECK: %p_sub.merge = phi float [ %p_sub, %polly.stmt.if.then5 ], [ %p_add.merge, %polly.merge6 ] +; +; CHECK: %p_sub = fadd float %a.0.polly.lc, -2.000000e+00 +; +; CHECK: %p_add.merge = phi float [ %p_add, %polly.stmt.if.then2 ], [ undef, %polly.else8 ] +; +; CHECK: %p_add = fadd float %a.0.polly.lc, 1.000000e+00 +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define float @f() { +entry: + br label %do.body + +do.body: ; preds = %do.cond, %entry + %a.0 = phi float [ 0.000000e+00, %entry ], [ %add, %if.then2 ], [ %sub, %if.then5 ], [ %a.0, %if.end6 ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.then2 ], [ %inc, %if.then5 ], [ %inc, %if.end6 ] + %inc = add nuw nsw i32 %i.0, 1 + %cmp = icmp sgt i32 %i.0, 100 + br i1 %cmp, label %do.end, label %if.end + +if.end: ; preds = %do.body + %cmp1 = icmp sgt i32 %i.0, 49 + br i1 %cmp1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + %add = fadd float %a.0, 1.000000e+00 + br label %do.body + +if.end3: ; preds = %if.end + %cmp4 = icmp sgt i32 %i.0, 24 + br i1 %cmp4, label %if.then5, label %if.end6 + +if.then5: ; preds = %if.end3 + %sub = fadd float %a.0, -2.000000e+00 + br label %do.body + +if.end6: ; preds = %if.end3 + br label %do.body + +do.end: ; 
preds = %do.end.loopexit, %if.then + ret float %a.0 +} Index: test/Isl/CodeGen/phi_loop_carried_float_escape.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float_escape.ll +++ test/Isl/CodeGen/phi_loop_carried_float_escape.ll @@ -12,30 +12,27 @@ ; } ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.exiting ], [ %tmp.0, %bb8 ] +; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.polly.lc.merge, %polly.exiting ], [ %tmp.0, %bb8 ] ; CHECK-NEXT: br label %exit -; CHECK-LABEL: polly.start: -; CHECK-NEXT: sext -; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops +; CHECK-LABEL: polly.loop_exit: +; CHECK-DAG: %tmp.0.polly.lc.merge = phi float [ 0.000000e+00, %polly.loop_if ], [ %tmp.0.polly.lc, %polly.merge ] +; CHECK-DAG: %p_tmp7.merge.merge = phi float [ undef, %polly.loop_if ], [ %p_tmp7.merge, %polly.merge ] ; CHECK-LABEL: polly.exiting: -; CHECK-NEXT: %tmp.0.final_reload = load float, float* %tmp.0.s2a ; CHECK-NEXT: br label %polly.merge_new_and_old -; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: -; CHECK-NEXT: %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops -; CHECK-: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a +; CHECK-LABEL: polly.loop_header: +; CHECK-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.merge ] +; CHECK-NEXT: %tmp.0.polly.lc = phi float [ 0.000000e+00, %polly.loop_preheader ], [ %p_tmp7.merge, %polly.merge ] -; CHECK-LABEL: polly.stmt.bb4: -; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a -; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 -; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ -; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops +; CHECK-LABEL: polly.merge: +; CHECK-NEXT: %p_tmp7.merge = phi float [ %p_tmp7, %polly.stmt.bb4 ], 
[ undef, %polly.else ] -; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}: -; CHECK-NEXT: %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops -; CHECK: store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a +; CHECK-LABEL: polly.stmt.bb4: +; CHECK-NEXT: %scevgep = getelementptr float, float* %A, i64 %polly.indvar +; CHECK-NEXT: %tmp6_p_scalar_ = load float, float* %scevgep +; CHECK-NEXT: %p_tmp7 = fadd float %tmp.0.polly.lc, %tmp6_p_scalar_ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/phi_scalar_simple_1.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_1.ll +++ test/Isl/CodeGen/phi_scalar_simple_1.ll @@ -12,55 +12,70 @@ define i32 @jd(i32* noalias %A, i32 %x, i32 %N) { entry: ; CHECK-LABEL: entry: -; CHECK-DAG: %x.addr.1.lcssa.s2a = alloca i32 -; CHECK-DAG: %x.addr.1.lcssa.phiops = alloca i32 -; CHECK-DAG: %x.addr.1.s2a = alloca i32 -; CHECK-DAG: %x.addr.1.phiops = alloca i32 -; CHECK-DAG: %x.addr.0.s2a = alloca i32 -; CHECK-DAG: %x.addr.0.phiops = alloca i32 +; CHECK-NOT: alloca %tmp = sext i32 %N to i64 br label %for.cond ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.exiting ], [ %x.addr.0, %for.cond ] +; CHECK: %x.addr.0.merge = phi i32 [ %x.merge.merge, %polly.exiting ], [ %x.addr.0, %for.cond ] ; CHECK: ret i32 %x.addr.0.merge ; CHECK-LABEL: polly.start: -; CHECK: store i32 %x, i32* %x.addr.0.phiops +; CHECK-NEXT: br label %polly.cond ; CHECK-LABEL: polly.merge: -; CHECK: %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a +; CHECK: %x.merge.merge = phi i32 [ %x.merge, %polly.loop_exit ], [ %x, %polly.stmt.for.cond{{2[0-9]}} ] + +; CHECK-LABEL: polly.loop_exit: +; CHECK: %x.merge = phi i32 [ %x, %polly.loop_if ], [ %x.addr.0.polly.lc.merge.merge[[p:[0-9]*]].merge, %polly.merge2 ] + +; CHECK-LABEL: polly.loop_header: +; CHECK: %x.addr.0.polly.lc = phi i32 [ %x, %polly.loop_preheader ], [ 
%x.addr.0.polly.lc.merge.merge[[p]].merge, %polly.merge2 ] for.cond: ; preds = %for.inc4, %entry ; CHECK-LABEL: polly.stmt.for.cond{{[0-9]*}}: -; CHECK: %x.addr.0.phiops.reload[[R1:[0-9]*]] = load i32, i32* %x.addr.0.phiops -; CHECK: store i32 %x.addr.0.phiops.reload[[R1]], i32* %x.addr.0.s2a +; CHECK-NOT: load +; CHECK-NOT: store %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc4 ], [ 1, %entry ] %x.addr.0 = phi i32 [ %x, %entry ], [ %x.addr.1.lcssa, %for.inc4 ] %cmp = icmp slt i64 %indvars.iv, %tmp br i1 %cmp, label %for.body, label %for.end6 +; CHECK-LABEL: polly.merge2: +; CHECK: %x.addr.0.polly.lc.merge.merge[[p]].merge = phi i32 [ %x.addr.0.polly.lc.merge.merge[[k:[0-9]*]], %polly.merge[[t:2[0-9]]] ], [ %x.addr.0.polly.lc, %polly.else4 ] + for.body: ; preds = %for.cond ; CHECK-LABEL: polly.stmt.for.body: -; CHECK: %x.addr.0.s2a.reload[[R2:[0-9]*]] = load i32, i32* %x.addr.0.s2a -; CHECK: store i32 %x.addr.0.s2a.reload[[R2]], i32* %x.addr.1.phiops +; CHECK-NOT: load +; CHECK-NOT: store br label %for.cond1 +; CHECK-LABEL: polly.loop_exit8: +; CHECK: %x.addr.0.polly.lc.merge = phi i32 [ %x.addr.0.polly.lc, %polly.loop_if5 ], [ %p_add.merge, %polly.merge15 ] + +; CHECK: polly.merge[[t]]: +; CHECK: %x.addr.0.polly.lc.merge.merge[[k]] = phi i32 [ %x.addr.0.polly.lc.merge, %polly.stmt.for.inc4 ], [ %x.addr.0.polly.lc, %polly.else{{2[0-9]}} ] + +; CHECK-LABEL: polly.loop_header6: +; CHECK: %x.addr.1.polly.lc = phi i32 [ %x.addr.0.polly.lc, %polly.loop_preheader7 ], [ %p_add.merge, %polly.merge15 ] + for.cond1: ; preds = %for.inc, %for.body ; CHECK-LABEL: polly.stmt.for.cond1: -; CHECK: %x.addr.1.phiops.reload = load i32, i32* %x.addr.1.phiops -; CHECK: store i32 %x.addr.1.phiops.reload, i32* %x.addr.1.s2a[[R6:[0-9]*]] -; CHECK: store i32 %x.addr.1.phiops.reload, i32* %x.addr.1.lcssa.phiops +; CHECK-NOT: load +; CHECK-NOT: store %x.addr.1 = phi i32 [ %x.addr.0, %for.body ], [ %add, %for.inc ] %j.0 = phi i32 [ 3, %for.body ], [ %inc, %for.inc ] %exitcond = icmp ne i32 
%j.0, %N br i1 %exitcond, label %for.body3, label %for.end +; CHECK-LABEL: polly.merge15: +; CHECK: %p_add.merge = phi i32 [ %p_add, %polly.stmt.for.inc ], [ %x.addr.1.polly.lc, %polly.else17 ] + for.inc: ; preds = %for.body3 ; CHECK-LABEL: polly.stmt.for.inc: -; CHECK: %x.addr.1.s2a.reload[[R3:[0-9]*]] = load i32, i32* %x.addr.1.s2a -; CHECK: %p_add = add nsw i32 %x.addr.1.s2a.reload[[R3]], %tmp1_p_scalar_ -; CHECK: store i32 %p_add, i32* %x.addr.1.phiops +; CHECK: %p_add = add nsw i32 %x.addr.1.polly.lc, %tmp1_p_scalar_ +; CHECK-NOT: load +; CHECK-NOT: store %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv %tmp1 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %x.addr.1, %tmp1 @@ -69,15 +84,15 @@ for.end: ; preds = %for.cond1 ; CHECK-LABEL: polly.stmt.for.end: -; CHECK-NEXT: %x.addr.1.lcssa.phiops.reload = load i32, i32* %x.addr.1.lcssa.phiops -; CHECK-NEXT: store i32 %x.addr.1.lcssa.phiops.reload, i32* %x.addr.1.lcssa.s2a[[R4:[0-9]*]] +; CHECK-NOT: load +; CHECK-NOT: store %x.addr.1.lcssa = phi i32 [ %x.addr.1, %for.cond1 ] br label %for.inc4 for.inc4: ; preds = %for.end ; CHECK-LABEL: polly.stmt.for.inc4: -; CHECK: %x.addr.1.lcssa.s2a.reload[[R5:[0-9]*]] = load i32, i32* %x.addr.1.lcssa.s2a[[R4]] -; CHECK: store i32 %x.addr.1.lcssa.s2a.reload[[R5]], i32* %x.addr.0.phiops +; CHECK-NOT: load +; CHECK-NOT: store %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 br label %for.cond Index: test/Isl/CodeGen/phi_scalar_simple_2.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_2.ll +++ test/Isl/CodeGen/phi_scalar_simple_2.ll @@ -13,84 +13,43 @@ define i32 @jd(i32* noalias %A, i32 %x, i32 %N, i32 %c) { entry: ; CHECK-LABEL: entry: -; CHECK-DAG: %x.addr.2.s2a = alloca i32 -; CHECK-DAG: %x.addr.2.phiops = alloca i32 -; CHECK-DAG: %x.addr.1.s2a = alloca i32 -; CHECK-DAG: %x.addr.1.phiops = alloca i32 -; CHECK-DAG: %x.addr.0.s2a = alloca i32 -; CHECK-DAG: %x.addr.0.phiops = alloca i32 +;
CHECK-NOT: alloca %tmp = sext i32 %N to i64 %tmp1 = sext i32 %c to i64 br label %for.cond -; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.exiting ], [ %x.addr.0, %for.cond ] -; CHECK: ret i32 %x.addr.0.merge - -; CHECK-LABEL: polly.start: -; CHECK-NEXT: sext -; CHECK-NEXT: store i32 %x, i32* %x.addr.0.phiops - -; CHECK-LABEL: polly.merge21: -; CHECK: %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a - for.cond: ; preds = %for.inc5, %entry -; CHECK-LABEL: polly.stmt.for.cond{{[0-9]*}}: -; CHECK: %x.addr.0.phiops.reload[[R1:[0-9]*]] = load i32, i32* %x.addr.0.phiops -; CHECK: store i32 %x.addr.0.phiops.reload[[R1]], i32* %x.addr.0.s2a %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc5 ], [ 0, %entry ] %x.addr.0 = phi i32 [ %x, %entry ], [ %x.addr.1, %for.inc5 ] %cmp = icmp slt i64 %indvars.iv, %tmp br i1 %cmp, label %for.body, label %for.end7 for.body: ; preds = %for.cond -; CHECK-LABEL: polly.stmt.for.body: -; CHECK: %x.addr.0.s2a.reload[[R2:[0-9]*]] = load i32, i32* %x.addr.0.s2a -; CHECK: store i32 %x.addr.0.s2a.reload[[R2]], i32* %x.addr.1.phiops br label %for.cond1 for.inc5: ; preds = %for.end -; CHECK-LABEL: polly.stmt.for.inc5: -; CHECK: %x.addr.1.s2a.reload[[R5:[0-9]*]] = load i32, i32* %x.addr.1.s2a -; CHECK: store i32 %x.addr.1.s2a.reload[[R5]], i32* %x.addr.0.phiops %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 br label %for.cond for.cond1: ; preds = %for.inc, %for.body -; CHECK-LABEL: polly.stmt.for.cond1: -; CHECK: %x.addr.1.phiops.reload = load i32, i32* %x.addr.1.phiops -; CHECK: store i32 %x.addr.1.phiops.reload, i32* %x.addr.1.s2a %x.addr.1 = phi i32 [ %x.addr.0, %for.body ], [ %x.addr.2, %for.inc ] %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] %exitcond = icmp ne i32 %j.0, %N br i1 %exitcond, label %for.body3, label %for.end for.body3: ; preds = %for.cond1 -; CHECK-LABEL: polly.stmt.for.body3: -; CHECK: %x.addr.1.s2a.reload = load i32, i32* %x.addr.1.s2a -; CHECK: store i32 
%x.addr.1.s2a.reload, i32* %x.addr.2.phiops %cmp4 = icmp slt i64 %indvars.iv, %tmp1 br i1 %cmp4, label %if.then, label %if.end if.end: ; preds = %if.then, %for.body3 -; CHECK-LABEL: polly.stmt.if.end: -; CHECK: %x.addr.2.phiops.reload = load i32, i32* %x.addr.2.phiops -; CHECK: store i32 %x.addr.2.phiops.reload, i32* %x.addr.2.s2a %x.addr.2 = phi i32 [ %add, %if.then ], [ %x.addr.1, %for.body3 ] br label %for.inc for.inc: ; preds = %if.end -; CHECK-LABEL: polly.stmt.for.inc: -; CHECK: %x.addr.2.s2a.reload[[R3:[0-9]*]] = load i32, i32* %x.addr.2.s2a -; CHECK: store i32 %x.addr.2.s2a.reload[[R3]], i32* %x.addr.1.phiops %inc = add nsw i32 %j.0, 1 br label %for.cond1 if.then: ; preds = %for.body3 -; CHECK-LABEL: polly.stmt.if.then: -; CHECK: %x.addr.1.s2a.reload[[R5:[0-9]*]] = load i32, i32* %x.addr.1.s2a -; CHECK: %p_add = add nsw i32 %x.addr.1.s2a.reload[[R5]], %tmp2_p_scalar_ -; CHECK: store i32 %p_add, i32* %x.addr.2.phiops %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv %tmp2 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %x.addr.1, %tmp2 Index: test/Isl/CodeGen/phi_with_multi_exiting_edges_2.ll =================================================================== --- test/Isl/CodeGen/phi_with_multi_exiting_edges_2.ll +++ test/Isl/CodeGen/phi_with_multi_exiting_edges_2.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s ; ; CHECK: polly.merge_new_and_old: -; CHECK: %result.ph.merge = phi float [ %result.ph.final_reload, %polly.exiting ], [ %result.ph, %next.region_exiting ] +; CHECK: %result.ph.merge = phi float [ %p_sumB.merge, %polly.exiting ], [ %result.ph, %next.region_exiting ] ; CHECK: br label %next ; ; CHECK: next: Index: test/Isl/CodeGen/phi_with_one_exit_edge.ll =================================================================== --- test/Isl/CodeGen/phi_with_one_exit_edge.ll +++ test/Isl/CodeGen/phi_with_one_exit_edge.ll @@ -2,7 +2,7 @@ ; ; ; CHECK: polly.merge_new_and_old: -; CHECK: %sumA.merge = phi 
float [ %sumA.final_reload, %polly.exiting ], [ %sumA, %loopA ] +; CHECK: %sumA.merge = phi float [ %p_sumA, %polly.exiting ], [ %sumA, %loopA ] ; CHECK: br label %next ; ; CHECK: next: Index: test/Isl/CodeGen/pr25241.ll =================================================================== --- test/Isl/CodeGen/pr25241.ll +++ test/Isl/CodeGen/pr25241.ll @@ -4,14 +4,16 @@ ; Ensure that synthesized values of a PHI node argument are generated in the ; incoming block, not in the PHI's block. -; CHECK-LABEL: polly.stmt.if.then.862: -; CHECK: %[[R1:[0-9]+]] = add i32 %tmp, 1 -; CHECK: store i32 %0, i32* %curr.3.s2a -; CHECK: br label - -; CHECK-LABEL: polly.stmt.polly.merge_new_and_old.exit: -; CHECK: %curr.3.ph.final_reload = load i32, i32* %curr.3.s2a -; CHECK: br label +; CHECK: polly.stmt.if.then.862: ; preds = %polly.stmt.if.then.813 +; CHECK: %[[r:[0-9a-zA-Z_.]*]] = add nsw i32 %tmp, 1 +; CHECK: br label %polly.stmt.while.body.740.region_exiting +; +; CHECK: polly.stmt.if.else.864: ; preds = %polly.stmt.if.then.813 +; CHECK: br label %polly.stmt.while.body.740.region_exiting +; +; CHECK: polly.stmt.while.body.740.region_exiting: ; preds = %polly.stmt.if.else.864, %polly.stmt.if.then.862 +; CHECK: %polly.curr.3.ph = phi i32 [ undef, %polly.stmt.if.else.864 ], [ %[[r]], %polly.stmt.if.then.862 ] +; CHECK: br label %polly.stmt.polly.merge_new_and_old.exit target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/read-only-scalars.ll =================================================================== --- test/Isl/CodeGen/read-only-scalars.ll +++ test/Isl/CodeGen/read-only-scalars.ll @@ -6,17 +6,7 @@ ; RUN: -S < %s | FileCheck %s -check-prefix=SCALAR ; CHECK-NOT: alloca - -; SCALAR-LABEL: entry: -; SCALAR-NEXT: %scalar.s2a = alloca float - -; SCALAR-LABEL: polly.start: -; SCALAR-NEXT: store float %scalar, float* %scalar.s2a - -; SCALAR-LABEL: polly.stmt.stmt1: -; SCALAR-NEXT: %scalar.s2a.reload = load float, float* %scalar.s2a -; SCALAR-NEXT: 
%val_p_scalar_ = load float, float* %A, -; SCALAR-NEXT: %p_sum = fadd float %val_p_scalar_, %scalar.s2a.reload +; SCALAR-NOT: alloca define void @foo(float* noalias %A, float %scalar) { entry: Index: test/Isl/CodeGen/scalar-dependence-reverse-text-order-two-uses.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/scalar-dependence-reverse-text-order-two-uses.ll @@ -0,0 +1,59 @@ +;RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -analyze < %s | FileCheck %s --check-prefix=AST +;RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-codegen -S < %s | FileCheck %s +; +; AST: for (int c0 = 0; c0 <= 101; c0 += 1) { +; AST: if (c0 >= 1) +; AST: Stmt_loopB(c0 - 1); +; AST: if (c0 <= 100) { +; AST: Stmt_loopA(c0); +; AST: Stmt_loopC(c0); +; AST: } +; AST: } +; +; CHECK: polly.loop_header: +; CHECK-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.merge[[M:[0-9]*]] ] +; CHECK-NEXT: %val.polly.lc = phi float [ undef, %polly.loop_preheader ], [ %val_p_scalar_.merge[[N:[0-9]*]], %polly.merge[[M]] ] +; +; CHECK: polly.merge[[M]]: +; CHECK-DAG: %val_p_scalar_.merge = phi float [ %val_p_scalar_, %polly.stmt.loopC ], [ undef, %polly.else[[P:[0-9]*]] ] +; CHECK-DAG: %val_p_scalar_.merge[[N]] = phi float [ %val_p_scalar_, %polly.stmt.loopC ], [ undef, %polly.else[[P]] ] +; +; CHECK: polly.stmt.loopB: +; CHECK-NEXT: %scevgep = getelementptr float, float* %B, i64 +; CHECK-NEXT: store float %val.polly.lc, float* %scevgep +; +; CHECK: polly.stmt.loopA: +; CHECK-NEXT: %scevgep[[K:[0-9]*]] = getelementptr float, float* %A, i64 %polly.indvar +; CHECK-NEXT: %val_p_scalar_ = load float, float* %scevgep[[K]] +; CHECK-NEXT: br label %polly.stmt.loopC +; +; CHECK: polly.stmt.loopC: +; CHECK-NEXT: %scevgep[[L:[0-9]*]] = getelementptr float, float* %C, i64 %polly.indvar +; CHECK-NEXT: store float %val_p_scalar_, float* %scevgep[[L]] +; CHECK-NEXT: br label 
%polly.merge[[M]] +; +define void @sdrtotu(float* %A, float* %B, float* %C) { +entry: + br label %loopA + +loopA: + %indvar = phi i64 [0, %entry], [%indvar.next, %loopB] + %ptrA = getelementptr float, float* %A, i64 %indvar + %val = load float, float* %ptrA + br label %loopC + +loopC: + %ptrC = getelementptr float, float* %C, i64 %indvar + store float %val, float* %ptrC + br label %loopB + +loopB: + %indvar.next = add i64 %indvar, 1 + %ptrB = getelementptr float, float* %B, i64 %indvar + store float %val, float* %ptrB + %cmp = icmp sge i64 %indvar, 100 + br i1 %cmp, label %exit, label %loopA + +exit: + ret void +} Index: test/Isl/CodeGen/scalar-dependence-reverse-text-order.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/scalar-dependence-reverse-text-order.ll @@ -0,0 +1,47 @@ +;RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -analyze < %s | FileCheck %s --check-prefix=AST +;RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-codegen -S < %s | FileCheck %s +; +; AST: for (int c0 = 0; c0 <= 101; c0 += 1) { +; AST: if (c0 >= 1) +; AST: Stmt_loopB(c0 - 1); +; AST: if (c0 <= 100) +; AST: Stmt_loopA(c0); +; AST: } +; +; CHECK: polly.loop_header: +; CHECK-NEXT: %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.merge4 ] +; CHECK-NEXT: %val.polly.lc = phi float [ undef, %polly.loop_preheader ], [ %val_p_scalar_.merge[[N:[0-9]*]], %polly.merge4 ] +; +; CHECK: polly.merge4: +; CHECK-DAG: %val_p_scalar_.merge = phi float [ %val_p_scalar_, %polly.stmt.loopA ], [ undef, %polly.else6 ] +; CHECK-DAG: %val_p_scalar_.merge[[N]] = phi float [ %val_p_scalar_, %polly.stmt.loopA ], [ undef, %polly.else6 ] +; +; CHECK: polly.stmt.loopB: +; CHECK-NEXT: %scevgep = getelementptr float, float* %B, i64 +; CHECK-NEXT: store float %val.polly.lc, float* %scevgep +; +; CHECK: polly.stmt.loopA: +; CHECK-NEXT: %scevgep7 = getelementptr float, float* %A, i64 
%polly.indvar +; CHECK-NEXT: %val_p_scalar_ = load float, float* %scevgep7 +; CHECK-NEXT: br label %polly.merge4 +; +define void @sdrto(float* %A, float* %B) { +entry: + br label %loopA + +loopA: + %indvar = phi i64 [0, %entry], [%indvar.next, %loopB] + %ptrA = getelementptr float, float* %A, i64 %indvar + %val = load float, float* %ptrA + br label %loopB + +loopB: + %indvar.next = add i64 %indvar, 1 + %ptrB = getelementptr float, float* %B, i64 %indvar + store float %val, float* %ptrB + %cmp = icmp sge i64 %indvar, 100 + br i1 %cmp, label %exit, label %loopA + +exit: + ret void +} Index: test/Isl/CodeGen/scalar-store-from-same-bb.ll =================================================================== --- test/Isl/CodeGen/scalar-store-from-same-bb.ll +++ test/Isl/CodeGen/scalar-store-from-same-bb.ll @@ -1,11 +1,17 @@ -; RUN: opt %loadPolly \ -; RUN: -polly-codegen -S < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s -; This test ensures that the expression N + 1 that is stored in the phi-node -; alloca, is directly computed and not incorrectly transfered through memory. 
+; CHECK: polly.merge_new_and_old: +; CHECK: %res.merge = phi i64 [ %p_sum.merge, %polly.exiting ], [ %res, %merge ] +; CHECK: br label %exit +; +; CHECK: exit: +; CHECK: ret i64 %res.merge +; +; CHECK: polly.merge: +; CHECK: %p_sum.merge = phi i64 [ %p_sum, %polly.loop_exit ], [ 0, %polly.else ] -; CHECK: store i64 %2, i64* %res.phiops -; CHECK: %2 = add i64 %N, 1 +; CHECK: polly.stmt.loop: +; CHECK: %p_sum = add i64 %N, 1 define i64 @foo(float* %A, i64 %N) { entry: Index: test/Isl/CodeGen/sdrto___%loopA---%exit.jscop =================================================================== --- /dev/null +++ test/Isl/CodeGen/sdrto___%loopA---%exit.jscop @@ -0,0 +1,36 @@ +{ + "context" : "{ : }", + "name" : "loopA => exit", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_loopA[i0] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_loopA[i0] -> MemRef_val[] }" + } + ], + "domain" : "{ Stmt_loopA[i0] : i0 <= 100 and i0 >= 0 }", + "name" : "Stmt_loopA", + "schedule" : "{ Stmt_loopA[i0] -> [i0, 1] }" + }, + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_loopB[i0] -> MemRef_val[] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_loopB[i0] -> MemRef_B[i0] }" + } + ], + "domain" : "{ Stmt_loopB[i0] : i0 <= 100 and i0 >= 0 }", + "name" : "Stmt_loopB", + "schedule" : "{ Stmt_loopB[i0] -> [i0+1, 0] }" + } + ] +} Index: test/Isl/CodeGen/sdrtotu___%loopA---%exit.jscop =================================================================== --- /dev/null +++ test/Isl/CodeGen/sdrtotu___%loopA---%exit.jscop @@ -0,0 +1,51 @@ +{ + "context" : "{ : }", + "name" : "loopA => exit", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_loopA[i0] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_loopA[i0] -> MemRef_val[] }" + } + ], + "domain" : "{ Stmt_loopA[i0] : i0 <= 100 and i0 >= 0 }", + "name" : "Stmt_loopA", + "schedule" : "{ Stmt_loopA[i0] -> [i0, 1] }" + }, 
+ { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_loopC[i0] -> MemRef_val[] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_loopC[i0] -> MemRef_C[i0] }" + } + ], + "domain" : "{ Stmt_loopC[i0] : i0 <= 100 and i0 >= 0 }", + "name" : "Stmt_loopC", + "schedule" : "{ Stmt_loopC[i0] -> [i0, 2] }" + }, + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_loopB[i0] -> MemRef_val[] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_loopB[i0] -> MemRef_B[i0] }" + } + ], + "domain" : "{ Stmt_loopB[i0] : i0 <= 100 and i0 >= 0 }", + "name" : "Stmt_loopB", + "schedule" : "{ Stmt_loopB[i0] -> [i0+1, 0] }" + } + ] +} Index: test/Isl/CodeGen/simple_vec_call.ll =================================================================== --- test/Isl/CodeGen/simple_vec_call.ll +++ test/Isl/CodeGen/simple_vec_call.ll @@ -28,9 +28,9 @@ ; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] ; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] ; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] -; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0 -; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1 -; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2 -; CHECK: %7 = insertelement <4 x float> %6, float [[RES4]], i32 3 -; CHECK: store <4 x float> %7 +; CHECK: %0 = insertelement <4 x float> undef, float [[RES1]], i32 0 +; CHECK: %1 = insertelement <4 x float> %0, float [[RES2]], i32 1 +; CHECK: %2 = insertelement <4 x float> %1, float [[RES3]], i32 2 +; CHECK: %3 = insertelement <4 x float> %2, float [[RES4]], i32 3 +; CHECK: store <4 x float> %3 ; CHECK: attributes [[NUW]] = { nounwind } Index: test/Isl/CodeGen/simple_vec_stride_one.ll =================================================================== --- test/Isl/CodeGen/simple_vec_stride_one.ll +++ test/Isl/CodeGen/simple_vec_stride_one.ll @@ -1,7 +1,12 @@ ; RUN: opt %loadPolly 
-polly-codegen -polly-vectorizer=polly \ ; RUN: < %s -S | FileCheck %s -; CHECK: store <4 x double> %val.s2a_p_splat, <4 x double>* %vector_ptr +; CHECK: polly.stmt.loop3: +; CHECK: store double %val_p_scalar_, double* %scevgep8, align 8, !alias.scope !0, !noalias !2 +; CHECK: store double %val_p_scalar_, double* %scevgep9, align 8, !alias.scope !0, !noalias !2 +; CHECK: store double %val_p_scalar_, double* %scevgep10, align 8, !alias.scope !0, !noalias !2 +; CHECK: store double %val_p_scalar_, double* %scevgep11, align 8, !alias.scope !0, !noalias !2 + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define void @update_access_functions(i64 %arg, double* %A, double* %B) { Index: test/Isl/CodeGen/srem-in-other-bb.ll =================================================================== --- test/Isl/CodeGen/srem-in-other-bb.ll +++ test/Isl/CodeGen/srem-in-other-bb.ll @@ -8,11 +8,9 @@ ; ; CHECK: polly.stmt.bb2: ; CHECK-NEXT: %p_tmp = srem i64 %n, 42 -; CHECK-NEXT: store i64 %p_tmp, i64* %tmp.s2a ; ; CHECK: polly.stmt.bb3: -; CHECK: %tmp.s2a.reload = load i64, i64* %tmp.s2a -; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %tmp.s2a.reload +; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %p_tmp define void @pos(float* %A, i64 %n) { bb: Index: test/Isl/CodeGen/uninitialized_scalar_memory.ll =================================================================== --- test/Isl/CodeGen/uninitialized_scalar_memory.ll +++ test/Isl/CodeGen/uninitialized_scalar_memory.ll @@ -1,12 +1,18 @@ ; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s ; -; Verify we initialize the scalar locations reserved for the incoming phi -; values. 
+; CHECK-NOT: alloca +; +; CHECK: polly.merge_new_and_old: +; CHECK-DAG: %ebig.1.merge = phi float [ %tmp4_p_scalar_.merge, %polly.exiting ], [ %ebig.1, %for.inc ] +; CHECK-DAG: %indvars.iv.next.merge = phi i64 [ %p_indvars.iv.next, %polly.exiting ], [ %indvars.iv.next, %for.inc ] +; CHECK-DAG: %iebig.1.merge = phi i32 [ %p_conv8.merge, %polly.exiting ], [ %iebig.1, %for.inc ] +; CHECK: br label %for.cond +; +; CHECK: polly.stmt.if.end.9.exit: +; CHECK-DAG: %tmp4_p_scalar_.merge = phi float [ %tmp4_p_scalar_, %polly.stmt.if.then.2 ], [ %ebig.0, %polly.stmt.if.end ] +; CHECK-DAG: %p_conv8.merge = phi i32 [ %p_conv8, %polly.stmt.if.then.2 ], [ %iebig.0, %polly.stmt.if.end ] +; CHECK: br label %polly.stmt.if.end.9 ; -; CHECK: polly.start: -; CHECK-NEXT: store float %ebig.0, float* %ebig.0.s2a -; CHECK-NEXT: store i32 %iebig.0, i32* %iebig.0.s2a -; CHECK-NEXT: br label %polly.stmt.if.end.entry ; ; int g(void); ; float M; Index: test/ScopInfo/invariant_load_access_classes_different_base_type.ll =================================================================== --- test/ScopInfo/invariant_load_access_classes_different_base_type.ll +++ test/ScopInfo/invariant_load_access_classes_different_base_type.ll @@ -20,13 +20,10 @@ ; CHECK: Execution Context: { : } ; CHECK: } ; -; CODEGEN: %S.b.preload.s2a = alloca float -; CODEGEN: %S.a.preload.s2a = alloca i32 +; CODEGEN-NOT: alloca ; ; CODEGEN: %.load = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0) -; CODEGEN: store i32 %.load, i32* %S.a.preload.s2a ; CODEGEN: %.load12 = load float, float* bitcast (i32* getelementptr (i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0), i64 1) to float*) -; CODEGEN: store float %.load12, float* %S.b.preload.s2a ; ; CODEGEN: polly.stmt.for.body: ; CODEGEN: %p_conv = sitofp i32 %.load to float Index: test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll 
=================================================================== --- test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll +++ test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll @@ -39,29 +39,22 @@ ; CHECK-NOT: Access ; CHECK: } ; -; CODEGEN: entry: -; CODEGEN: %S.b.preload.s2a = alloca float -; CODEGEN: %S.a.preload.s2a = alloca i32 ; ; CODEGEN: polly.preload.begin: ; CODEGEN: %.load = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0) -; CODEGEN: store i32 %.load, i32* %S.a.preload.s2a ; CODEGEN: %.load12 = load float, float* bitcast (i32* getelementptr (i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @S, i32 0, i32 0), i64 1) to float*) -; CODEGEN: store float %.load12, float* %S.b.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: -; CODEGEN-DAG: %S.b.merge = phi float [ %S.b.final_reload, %polly.exiting ], [ %S.b, %do.cond ] -; CODEGEN-DAG: %S.a.merge = phi i32 [ %S.a.final_reload, %polly.exiting ], [ %S.a, %do.cond ] +; CODEGEN-DAG: %S.b.merge = phi float [ %.load12, %polly.exiting ], [ %S.b, %do.cond ] +; CODEGEN-DAG: %S.a.merge = phi i32 [ %.load, %polly.exiting ], [ %S.a, %do.cond ] ; ; CODEGEN: do.end: ; CODEGEN: %conv3 = sitofp i32 %S.a.merge to float ; CODEGEN: %add4 = fadd float %conv3, %S.b.merge ; CODEGEN: ret float %add4 ; -; CODEGEN: polly.loop_exit: -; CODEGEN-DAG: %S.b.final_reload = load float, float* %S.b.preload.s2a -; CODEGEN-DAG: %S.a.final_reload = load i32, i32* %S.a.preload.s2a - +; CODEGEN: polly.loop_exit: +; CODEGEN-NEXT: br label ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll =================================================================== --- test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll +++ test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll @@ -25,16 +25,15 @@ ; CHECK: { 
Stmt_for_body[i0] -> MemRef_A[i0] }; ; CHECK: } ; -; CODEGEN: entry: -; CODEGEN: %U.f.preload.s2a = alloca float -; CODEGEN: br label %polly.split_new_and_old +; CODEGEN: entry: +; CODEGEN-NOT: alloca +; CODEGEN: br label %polly.split_new_and_old ; ; CODEGEN: polly.preload.begin: ; CODEGEN: %U.load1 = load float, float* bitcast (i32* @U to float*) ; TODO FIXME There should not be a bitcast but either a real conversion or ; another load as one type is FP the other is not. ; CODEGEN: %0 = bitcast float %U.load1 to i32 -; CODEGEN: store float %U.load1, float* %U.f.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: ; CODEGEN-NOT: merge = phi Index: test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll =================================================================== --- test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll +++ test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll @@ -31,22 +31,18 @@ ; CHECK: } ; ; CODEGEN: entry: -; CODEGEN: %U.f.preload.s2a = alloca float ; CODEGEN: br label %polly.split_new_and_old ; ; CODEGEN: polly.preload.begin: ; CODEGEN: %U.load1 = load float, float* bitcast (i32* @U to float*) ; CODEGEN: %0 = bitcast float %U.load1 to i32 -; CODEGEN: store float %U.load1, float* %U.f.preload.s2a ; ; CODEGEN: polly.merge_new_and_old: -; CODEGEN-DAG: %U.f.merge = phi float [ %U.f.final_reload, %polly.exiting ], [ %U.f, %do.cond ] -; CODEGEN-DAG: %U.i.merge = phi i32 [ %5, %polly.exiting ], [ %U.i, %do.cond ] +; CODEGEN-DAG: %U.f.merge = phi float [ %U.load1, %polly.exiting ], [ %U.f, %do.cond ] +; CODEGEN-DAG: %U.i.merge = phi i32 [ %0, %polly.exiting ], [ %U.i, %do.cond ] ; -; CODEGEN: polly.loop_exit: -; CODEGEN-DAG: %U.f.final_reload = load float, float* %U.f.preload.s2a -; CODEGEN-DAG: %U.i.final_reload = load float, float* %U.f.preload.s2a -; CODEGEN-DAG: %5 = bitcast float %U.i.final_reload to i32 +; CODEGEN: polly.loop_exit: +; 
CODEGEN-NEXT: br label ; ; CODEGEN: polly.stmt.do.body: ; CODEGEN: %p_conv = fptosi float %U.load1 to i32 Index: test/ScopInfo/invariant_load_zext_parameter.ll =================================================================== --- test/ScopInfo/invariant_load_zext_parameter.ll +++ test/ScopInfo/invariant_load_zext_parameter.ll @@ -17,7 +17,6 @@ ; CODEGEN: polly.preload.begin: ; CODEGEN-NEXT: %polly.access.I0 = getelementptr i32, i32* %I0, i64 0 ; CODEGEN-NEXT: %polly.access.I0.load = load i32, i32* %polly.access.I0 -; CODEGEN-NEXT: store i32 %polly.access.I0.load, i32* %loadI0.preload.s2a ; CODEGEN-NEXT: %0 = zext i32 %polly.access.I0.load to i64 ; CODEGEN-NEXT: %1 = icmp eq i64 %0, 0 ; CODEGEN-NEXT: br label %polly.preload.cond Index: test/ScopInfo/out-of-scop-use-in-region-entry-phi-node-nonaffine-subregion.ll =================================================================== --- test/ScopInfo/out-of-scop-use-in-region-entry-phi-node-nonaffine-subregion.ll +++ test/ScopInfo/out-of-scop-use-in-region-entry-phi-node-nonaffine-subregion.ll @@ -7,21 +7,19 @@ ; CHECK: %loop_carried.ph = phi float [ %newval.merge, %backedge ], [ undef, %entry ] ; ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %newval.merge = phi float [ %newval.final_reload, %polly.exiting ], [ %newval, %subregion_exit.region_exiting ] +; CHECK-DAG: %newval.merge = phi float [ %p_newval, %polly.exiting ], [ %newval, %subregion_exit.region_exiting ] +; CHECK-DAG: %indvar.merge = phi i32 [ %[[r:[0-9a-zA-Z._]*]], %polly.exiting ], [ %indvar, %subregion_exit.region_exiting ] +; CHECK-NEXT: br label %subregion_exit ; ; CHECK-LABEL: polly.start: -; CHECK: store float %loop_carried.ph, float* %loop_carried.phiops -; -; CHECK-LABEL: polly.stmt.subregion_entry.entry: -; CHECK: %loop_carried.phiops.reload = load float, float* %loop_carried.phiops +; CHECK-NEXT: br label %polly.stmt.subregion_entry.entry ; ; CHECK-LABEL: polly.stmt.subregion_entry: -; CHECK: %polly.loop_carried = phi float [ 
%loop_carried.phiops.reload2, %polly.stmt.subregion_entry.entry ] -; CHECK: %p_newval = fadd float %polly.loop_carried, 1.000000e+00 +; CHECK-NEXT: %p_newval = fadd float %loop_carried.ph, 1.000000e+00 +; CHECK-NEXT: %p_cmp +; CHECK-NEXT: %[[p:[0-9a-zA-Z._]*]] = trunc i64 %[[q:[0-9a-zA-Z._]*]] to i32 +; CHECK-NEXT: %[[r]] = add i32 %[[p]], 1 ; -; CHECK-LABEL: polly.stmt.polly.merge_new_and_old.exit: -; CHECK: %newval.final_reload = load float, float* %newval.s2a - define void @func() { entry: br label %subregion_entry