Index: include/polly/CodeGen/BlockGenerators.h
===================================================================
--- include/polly/CodeGen/BlockGenerators.h
+++ include/polly/CodeGen/BlockGenerators.h
@@ -419,6 +419,17 @@
   /// the original value in the non-optimized SCoP.
   void createScalarFinalization(Region &R);

+  /// @brief Recompute scalars needed in this statement.
+  ///
+  /// During SCoP creation scalars can be virtually moved to simplify the SCoP
+  /// description as well as the dependences. However, they are only moved if
+  /// we can recompute them in the statements they are used in. This method
+  /// performs the recomputation before we clone the original statement into
+  /// the new, optimized region. Thus it ensures that all scalars are
+  /// available.
+  void recomputeDependentScalars(ScopStmt &Stmt, ValueMapT &BBMap,
+                                 LoopToScevMapT &LTS,
+                                 isl_id_to_ast_expr *NewAccesses);
+
   /// @brief Try to synthesize a new value
   ///
   /// Given an old value, we try to synthesize it in a new context from its
@@ -441,7 +452,7 @@
   /// @returns o A newly synthesized value.
   ///          o NULL, if synthesizing the value failed.
   Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                               LoopToScevMapT &LTS, Loop *L) const;
+                               LoopToScevMapT &LTS, Loop *L);

   /// @brief Get the new version of a value.
   ///
@@ -463,15 +474,18 @@
   /// @param L       The loop that surrounded the instruction that referenced
   ///                this value in the original code. This loop is used to
   ///                evaluate the scalar evolution at the right scope.
+  /// @param TryOnly Flag to indicate that nullptr is a valid return value
+  ///                if no new value was found.
   ///
   /// @returns o The old value, if it is still valid.
   ///          o The new value, if available.
-  ///          o NULL, if no value is found.
+  ///          o NULL, if no value is found and TryOnly is set.
+  ///          o Otherwise a trap is triggered.
   Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                     LoopToScevMapT &LTS, Loop *L) const;
+                     LoopToScevMapT &LTS, Loop *L, bool TryOnly = false);

   void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
-                      LoopToScevMapT &LTS);
+                      LoopToScevMapT &LTS, bool Recompute = false);

   /// @brief Get the innermost loop that surrounds the statement @p Stmt.
   Loop *getLoopForStmt(const ScopStmt &Stmt) const;
@@ -528,8 +542,13 @@
   /// @param NewAccesses A map from memory access ids to new ast expressions,
   ///                    which may contain new access expressions for certain
   ///                    memory accesses.
+  /// @param Recompute   Flag to indicate that the instruction is a scalar
+  ///                    that needs to be recomputed in this statement. It
+  ///                    basically forces us to copy not only the instruction
+  ///                    but also all operands if we cannot find a local or
+  ///                    global mapping.
   void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
-                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
+                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses,
+                       bool Recompute = false);

   /// @brief Helper to determine if @p Inst can be synthesized in @p Stmt.
   ///
Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -328,6 +328,9 @@
   /// normal scalar array modeling.
   bool isPHIKind() const { return Kind == MK_PHI; };

+  /// @brief Return the memory kind of this array.
+  MemoryKind getKind() const { return Kind; };
+
   /// @brief Dump a readable representation to stderr.
   void dump() const;

@@ -636,6 +639,12 @@
   /// @param SAI Info object for the accessed array.
   void buildAccessRelation(const ScopArrayInfo *SAI);

+  /// @brief Copy this memory access into the given statement @p NewStmt.
+  ///
+  /// @param AccList The list that contains all accesses for @p NewStmt.
+  /// @param NewStmt The statement the copied access should reside in.
+  MemoryAccess *copyTo(AccFuncSetType &AccList, ScopStmt *NewStmt) const;
+
 public:
   /// @brief Create a new MemoryAccess.
   ///
@@ -678,7 +687,7 @@
   }

   /// @brief Get the type of a memory access.
-  enum AccessType getType() { return AccType; }
+  enum AccessType getType() const { return AccType; }

   /// @brief Is this a reduction like access?
   bool isReductionLike() const { return RedType != RT_NONE; }
@@ -968,6 +977,9 @@
   std::string BaseName;

+  /// @brief Set of scalar values that need to be recomputed in this statement.
+  SetVector<Instruction *> DependentScalars;
+
   /// Build the statement.
   //@{
   void buildDomain();
@@ -1138,13 +1150,20 @@
   }

   /// @brief Add @p Access to this statement's list of accesses.
-  void addAccess(MemoryAccess *Access);
+  ///
+  /// @param Access The access to add.
+  /// @param Front  Flag to indicate where the access should be added.
+  void addAccess(MemoryAccess *Access, bool Front = false);

   /// @brief Remove the memory accesses in @p InvMAs.
   ///
-  /// Note that scalar accesses that are caused by any access in @p InvMAs will
-  /// be eliminated too.
-  void removeMemoryAccesses(MemoryAccessList &InvMAs);
+  /// @param OnlyMA Flag to indicate whether, for each access in @p InvMAs,
+  ///               only that access should be removed or all accesses caused
+  ///               by the same access instruction.
+  ///
+  /// Note that if @p OnlyMA is not set, scalar accesses that are caused by any
+  /// access in @p InvMAs will be eliminated too.
+  void removeMemoryAccesses(const MemoryAccessList &InvMAs,
+                            bool OnlyMA = false);

   typedef MemoryAccessVec::iterator iterator;
   typedef MemoryAccessVec::const_iterator const_iterator;
@@ -1168,6 +1187,14 @@
   /// @brief Get the isl AST build.
   __isl_keep isl_ast_build *getAstBuild() const { return Build; }

+  /// @brief Add a scalar that needs to be recomputed in this statement.
+  void addDependentScalar(Instruction *Inst) { DependentScalars.insert(Inst); }
+
+  /// @brief Return the scalars that need to be recomputed in this statement.
+  const SetVector<Instruction *> &getDependentScalars() const {
+    return DependentScalars;
+  }
+
   /// @brief Restrict the domain of the statement.
   ///
   /// @param NewDomain The new statement domain.
@@ -1386,7 +1413,7 @@
   /// @brief Initialize this ScopInfo.
   void init(AliasAnalysis &AA, AssumptionCache &AC, ScopDetection &SD,
-            DominatorTree &DT, LoopInfo &LI);
+            DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE);

   /// @brief Add loop carried constraints to the header block of the loop @p L.
   ///
@@ -1519,6 +1546,77 @@
   /// @brief Add invariant loads listed in @p InvMAs with the domain of @p Stmt.
   void addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs);

+  /// @brief Check if we can recompute all instructions in @p Stmt.
+  ///
+  /// @param Stmt  The statement we want to recompute @p Insts in.
+  /// @param Insts The instructions we need to recompute.
+  ///
+  /// @returns True, if all instructions can be recomputed in @p Stmt.
+  bool canRecomputeInStmt(ScopStmt &Stmt,
+                          SmallPtrSet<Instruction *, 8> &Insts);
+
+  /// @brief Type for outside operands and their in-SCoP users.
+  using OutsideOperandsSetTy =
+      SmallVector<std::pair<Instruction *, Instruction *>, 4>;
+
+  /// @brief Type for a set of instructions.
+  using InstructionSetTy = SmallPtrSet<Instruction *, 8>;
+
+  /// @brief Type for non-trivially copy/recompute-able operands.
+  ///
+  /// The first element of the pair contains instructions that are actually not
+  /// trivially copy/recompute-able, e.g., PHI nodes and loads. The second part
+  /// contains all in-SCoP operands that use outside instructions. They are
+  /// non-trivial as we need to model the scalar read accesses of the outside
+  /// operands properly.
+  using NonTrivialOperandsPairTy =
+      std::pair<InstructionSetTy, OutsideOperandsSetTy>;
+
+  /// @brief Map from in-SCoP instructions to their non-trivial operands.
+  DenseMap<Instruction *, NonTrivialOperandsPairTy> NonTrivialOperandsMap;
+
+  /// @brief Collect non-trivial operands for all scalar write accesses.
+  ///
+  /// @param LI The LoopInfo for the current function.
+  /// @param SE The ScalarEvolution for the current function.
+  void collectNonTrivialOperands(LoopInfo &LI, ScalarEvolution &SE);
+
+  /// @brief Remove scalar reads that can be recomputed.
+  ///
+  /// This function will check if a scalar read can be recomputed instead of
+  /// reloaded/communicated. If so, it will remove that access and add the
+  /// definition to the ScopStmt::DependentScalars of that statement instead.
+  /// During code generation these scalars will be recomputed before the
+  /// instructions in the statement are copied, and can thus be used in the
+  /// statement without the need to communicate them through memory.
+  /// Additionally, this function might introduce new read accesses for
+  /// operands that are defined outside the SCoP in order to give, e.g.,
+  /// OpenMP code generation easy access to the values needed in a statement.
+  void removeRecomputableScalarReads(ScopStmt &Stmt);
+
+  /// @brief Remove scalar writes that are not read in the SCoP or at its exit.
+  ///
+  /// When Scop::removeRecomputableScalarReads() removes read accesses the
+  /// corresponding write accesses might not be needed anymore. To this end,
+  /// this function removes scalar writes without scalar reads inside the SCoP
+  /// or an escaping use that would cause a reload and merge at the SCoP exit.
+  void removeUnreadScalarWrites(ScopStmt &Stmt);
+
+  /// @brief Simplify the scalar accesses in this SCoP.
+  ///
+  /// Scalar accesses are often not needed and only caused by the placement in
+  /// the code. Additionally, it is sometimes possible to recompute scalars to
+  /// avoid communication. As scalars basically sequentialize all loops they
+  /// are in, we try to avoid scalar accesses as much as possible. To this end
+  /// we will virtually move them and later recompute them during code
+  /// generation. This allows more freedom for the scheduler while we do not
+  /// need to change the original code region at all.
+  ///
+  /// @param LI The LoopInfo for the current function.
+  /// @param SE The ScalarEvolution for the current function.
+  void simplifyScalarAccesses(LoopInfo &LI, ScalarEvolution &SE);
+
   /// @brief Build the Context of the Scop.
   void buildContext();

@@ -1876,6 +1974,9 @@
   const_reverse_iterator rend() const { return Stmts.rend(); }
   //@}

+  /// @brief Remove the ScopArrayInfo object @p SAI.
+  void removeScopArrayInfo(const ScopArrayInfo *SAI);
+
   /// @brief Return the (possibly new) ScopArrayInfo object for @p Access.
   ///
   /// @param ElementType The type of the elements stored in this array.
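[Reviewer note, not part of the patch: a minimal C-style sketch, distilled from the new test cases further down, of the pattern simplifyScalarAccesses targets. The statement names and access annotations are illustrative.]

```cpp
// Without this patch, a scalar defined in one basic block and used in
// another is modeled as a scalar write plus a scalar read ([Scalar: 1]
// accesses to MemRef_x), which sequentializes the loop:
void f(float *A) {
  for (int i = 0; i < 1000; i++) {
    float x = i * 2; // Stmt_for_body:       MustWrite MemRef_x[]
    /* split BB */
    A[i] = x;        // Stmt_for_body_split: Read      MemRef_x[]
  }
}
// After the pass, both scalar accesses are gone: the definition of 'x' is
// recorded in the DependentScalars of Stmt_for_body_split and recomputed
// there during code generation, so nothing is communicated through memory.
```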
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -783,6 +783,27 @@
   Id = isl_id_alloc(Stmt->getParent()->getIslCtx(), IdName.c_str(), this);
 }

+MemoryAccess *MemoryAccess::copyTo(AccFuncSetType &AccList,
+                                   ScopStmt *NewStmt) const {
+  AccList.emplace_back(NewStmt, getAccessInstruction(), getType(),
+                       getBaseAddr(), getElemSizeInBytes(), isAffine(),
+                       Subscripts, Sizes, getAccessValue(), Kind,
+                       getBaseName());
+  MemoryAccess *CopyMA = &AccList.back();
+
+  unsigned CurStmtDims = getStatement()->getNumIterators();
+  unsigned NewStmtDims = NewStmt->getNumIterators();
+  assert(NewStmtDims >= CurStmtDims);
+
+  isl_map *AR = getAccessRelation();
+  if (NewStmtDims > CurStmtDims)
+    AR = isl_map_add_dims(AR, isl_dim_in, NewStmtDims - CurStmtDims);
+
+  CopyMA->AccessRelation =
+      isl_map_set_tuple_id(AR, isl_dim_in, NewStmt->getDomainId());
+  return CopyMA;
+}
+
 void MemoryAccess::realignParams() {
   isl_space *ParamSpace = Statement->getParent()->getParamSpace();
   AccessRelation = isl_map_align_params(AccessRelation, ParamSpace);
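[Reviewer note: the dimension adjustment in copyTo is needed because the copied access may land in a statement with more enclosing loops. A standalone sketch of the same isl manipulation; the statement/array names are made up:]

```cpp
#include <isl/ctx.h>
#include <isl/map.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();
  // Lift { S1[i] -> A[i] } into the two-dimensional statement S2: pad the
  // input tuple with one extra dimension, then rename the tuple. The new
  // dimension j is unconstrained, i.e., the access stays A[i] for every j.
  isl_map *AR = isl_map_read_from_str(Ctx, "{ S1[i] -> A[i] }");
  AR = isl_map_add_dims(AR, isl_dim_in, 1);
  AR = isl_map_set_tuple_name(AR, isl_dim_in, "S2");
  isl_map_dump(AR); // { S2[i, j] -> A[i] }
  isl_map_free(AR);
  isl_ctx_free(Ctx);
  return 0;
}
```

In the patch itself the tuple is renamed via the new statement's domain id rather than a string name, which keeps the pointer-based identity isl ids provide.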
@@ -966,7 +987,7 @@
   }
 }

-void ScopStmt::addAccess(MemoryAccess *Access) {
+void ScopStmt::addAccess(MemoryAccess *Access, bool Front) {
   Instruction *AccessInst = Access->getAccessInstruction();

   if (Access->isArrayKind()) {
@@ -990,7 +1011,10 @@
     PHIWrites[PHI] = Access;
   }

-  MemAccs.push_back(Access);
+  if (Front)
+    MemAccs.insert(MemAccs.begin(), Access);
+  else
+    MemAccs.push_back(Access);
 }

 void ScopStmt::realignParams() {
@@ -1547,7 +1571,8 @@

 void ScopStmt::dump() const { print(dbgs()); }

-void ScopStmt::removeMemoryAccesses(MemoryAccessList &InvMAs) {
+void ScopStmt::removeMemoryAccesses(const MemoryAccessList &InvMAs,
+                                    bool OnlyMA) {
   // Remove all memory accesses in @p InvMAs from this statement
   // together with all scalar accesses that were caused by them.
   // MK_Value READs have no access instruction, hence would not be removed by
@@ -1556,11 +1581,19 @@
   // are no MK_Value READ accesses to be removed.
   for (MemoryAccess *MA : InvMAs) {
     auto Predicate = [&](MemoryAccess *Acc) {
-      return Acc->getAccessInstruction() == MA->getAccessInstruction();
+      return Acc == MA ||
+             (!OnlyMA &&
+              Acc->getAccessInstruction() == MA->getAccessInstruction());
     };
     MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate),
                   MemAccs.end());
     InstructionToAccess.erase(MA->getAccessInstruction());
+    if (MA->isValueKind() && MA->isWrite())
+      ValueWrites.erase(MA->getAccessInstruction());
+    else if (MA->isValueKind() && MA->isRead())
+      ValueReads.erase(MA->getAccessValue());
+    else if (MA->isAnyPHIKind() && MA->isWrite())
+      PHIWrites.erase(cast<PHINode>(MA->getBaseAddr()));
   }
 }

@@ -2746,7 +2779,7 @@
 }

 void Scop::init(AliasAnalysis &AA, AssumptionCache &AC, ScopDetection &SD,
-                DominatorTree &DT, LoopInfo &LI) {
+                DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) {
   addUserAssumptions(AC, DT, LI);
   buildInvariantEquivalenceClasses(SD);

@@ -2776,6 +2809,7 @@
   buildAliasChecks(AA);

   hoistInvariantLoads(SD);
+  simplifyScalarAccesses(LI, SE);
   simplifySCoP(false, DT, LI);
 }

@@ -3055,6 +3089,263 @@
   verifyInvariantLoads(SD);
 }

+bool Scop::canRecomputeInStmt(ScopStmt &Stmt,
+                              SmallPtrSet<Instruction *, 8> &Insts) {
+  if (Insts.empty())
+    return true;
+
+  // TODO: Check if we can actually move the instructions.
+  return false;
+}
+
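[Reviewer note: the TODO above is why the two eliminate-scalar-caused-by-load-reduction tests below are negative tests; nothing with side-effect operands is recomputed yet. A possible future refinement, sketched here under the assumption that llvm::isSafeToSpeculativelyExecute is the right predicate; this is not part of the patch:]

```cpp
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical stand-alone relaxation: allow recomputation if every
// collected instruction is speculatable, i.e., neither writes memory nor
// can trap. Loads would additionally need a dependence check proving the
// loaded location is unchanged between the definition and the
// recomputation point, which this sketch does not attempt.
static bool allSpeculatable(const SmallPtrSet<Instruction *, 8> &Insts) {
  for (Instruction *Inst : Insts)
    if (!isSafeToSpeculativelyExecute(Inst))
      return false;
  return true;
}
```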
+void Scop::collectNonTrivialOperands(LoopInfo &LI, ScalarEvolution &SE) {
+  // See: Scop::simplifyScalarAccesses()
+
+  SmallPtrSet<Instruction *, 16> Visited;
+  for (ScopStmt &Stmt : *this) {
+    for (MemoryAccess *MA : Stmt) {
+      if (MA->isArrayKind() || MA->isRead() || MA->isPHIKind())
+        continue;
+
+      Instruction *AccessInst = cast<Instruction>(MA->getAccessValue());
+      if (NonTrivialOperandsMap.count(AccessInst))
+        continue;
+
+      DEBUG(dbgs() << "\nCheck operand tree of '" << *AccessInst << "'\n");
+
+      auto &NonTrivialOperands = NonTrivialOperandsMap[AccessInst];
+      auto &SideEffectOperands = NonTrivialOperands.first;
+      auto &OutsideOperands = NonTrivialOperands.second;
+
+      SmallPtrSet<Instruction *, 16> Worklist;
+      Worklist.insert(AccessInst);
+      Visited.clear();
+
+      while (!Worklist.empty()) {
+        Instruction *Inst = *Worklist.begin();
+        Worklist.erase(Inst);
+
+        if (!Visited.insert(Inst).second)
+          continue;
+
+        for (auto &InstOp : Inst->operands())
+          if (Instruction *InstOpInst = dyn_cast<Instruction>(InstOp)) {
+            if (R.contains(InstOpInst))
+              Worklist.insert(InstOpInst);
+            else
+              OutsideOperands.push_back(std::make_pair(InstOpInst, Inst));
+          }
+
+        if (Inst->mayHaveSideEffects() || Inst->mayReadFromMemory())
+          SideEffectOperands.insert(Inst);
+
+        if (!isa<PHINode>(Inst))
+          continue;
+
+        if (R.contains(Inst) && canSynthesize(Inst, &LI, &SE, &R))
+          continue;
+
+        SideEffectOperands.insert(Inst);
+      }
+
+      DEBUG({
+        dbgs() << "\tSideEffectOperands: {\n";
+        for (auto *Op : SideEffectOperands)
+          dbgs() << "\t\t" << *Op << "\n";
+        dbgs() << "\t}\n";
+        dbgs() << "\tOutsideOperands: {\n";
+        for (const auto &OpPair : OutsideOperands) {
+          dbgs() << "\t\t" << *OpPair.first << "\n";
+          dbgs() << "\t\tused by " << *OpPair.second << "\n";
+        }
+        dbgs() << "\t}\n\n";
+      });
+    }
+  }
+}
+
+void Scop::removeRecomputableScalarReads(ScopStmt &Stmt) {
+  // See: Scop::simplifyScalarAccesses()
+
+  BasicBlock *StmtBB =
+      Stmt.isBlockStmt() ? Stmt.getBasicBlock() : Stmt.getRegion()->getEntry();
+  AccFuncSetType &AccList = AccFuncMap[StmtBB];
+
+  SmallVector<MemoryAccess *, 4> AdditionalAccesses;
+  MemoryAccessList ResolvedAccesses;
+
+  for (MemoryAccess *MA : Stmt) {
+    if (!(MA->isValueKind() && MA->isRead()))
+      continue;
+
+    auto *DefInst = dyn_cast<Instruction>(MA->getAccessValue());
+
+    // Skip read-only scalars.
+    if (!DefInst)
+      continue;
+
+    // Skip scalars defined outside a statement or in non-affine regions.
+    auto *DefStmt = getStmtForBasicBlock(DefInst->getParent());
+    if (!DefStmt || DefStmt->isRegionStmt())
+      continue;
+
+    // Check if the scalar can be recomputed in this statement.
+    auto &NonTrivialOperands = NonTrivialOperandsMap[DefInst];
+    auto &SideEffectOperands = NonTrivialOperands.first;
+    if (!canRecomputeInStmt(Stmt, SideEffectOperands))
+      continue;
+
+    // If the scalar can be recomputed, we copy all accesses of read-only
+    // scalars that are part of the operand tree of the recomputed value.
+    auto &OutsideOperands = NonTrivialOperands.second;
+    for (auto &OutsideOperandPair : OutsideOperands) {
+      Instruction *OutsideOperand = OutsideOperandPair.first;
+      Instruction *OutsideUser = OutsideOperandPair.second;
+      ScopStmt *UserStmt = getStmtForBasicBlock(OutsideUser->getParent());
+      if (!UserStmt)
+        continue;
+
+      // If an access to the read-only scalar is already present, continue.
+      if (Stmt.lookupValueReadOf(OutsideOperand))
+        continue;
+
+      // If an access to the read-only scalar was generated, copy it.
+      auto *UseMA = UserStmt->lookupValueReadOf(OutsideOperand);
+      if (UseMA)
+        AdditionalAccesses.push_back(UseMA->copyTo(AccList, &Stmt));
+    }
+
+    Stmt.addDependentScalar(DefInst);
+    ResolvedAccesses.push_front(MA);
+  }
+
+  Stmt.removeMemoryAccesses(ResolvedAccesses, true);
+  for (MemoryAccess *MA : AdditionalAccesses)
+    Stmt.addAccess(MA, true);
+}
+
+void Scop::removeUnreadScalarWrites(ScopStmt &Stmt) {
+  // See: Scop::simplifyScalarAccesses()
+
+  // Predicate that will evaluate to true if we have to assume that
+  // @p AccessValue will be read in @p BB and additionally escape @p BB.
+  auto ContainsRead = [&](BasicBlock *BB, Value *AccessValue) {
+    auto *UStmt = getStmtForBasicBlock(BB);
+    if (!UStmt)
+      return false;
+
+    // TODO: This is too conservative.
+    if (UStmt->isRegionStmt())
+      return true;
+
+    // If there is no write in @p BB it will not escape @p BB, thus the
+    // read is a no-op.
+    bool ContainsWrite = false;
+    for (auto *UStmtMA : *UStmt)
+      ContainsWrite |= UStmtMA->isWrite();
+    if (!ContainsWrite)
+      return false;
+
+    return UStmt->lookupValueReadOf(AccessValue) != nullptr;
+  };
+
+  // Predicate that will evaluate to true if @p UserInst is a user of
+  // @p AccessValue in the current polyhedral description of the SCoP.
+  auto IsRemainingUser = [&](Instruction *UserInst, Value *AccessValue) {
+    auto *UserBB = UserInst->getParent();
+
+    // If the user is outside the SCoP the @p AccessValue is escaping and
+    // we implicitly model the escaping use.
+    if (!R.contains(UserBB))
+      return true;
+
+    auto *UserPHI = dyn_cast<PHINode>(UserInst);
+    if (!UserPHI)
+      return ContainsRead(UserBB, AccessValue);
+
+    // A PHI in the SCoP entry block can receive values from outside the
+    // SCoP if there is no dedicated entering block.
+    if (UserBB == R.getEntry() && !R.getEnteringBlock())
+      return true;
+
+    // TODO: This is too conservative.
+    if (auto *UserPHIStmt = getStmtForBasicBlock(UserBB))
+      if (UserPHIStmt->isRegionStmt())
+        return true;
+
+    // If the user is a PHI node we have to check the incoming blocks of that
+    // PHI that are associated with the access instruction for possible uses.
+    for (unsigned u = 0, e = UserPHI->getNumIncomingValues(); u < e; u++)
+      if (UserPHI->getIncomingValue(u) == AccessValue)
+        if (ContainsRead(UserPHI->getIncomingBlock(u), AccessValue))
+          return true;
+
+    return false;
+  };
+
+  // Iterate over all accesses in @p Stmt and check for each value write
+  // if we still have a (possible) user. If not, we will remove the access
+  // as well as the corresponding ScopArrayInfo object.
+  SmallVector<MemoryAccess *, 4> ResolvedAccesses;
+  for (MemoryAccess *MA : Stmt) {
+    if (!(MA->isValueKind() && MA->isWrite()))
+      continue;
+
+    bool AllUsersRemoved = true;
+    auto *AccessInst = cast<Instruction>(MA->getAccessValue());
+    for (auto *User : AccessInst->users())
+      AllUsersRemoved &= !IsRemainingUser(cast<Instruction>(User), AccessInst);
+
+    if (AllUsersRemoved)
+      ResolvedAccesses.push_back(MA);
+  }
+
+  for (MemoryAccess *MA : ResolvedAccesses) {
+    // Remove the ScopArrayInfo object for this scalar as we neither
+    // define nor read it in the SCoP description but only copy the original
+    // version in the unoptimized region.
+    removeScopArrayInfo(MA->getScopArrayInfo());
+
+    Stmt.removeMemoryAccesses({MA}, true);
+  }
+}
+
+void Scop::simplifyScalarAccesses(LoopInfo &LI, ScalarEvolution &SE) {
+  // First iterate over all implicit write accesses, hence scalar definitions,
+  // and collect all operands that might have side effects or read memory as
+  // well as all operands that are defined outside the SCoP. The former is
+  // needed to decide if we can recompute the scalar definition in another
+  // statement. The latter is needed to add read-only scalar accesses to the
+  // statement in which the scalar is recomputed. That allows us to identify
+  // values needed, e.g., for parallel code generation.
+  collectNonTrivialOperands(LI, SE);
+
+  // In the second step traverse all implicit read accesses, hence scalar uses
+  // in statements that do not define the scalar. However, at the moment we
+  // exclude PHIs to simplify the logic. For each use we will check if we can
+  // recompute the definition in this block (see canRecomputeInStmt()).
+  // If so we will:
+  //   o Add the definition to the dependent scalars set of the use statement.
+  //   o Add read accesses for all values defined prior to the SCoP if they
+  //     were present in the definition statement.
+  //   o Remove the use access from the use statement as it will be recomputed
+  //     and does not need to be communicated anymore.
+  for (ScopStmt &Stmt : *this)
+    removeRecomputableScalarReads(Stmt);
+
+  // In the third and final step we iterate over the scalar definitions in
+  // the SCoP again. We will check if we removed the accesses for all users
+  // of the scalar in the SCoP. If so, we can safely remove the scalar write
+  // access as all users will recompute the value. As we currently cannot
+  // simply use this logic to recompute values at the exit of the SCoP we will
+  // not remove scalars that escape the SCoP.
+  for (ScopStmt &Stmt : *this)
+    removeUnreadScalarWrites(Stmt);
+}
+
+void Scop::removeScopArrayInfo(const ScopArrayInfo *SAI) {
+  ScopArrayInfoMap.erase(std::make_pair(SAI->getBasePtr(), SAI->getKind()));
+}
+
 const ScopArrayInfo *
 Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
                                ArrayRef<const SCEV *> Sizes,
@@ -4153,7 +4444,7 @@
   buildAccessFunctions(R, *R.getExit(), *SD->getInsnToMemAccMap(&R), nullptr,
                        /* IsExitBlock */ true);

-  scop->init(*AA, AC, *SD, *DT, *LI);
+  scop->init(*AA, AC, *SD, *DT, *LI, *SE);
 }

 void ScopInfo::print(raw_ostream &OS, const Module *) const {
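[Reviewer note: to see why removeUnreadScalarWrites keeps writes for escaping values, consider this sketch; the function is hypothetical and not from the test suite. The use of x after the loop lies outside the SCoP, so IsRemainingUser() reports a remaining user and the scalar write survives to be merged at the SCoP exit.]

```cpp
// The scalar write to MemRef_x must stay even after all in-SCoP reads are
// turned into recomputations, because the value escapes the region.
float keepScalarWrite(float *A) {
  float x = 0;
  for (int i = 0; i < 1000; i++)
    x += A[i];
  return x; // escaping use outside the SCoP
}
```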
Index: lib/CodeGen/BlockGenerators.cpp
===================================================================
--- lib/CodeGen/BlockGenerators.cpp
+++ lib/CodeGen/BlockGenerators.cpp
@@ -59,10 +59,18 @@
       EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap),
       EscapeMap(EscapeMap), GlobalMap(GlobalMap) {}

+void BlockGenerator::recomputeDependentScalars(
+    ScopStmt &Stmt, ValueMapT &BBMap, LoopToScevMapT &LTS,
+    isl_id_to_ast_expr *NewAccesses) {
+
+  for (auto *Inst : Stmt.getDependentScalars())
+    if (!GlobalMap.count(Inst))
+      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses, true);
+}
+
 Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                              ValueMapT &BBMap,
-                                             LoopToScevMapT &LTS,
-                                             Loop *L) const {
+                                             LoopToScevMapT &LTS, Loop *L) {
   if (!SE.isSCEVable(Old->getType()))
     return nullptr;

@@ -74,6 +82,17 @@
     return nullptr;

   const SCEV *NewScev = apply(Scev, LTS, SE);
+
+  // Recompute scalars needed for this SCEV.
+  const Region &R = Stmt.getParent()->getRegion();
+  SetVector<Value *> Values;
+  findValues(NewScev, Values);
+  for (Value *Val : Values) {
+    if (Instruction *Inst = dyn_cast<Instruction>(Val))
+      if (R.contains(Inst))
+        copyInstruction(Stmt, Inst, BBMap, LTS, nullptr, true);
+  }
+
   ValueMapT VTV;
   VTV.insert(BBMap.begin(), BBMap.end());
   VTV.insert(GlobalMap.begin(), GlobalMap.end());
@@ -93,7 +112,7 @@
 }

 Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                                   LoopToScevMapT &LTS, Loop *L) const {
+                                   LoopToScevMapT &LTS, Loop *L, bool TryOnly) {
   // Constants that do not reference any named value can always remain
   // unchanged. Handle them early to avoid expensive map lookups. We do not
   // take the fast-path for external constants which are referenced through
   // globals
@@ -119,8 +138,9 @@
   if (Value *New = BBMap.lookup(Old))
     return New;

-  if (Value *New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L))
-    return New;
+  if (canSynthesize(Old, &LI, &SE, &Stmt.getParent()->getRegion()))
+    if (Value *New = trySynthesizeNewValue(Stmt, Old, BBMap, LTS, L))
+      return New;

   // A scop-constant value defined by a global or a function parameter.
   if (isa<GlobalValue>(Old) || isa<Argument>(Old))
@@ -131,29 +151,44 @@
   if (!Stmt.getParent()->getRegion().contains(Inst->getParent()))
     return Old;

+  if (TryOnly)
+    return nullptr;
+
   // The scalar dependence is neither available nor SCEVCodegenable.
   llvm_unreachable("Unexpected scalar dependence in region!");
-  return nullptr;
 }

 void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
-                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
+                                    ValueMapT &BBMap, LoopToScevMapT &LTS,
+                                    bool Recompute) {
   // We do not generate debug intrinsics as we did not investigate how to
   // copy them correctly. At the current state, they just crash the code
   // generation as the meta-data operands are not correctly copied.
   if (isa<DbgInfoIntrinsic>(Inst))
     return;

+  const Region &R = Stmt.getParent()->getRegion();
   Instruction *NewInst = Inst->clone();

   // Replace old operands with the new ones.
   for (Value *OldOperand : Inst->operands()) {
-    Value *NewOperand =
-        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForStmt(Stmt));
+    Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, LTS,
+                                    getLoopForStmt(Stmt), Recompute);
+
+    if (Recompute) {
+      Instruction *NewOperandInst = dyn_cast_or_null<Instruction>(NewOperand);
+      if (!NewOperand || (NewOperandInst && R.contains(NewOperandInst))) {
+        if (Instruction *OldOperandInst = dyn_cast<Instruction>(OldOperand)) {
+          copyInstruction(Stmt, OldOperandInst, BBMap, LTS, nullptr, Recompute);
+          NewOperand = BBMap[OldOperand];
+        }
+      }
+    }

     if (!NewOperand) {
       assert(!isa<StoreInst>(NewInst) &&
              "Store instructions are always needed!");
+      assert(!Recompute && "Recompute copy should never fail");
       delete NewInst;
       return;
     }
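[Reviewer note: the recursive probe-then-copy just added to copyInstScalar is the consumer of the new TryOnly mode. Stripped of the diff context, the core pattern looks like this; the snippet is schematic and assumes the BlockGenerator member context:]

```cpp
// Probe for a mapped or synthesizable operand without trapping; if the
// probe fails (or still names the original in-region instruction), copy
// the defining instruction recursively so BBMap learns the new operand.
Value *NewOp = getNewValue(Stmt, OldOp, BBMap, LTS, L, /*TryOnly=*/true);
if (!NewOp && isa<Instruction>(OldOp)) {
  copyInstruction(Stmt, cast<Instruction>(OldOp), BBMap, LTS, nullptr,
                  /*Recompute=*/true);
  NewOp = BBMap[OldOp];
}
```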
@@ -244,7 +279,9 @@
 void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                      ValueMapT &BBMap, LoopToScevMapT &LTS,
-                                     isl_id_to_ast_expr *NewAccesses) {
+                                     isl_id_to_ast_expr *NewAccesses,
+                                     bool Recompute) {
+
   // Terminator instructions control the control flow. They are explicitly
   // expressed in the clast and do not need to be copied.
   if (Inst->isTerminator())
     return;
@@ -277,7 +314,7 @@
   if (isIgnoredIntrinsic(Inst))
     return;

-  copyInstScalar(Stmt, Inst, BBMap, LTS);
+  copyInstScalar(Stmt, Inst, BBMap, LTS, Recompute);
 }

 void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
@@ -303,6 +340,9 @@
                               isl_id_to_ast_expr *NewAccesses) {
   BasicBlock *CopyBB = splitBB(BB);
   Builder.SetInsertPoint(&CopyBB->front());
+
+  recomputeDependentScalars(Stmt, BBMap, LTS, NewAccesses);
+
   generateScalarLoads(Stmt, BBMap);

   copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);
@@ -1043,6 +1083,7 @@
   Builder.SetInsertPoint(&EntryBBCopy->front());

   ValueMapT &EntryBBMap = RegionMaps[EntryBBCopy];
+  recomputeDependentScalars(Stmt, EntryBBMap, LTS, IdToAstExp);
   generateScalarLoads(Stmt, EntryBBMap);

   for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
Index: test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
===================================================================
--- test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
+++ test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll
@@ -7,8 +7,8 @@
 ;    for (int i = 1; i < 1000; i++)
 ;      A[i] += /* split bb */ A[0];
 ;  }
-;                                          A[0]  tmp (unused)  A
-; CHECK: %polly.par.userContext = alloca { float, float*, float* }
+;                                          A[0]    A
+; CHECK: %polly.par.userContext = alloca { float, float* }
 ;
 ; CHECK:  %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds
 ; CHECK:  store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
Index: test/Isl/CodeGen/eliminate-multiple-scalar-fp-reads.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/eliminate-multiple-scalar-fp-reads.ll
@@ -0,0 +1,90 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
+;
+; SCOP-NOT: Scalar: 1
+; SCOP-NOT: ReadAccess
+;
+; Verify the original region is untouched but all computation is moved to the
+; only place it is needed in the generated region.
+;
+; CHECK:      for.body.f:
+; CHECK-NEXT:   %idxprom = sext i32 %i.0 to i64
+; CHECK-NEXT:   %arrayidx = getelementptr inbounds float, float* %A, i64 %idxprom
+; CHECK-NEXT:   store float %add5, float* %arrayidx
+;
+; CHECK:      polly.stmt.for.body.f:
+; CHECK:        %0 = trunc i64 %polly.indvar to i32
+; CHECK:        %1 = shl i32 %0, 1
+; CHECK:        %p_conv = sitofp i32 %1 to float
+; CHECK:        %p_add = fadd float %p_conv, %p_conv
+; CHECK:        %p_add3 = fadd float %p_conv, %p_add
+; CHECK:        %p_add1 = fadd float %p_add, %p_conv
+; CHECK:        %p_add4 = fadd float %p_add3, %p_add1
+; CHECK:        %p_add2 = fadd float %p_conv, %p_conv
+; CHECK:        %p_add5 = fadd float %p_add4, %p_add2
+; CHECK:        %scevgep = getelementptr float, float* %A, i64 %polly.indvar
+; CHECK:        store float %p_add5, float* %scevgep
+;
+;    void f(float *A) {
+;      for (int i = 0; i < 1000; i++) {
+;        float a = i * 2;
+;        /* split BB */
+;        float b = a + a;
+;        /* split BB */
+;        float c = b + a;
+;        /* split BB */
+;        float d = a + a;
+;        /* split BB */
+;        float e = a + b + c + d;
+;        /* split BB */
+;        A[i] = e;
+;      }
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, 1000
+  br i1 %cmp, label %for.body.a, label %for.end
+
+for.body.a:
+  %mul = mul nsw i32 %i.0, 2
+  %conv = sitofp i32 %mul to float
+  br label %for.body.b
+
+for.body.b:
+  %add = fadd float %conv, %conv
+  br label %for.body.c
+
+for.body.c:
+  %add1 = fadd float %add, %conv
+  br label %for.body.d
+
+for.body.d:
+  %add2 = fadd float %conv, %conv
+  br label %for.body.e
+
+for.body.e:
+  %add3 = fadd float %conv, %add
+  %add4 = fadd float %add3, %add1
+  %add5 = fadd float %add4, %add2
+  br label %for.body.f
+
+for.body.f:
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %idxprom
+  store float %add5, float* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/Isl/CodeGen/eliminate-multiple-scalar-reads.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/eliminate-multiple-scalar-reads.ll
@@ -0,0 +1,82 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
+;
+; SCOP-NOT: Scalar: 1
+; SCOP-NOT: ReadAccess
+;
+; Verify the original region is untouched but all computation is moved to the
+; only place it is needed in the generated region.
+;
+; CHECK:      for.body.f:
+; CHECK-NEXT:   %idxprom6 = sext i32 %i.0 to i64
+; CHECK-NEXT:   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %idxprom6
+; CHECK-NEXT:   store i32 %add5, i32* %arrayidx7, align 4
+;
+; CHECK:      polly.stmt.for.body.f:
+; CHECK:        %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar
+; CHECK:        %0 = trunc i64 %polly.indvar to i32
+; CHECK:        %1 = shl i32 %0, 4
+; CHECK:        store i32 %1, i32* %scevgep
+;
+;    void f(int *A) {
+;      for (int i = 0; i < 1000; i++) {
+;        int a = i * 2;
+;        /* split BB */
+;        int b = a + a;
+;        /* split BB */
+;        int c = b + a;
+;        /* split BB */
+;        int d = a + a;
+;        /* split BB */
+;        int e = a + b + c + d;
+;        /* split BB */
+;        A[i] = e;
+;      }
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, 1000
+  br i1 %cmp, label %for.body.a, label %for.end
+
+for.body.a:                                       ; preds = %for.cond
+  %tmp = mul nsw i32 %i.0, 2
+  br label %for.body.b
+
+for.body.b:
+  %add = add nsw i32 %tmp, %tmp
+  br label %for.body.c
+
+for.body.c:
+  %add1 = add nsw i32 %add, %tmp
+  br label %for.body.d
+
+for.body.d:
+  %add2 = add nsw i32 %tmp, %tmp
+  br label %for.body.e
+
+for.body.e:
+  %add3 = add nsw i32 %tmp, %add
+  %add4 = add nsw i32 %add3, %add1
+  %add5 = add nsw i32 %add4, %add2
+  br label %for.body.f
+
+for.body.f:
+  %idxprom6 = sext i32 %i.0 to i64
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %idxprom6
+  store i32 %add5, i32* %arrayidx7, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+; +; CHECK: Stmt_for_body_split +; CHECK-NOT: MemRef_mul +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_tmp[] }; +; CHECK-NOT: MemRef_mul +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_A[i0] }; +; CHECK-NOT: MemRef_mul +; +; CODEGEN: polly.stmt.for.body.split: +; CODEGEN-NEXT: %p_mul = fmul float %tmp, 2.000000e+00 +; // Unused: +; CODEGEN-NEXT: %tmp.s2a.reload = load float, float* %tmp.s2a +; +; CODEGEN-NEXT: %scevgep = getelementptr float, float* %A, i64 %polly.indvar +; CODEGEN-NEXT: store float %p_mul, +; +; void f(float *A) { +; float x = A[-1]; +; for (int i = 0; i < 1000; i++) { +; float a = x * 2; +; /* split BB */ +; A[i] = a; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A) { +entry: + %arrayidx = getelementptr inbounds float, float* %A, i64 -1 + %tmp = load float, float* %arrayidx, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %mul = fmul float %tmp, 2.000000e+00 + br label %for.body.split + +for.body.split: ; preds = %for.cond + %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv + store float %mul, float* %arrayidx1, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/srem-in-other-bb.ll =================================================================== --- test/Isl/CodeGen/srem-in-other-bb.ll +++ test/Isl/CodeGen/srem-in-other-bb.ll @@ -1,18 +1,13 @@ -; RUN: opt %loadPolly -polly-codegen -S \ -; RUN: < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s ; ; void pos(float *A, long n) { ; for (long i = 0; i < 100; i++) ; A[n % 42] += 1; ; } ; -; CHECK: polly.stmt.bb2: -; CHECK-NEXT: %p_tmp = srem i64 %n, 42 -; CHECK-NEXT: store i64 %p_tmp, i64* %tmp.s2a -; -; CHECK: polly.stmt.bb3: -; CHECK: %tmp.s2a.reload = load i64, i64* %tmp.s2a -; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %tmp.s2a.reload +; CHECK: polly.stmt.bb3: +; CHECK: %[[rem:[._a-zA-Z0-9]*]] = srem i64 %n, 42 +; CHECK: getelementptr inbounds float, float* %A, i64 %[[rem]] define void @pos(float* %A, i64 %n) { bb: Index: test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN +; +; This is a negative test. We should move the load to the split block +; and remove all scalar accesses, however at the moment we only move +; instructions that are trivially safe to move. All three checks should +; be negated at some point. This also checks that we currently not try to +; move a part of the scalar operand chain, i.e., the %add instruction. 
+; +; CHECK: Scalar: 1 +; CHECK: Scalar: 1 +; CHECK-NOT: Reduction: + +; +; These checks should stay as they verify we did not modify the original region: +; +; CODEGEN: for.body.split: +; CODEGEN-NEXT: %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv +; CODEGEN-NEXT: store i32 %add, i32* %arrayidx2, align 4 +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int x = A[i] + 3; +; /* split BB */ +; A[i] = x; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 3 + br label %for.body.split + +for.body.split: + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; This is a negative test. We should move the load to the split block +; and remove all scalar accesses, however at the moment we only move +; instructions that are trivially safe to move. All three checks should +; be negated at some point. 
+; +; CHECK: Scalar: 1 +; CHECK: Scalar: 1 +; CHECK-NOT: Reduction: + +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int x = A[i]; +; /* split BB */ +; A[i] = x + 3; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp = load i32, i32* %arrayidx, align 4 + br label %for.body.split + +for.body.split: + %add = add nsw i32 %tmp, 3 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %add, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/inter_bb_scalar_dep.ll =================================================================== --- test/ScopInfo/inter_bb_scalar_dep.ll +++ test/ScopInfo/inter_bb_scalar_dep.ll @@ -1,4 +1,5 @@ ; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-codegen -analyze < %s ; void f(long A[], int N, int *init_ptr) { ; long i, j; @@ -36,10 +37,11 @@ %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] %init_plus_two = add i64 %init, 2 ; CHECK-LABEL: Stmt_for_j -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init[] }; +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NOT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init[] }; ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] %scevgep = getelementptr i64, i64* %A, i64 %indvar.j store i64 %init_plus_two, i64* %scevgep %indvar.j.next = add nsw i64 %indvar.j, 1 Index: test/ScopInfo/intra_and_inter_bb_scalar_dep.ll =================================================================== --- test/ScopInfo/intra_and_inter_bb_scalar_dep.ll +++ test/ScopInfo/intra_and_inter_bb_scalar_dep.ll @@ -24,8 +24,6 @@ ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] : 0 <= i0 < N and 0 <= i1 < N }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> [i0, i1] }; -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init[] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; ; CHECK-NEXT: } Index: test/ScopInfo/invariant-loads-leave-read-only-statements.ll =================================================================== --- test/ScopInfo/invariant-loads-leave-read-only-statements.ll +++ test/ScopInfo/invariant-loads-leave-read-only-statements.ll @@ -8,8 +8,6 @@ ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [p_0, p_1, p_2] -> { Stmt_top_split[] -> [0, 0, 0, 0] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [p_0, p_1, p_2] -> { Stmt_top_split[] -> MemRef_26[] }; -; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [p_0, p_1, p_2] -> { Stmt_top_split[] -> MemRef_25[] }; ; CHECK-NEXT: Stmt_L_4 ; CHECK-NEXT: Domain := Index: test/ScopInfo/multidim_fortran_srem.ll 
=================================================================== --- test/ScopInfo/multidim_fortran_srem.ll +++ test/ScopInfo/multidim_fortran_srem.ll @@ -2,26 +2,13 @@ target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" ; CHECK: Statements { -; CHECK-NEXT: Stmt_bb188 -; CHECK-NEXT: Domain := -; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb188[i0] : 0 <= i0 <= -3 + tmp183 }; -; CHECK-NEXT: Schedule := -; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb188[i0] -> [i0, 0, 0, 0] }; -; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb188[i0] -> MemRef_tmp192[] }; -; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb188[i0] -> MemRef_tmp194[] }; ; CHECK-NEXT: Stmt_bb203 ; CHECK-NEXT: Domain := ; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb203[i0, i1, i2] : 0 <= i0 <= -3 + tmp183 and 0 <= i1 <= -3 + tmp180 and 0 <= i2 <= -3 + tmp177 }; ; CHECK-NEXT: Schedule := -; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb203[i0, i1, i2] -> [i0, 1, i1, i2] }; -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb203[i0, i1, i2] -> MemRef_tmp192[] }; +; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb203[i0, i1, i2] -> [i0, i1, i2] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb203[i0, i1, i2] -> MemRef_tmp173[o0, 1 + i1, 1 + i2] : 3*floor((-i0 + o0)/3) = -i0 + o0 and 0 <= o0 <= 2 }; -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb203[i0, i1, i2] -> MemRef_tmp194[] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [tmp180, tmp177, tmp183, tmp162, tmp157, tmp150, tmp146, tmp140, tmp] -> { Stmt_bb203[i0, i1, i2] -> MemRef_tmp173[o0, 1 + i1, 1 + i2] : 3*floor((-2 - i0 + o0)/3) = -2 - i0 + o0 and 0 <= o0 <= 2 }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] Index: test/ScopInfo/out-of-scop-use-in-region-entry-phi-node.ll =================================================================== --- test/ScopInfo/out-of-scop-use-in-region-entry-phi-node.ll +++ test/ScopInfo/out-of-scop-use-in-region-entry-phi-node.ll @@ -1,7 +1,9 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [p_0] -> { Stmt_bb3[] -> MemRef_tmp5[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [p_0] -> { Stmt_bb3[] -> MemRef_tmp5[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [p_0] -> { Stmt_bb3[] -> MemRef_tmp7[] }; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/scalar_dependence_cond_br.ll =================================================================== --- 
test/ScopInfo/scalar_dependence_cond_br.ll +++ test/ScopInfo/scalar_dependence_cond_br.ll @@ -6,13 +6,11 @@ ; A[i]++; ; } ; -; FIXME: This test is a negative test until we have an independent blocks alternative. -; ; We should move operands as close to their use as possible, hence in this case -; there should not be any scalar dependence anymore after %cmp1 is moved to -; %for.body (%c and %indvar.iv are synthesis able). +; there should not be any scalar dependence anymore after %cmp1 is virtually +; moved to %for.body (%c and %indvar.iv are synthesis able). ; -; CHECK: [Scalar: 1] +; CHECK-NOT: [Scalar: 1] ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/schedule-const-post-dominator-walk.ll =================================================================== --- test/ScopInfo/schedule-const-post-dominator-walk.ll +++ test/ScopInfo/schedule-const-post-dominator-walk.ll @@ -1,4 +1,5 @@ ; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s +; RUN: opt %loadPolly -disable-output -polly-codegen < %s ; CHECK: { Stmt_bb3[i0] -> [0, 0] }; ; CHECK: { Stmt_bb2[] -> [1, 0] }; Index: test/ScopInfo/tempscop-printing.ll =================================================================== --- test/ScopInfo/tempscop-printing.ll +++ test/ScopInfo/tempscop-printing.ll @@ -1,4 +1,5 @@ ; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-codegen -analyze < %s ; void f(long A[], int N, int *init_ptr) { ; long i, j; @@ -19,8 +20,6 @@ ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] : 0 <= i0 < N and 0 <= i1 < N }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> [i0, i1] }; -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init[] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; ; CHECK-NEXT: } @@ -35,8 +34,6 @@ ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> [i0, i1] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init[] }; ; CHECK-NEXT: } target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" Index: test/ScopInfo/unneeded_scalar_dependences-1.ll =================================================================== --- /dev/null +++ test/ScopInfo/unneeded_scalar_dependences-1.ll @@ -0,0 +1,63 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; CHECK-NOT: Scalar: 1 +; +; void f(int *A, int N) { +; for (int i = 0; i < N; i++) { +; int x = i + 1; +; for (int j = 0; j < N; j++) +; A[i] += A[j]; +; A[i] += x; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc.10, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc.10 ], [ 0, %entry ] + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + %tmp6 = trunc i64 %indvars.iv.next2 to i32 + %cmp = icmp slt i64 %indvars.iv1, %tmp + br i1 %cmp, label %for.body, label %for.end.12 + +for.body: ; preds = %for.cond + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %indvars.iv = phi 
i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %lftr.wideiv = trunc i64 %indvars.iv to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %N + br i1 %exitcond, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp3 = load i32, i32* %arrayidx, align 4 + %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %tmp4 = load i32, i32* %arrayidx5, align 4 + %add6 = add nsw i32 %tmp4, %tmp3 + store i32 %add6, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %tmp5 = load i32, i32* %arrayidx8, align 4 + %add9 = add nsw i32 %tmp5, %tmp6 + store i32 %add9, i32* %arrayidx8, align 4 + br label %for.inc.10 + +for.inc.10: ; preds = %for.end + br label %for.cond + +for.end.12: ; preds = %for.cond + ret void +} Index: test/ScopInfo/unneeded_scalar_dependences-2.ll =================================================================== --- /dev/null +++ test/ScopInfo/unneeded_scalar_dependences-2.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; CHECK-NOT: Scalar: 1 +; +; void f(int *A, int N) { +; for (int i = 0; i < N; i++) { +; int x = i + 3; +; for (int j = 0; j < N; j++) +; A[i] += A[x + j]; +; A[x] += x; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc.11, %entry + %indvars.iv2 = phi i64 [ %indvars.iv.next3, %for.inc.11 ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv2, %tmp + br i1 %cmp, label %for.body, label %for.end.13 + +for.body: ; preds = %for.cond + %tmp5 = add nuw nsw i64 %indvars.iv2, 3 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %lftr.wideiv = trunc i64 %indvars.iv to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %N + br i1 %exitcond, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %tmp6 = add nuw nsw i64 %tmp5, %indvars.iv + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %tmp6 + %tmp7 = load i32, i32* %arrayidx, align 4 + %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv2 + %tmp8 = load i32, i32* %arrayidx6, align 4 + %add7 = add nsw i32 %tmp8, %tmp7 + store i32 %add7, i32* %arrayidx6, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 %tmp5 + %tmp9 = load i32, i32* %arrayidx9, align 4 + %tmp10 = trunc i64 %tmp5 to i32 + %add10 = add nsw i32 %tmp9, %tmp10 + store i32 %add10, i32* %arrayidx9, align 4 + br label %for.inc.11 + +for.inc.11: ; preds = %for.end + %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 + br label %for.cond + +for.end.13: ; preds = %for.cond + ret void +} Index: test/ScopInfo/unneeded_scalar_dependences-3.ll =================================================================== --- /dev/null +++ test/ScopInfo/unneeded_scalar_dependences-3.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; CHECK-NOT: Scalar: 1 +; +; void 
f(int *A, int N) { +; for (int i = 0; i < N; i++) { +; int x = i + 3; +; for (int j = 0; j < N; j++) +; A[i] += x + j * x; +; A[x] += x * x; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc.10, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.10 ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end.12 + +for.body: ; preds = %for.cond + %tmp2 = add nuw nsw i64 %indvars.iv, 3 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, %N + br i1 %exitcond, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %tmp3 = trunc i64 %tmp2 to i32 + %mul = mul nsw i32 %j.0, %tmp3 + %tmp4 = trunc i64 %tmp2 to i32 + %add4 = add nsw i32 %tmp4, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp5 = load i32, i32* %arrayidx, align 4 + %add5 = add nsw i32 %tmp5, %add4 + store i32 %add5, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.3 + %inc = add nuw nsw i32 %j.0, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + %tmp6 = trunc i64 %tmp2 to i32 + %mul6 = mul nsw i32 %tmp6, %tmp6 + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %tmp2 + %tmp7 = load i32, i32* %arrayidx8, align 4 + %add9 = add nsw i32 %tmp7, %mul6 + store i32 %add9, i32* %arrayidx8, align 4 + br label %for.inc.10 + +for.inc.10: ; preds = %for.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end.12: ; preds = %for.cond + ret void +} Index: test/ScopInfo/unneeded_scalar_dependences-4.ll =================================================================== --- /dev/null +++ test/ScopInfo/unneeded_scalar_dependences-4.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; CHECK-NOT: MemRef_cond +; CHECK-NOT: Scalar: 1 +; +; void f(int *A, int N) { +; for (int i = 0; i < N; i++) { +; int x = i > 42 ? 
i - 3 : i; +; for (int j = 0; j < N; j++) +; A[i] += x + j * x; +; A[i] += x * x; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc.10, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.10 ], [ 0, %entry ] + %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc.10 ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end.12 + +for.body: ; preds = %for.cond + %cmp1 = icmp sgt i64 %indvars.iv, 42 + %tmp2 = trunc i64 %indvars.iv to i32 + %sub = add nsw i32 %i.0, -3 + %cond = select i1 %cmp1, i32 %sub, i32 %tmp2 + br label %for.cond.2 + +for.cond.2: ; preds = %for.inc, %cond.end + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %cmp3 = icmp slt i32 %j.0, %N + br i1 %cmp3, label %for.body.4, label %for.end + +for.body.4: ; preds = %for.cond.2 + %mul = mul nsw i32 %j.0, %cond + %add = add nsw i32 %cond, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp4 = load i32, i32* %arrayidx, align 4 + %add5 = add nsw i32 %tmp4, %add + store i32 %add5, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.4 + %inc = add nuw nsw i32 %j.0, 1 + br label %for.cond.2 + +for.end: ; preds = %for.cond.2 + %mul6 = mul nsw i32 %cond, %cond + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp5 = load i32, i32* %arrayidx8, align 4 + %add9 = add nsw i32 %tmp5, %mul6 + store i32 %add9, i32* %arrayidx8, align 4 + br label %for.inc.10 + +for.inc.10: ; preds = %for.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %inc11 = add nuw nsw i32 %i.0, 1 + br label %for.cond + +for.end.12: ; preds = %for.cond + ret void +} Index: test/ScopInfo/unneeded_scalar_dependences-5.ll =================================================================== --- /dev/null +++ test/ScopInfo/unneeded_scalar_dependences-5.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; CHECK-NOT: MemRef_conv +; CHECK-NOT: Scalar: 1 +; +; void f(int *A, int N) { +; for (int i = 0; i < N; i++) { +; float x = i + 3.3; +; for (int j = 0; j < N; j++) +; A[i] += x + j * x; +; A[i] += x * x; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc.17, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.17 ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end.19 + +for.body: ; preds = %for.cond + %tmp1 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp1 to double + %add = fadd double %conv, 3.300000e+00 + %conv1 = fptrunc double %add to float + br label %for.cond.2 + +for.cond.2: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, %N + br i1 %exitcond, label %for.body.5, label %for.end + +for.body.5: ; preds = %for.cond.2 + %conv6 = sitofp i32 %j.0 to float + %mul = fmul float %conv6, %conv1 + %add7 = fadd float %conv1, %mul + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp2 = load i32, i32* %arrayidx, align 4 + %conv8 = sitofp i32 %tmp2 to float + %add9 = fadd float %conv8, %add7 + %conv10 = fptosi float %add9 to i32 + store i32 %conv10, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.5 + %inc = 
+  br label %for.cond.2
+
+for.end:                                          ; preds = %for.cond.2
+  %mul11 = fmul float %conv1, %conv1
+  %arrayidx13 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp3 = load i32, i32* %arrayidx13, align 4
+  %conv14 = sitofp i32 %tmp3 to float
+  %add15 = fadd float %conv14, %mul11
+  %conv16 = fptosi float %add15 to i32
+  store i32 %conv16, i32* %arrayidx13, align 4
+  br label %for.inc.17
+
+for.inc.17:                                       ; preds = %for.end
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end.19:                                       ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/unneeded_scalar_dependences-6.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/unneeded_scalar_dependences-6.ll
@@ -0,0 +1,49 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -analyze < %s
+;
+; void f(int *A, int N) {
+;   for (int i = 1; i < N; i++) {
+;     int preloaded = A[0];
+;     /* split BB */
+;     A[i] += preloaded;
+;   }
+; }
+;
+; CHECK:      Invariant Accesses: {
+; CHECK:        ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:          [N] -> { Stmt_for_body[i0] -> MemRef_A[0] };
+; CHECK:          Execution Context: [N] -> { : N >= 2 }
+; CHECK:      }
+;
+; CHECK-NOT: Scalar: 1
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32 %N) {
+entry:
+  %tmp = sext i32 %N to i64
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 1, %entry ]
+  %cmp = icmp slt i64 %indvars.iv, %tmp
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp1 = load i32, i32* %A, align 4
+  br label %for.body.split
+
+for.body.split:
+  %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp2 = load i32, i32* %arrayidx1, align 4
+  %add = add nsw i32 %tmp2, %tmp1
+  store i32 %add, i32* %arrayidx1, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.split
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/unneeded_scalar_dependences-7.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/unneeded_scalar_dependences-7.ll
@@ -0,0 +1,47 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -analyze < %s
+;
+; void f(int *A, int N) {
+;   for (int i = 0; i < N; i++) {
+;     int scevable_addr = 2 * i + N;
+;     /* split BB */
+;     A[scevable_addr] += i;
+;   }
+; }
+;
+; CHECK-NOT: Scalar: 1
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32 %N) {
+entry:
+  %tmp = sext i32 %N to i64
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i64 %indvars.iv, %tmp
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp1 = trunc i64 %indvars.iv to i32
+  %mul = shl nsw i32 %tmp1, 1
+  %add = add nsw i32 %mul, %N
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
+  br label %for.body.split
+
+for.body.split:
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %tmp3 = trunc i64 %indvars.iv to i32
+  %add1 = add nsw i32 %tmp2, %tmp3
+  store i32 %add1, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.split
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/unneeded_scalar_dependences-8.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/unneeded_scalar_dependences-8.ll
@@ -0,0 +1,51 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -analyze < %s
+;
+; void f(int *A, int N) {
+;   int pre_scevable_addr = 2 * N + (N - 1);
+;   for (int i = 0; i < N; i++) {
+;     int scevable_addr = 2 * i + pre_scevable_addr;
+;     /* split BB */
+;     A[scevable_addr] += i;
+;   }
+; }
+;
+; CHECK-NOT: Scalar: 1
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32 %N) {
+entry:
+  %tmp = sext i32 %N to i64
+  %m = mul nsw i32 %N, 2
+  %s = sub nsw i32 %N, 1
+  %pre_scevable_addr = add nsw i32 %m, %s
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i64 %indvars.iv, %tmp
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp1 = trunc i64 %indvars.iv to i32
+  %mul = shl nsw i32 %tmp1, 1
+  %add = add nsw i32 %mul, %pre_scevable_addr
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
+  br label %for.body.split
+
+for.body.split:
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %tmp3 = trunc i64 %indvars.iv to i32
+  %add1 = add nsw i32 %tmp2, %tmp3
+  store i32 %add1, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.split
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/unneeded_scalar_dependences-9.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/unneeded_scalar_dependences-9.ll
@@ -0,0 +1,60 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -analyze < %s
+;
+; void f(int *A, int N) {
+;   if (N > 42) {
+;     int pre_scevable_addr = 2 * N + (N - 1);
+;     for (int i = 0; i < N; i++) {
+;       int scevable_addr = 2 * i + pre_scevable_addr;
+;       /* split BB */
+;       A[scevable_addr] += i;
+;     }
+;   }
+; }
+;
+; CHECK-NOT: Scalar: 1
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A, i32 %N) {
+entry:
+  %tmp = sext i32 %N to i64
+  %m = mul nsw i32 %N, 2
+  br label %if.cond
+
+if.cond:
+  %s = sub nsw i32 %N, 1
+  %c = icmp sgt i32 %N, 42
+  br i1 %c, label %if.then, label %for.end
+
+if.then:
+  %pre_scevable_addr = add nsw i32 %m, %s
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %if.then
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %if.then ]
+  %cmp = icmp slt i64 %indvars.iv, %tmp
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp1 = trunc i64 %indvars.iv to i32
+  %mul = shl nsw i32 %tmp1, 1
+  %add = add nsw i32 %mul, %pre_scevable_addr
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
+  br label %for.body.split
+
+for.body.split:
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %tmp3 = trunc i64 %indvars.iv to i32
+  %add1 = add nsw i32 %tmp2, %tmp3
+  store i32 %add1, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.split
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond, %if.cond
+  ret void
+}