Index: include/polly/LinkAllPasses.h =================================================================== --- include/polly/LinkAllPasses.h +++ include/polly/LinkAllPasses.h @@ -31,14 +31,12 @@ llvm::Pass *createCloogInfoPass(); llvm::Pass *createCodeGenerationPass(); #endif -llvm::Pass *createCodePreparationPass(); llvm::Pass *createDeadCodeElimPass(); llvm::Pass *createDependencesPass(); llvm::Pass *createDOTOnlyPrinterPass(); llvm::Pass *createDOTOnlyViewerPass(); llvm::Pass *createDOTPrinterPass(); llvm::Pass *createDOTViewerPass(); -llvm::Pass *createIndependentBlocksPass(); llvm::Pass *createIndVarSimplifyPass(); llvm::Pass *createJSONExporterPass(); llvm::Pass *createJSONImporterPass(); @@ -52,9 +50,6 @@ llvm::Pass *createIslCodeGenerationPass(); llvm::Pass *createIslScheduleOptimizerPass(); llvm::Pass *createTempScopInfoPass(); - -extern char &IndependentBlocksID; -extern char &CodePreparationID; } namespace { @@ -72,14 +67,12 @@ polly::createCloogInfoPass(); polly::createCodeGenerationPass(); #endif - polly::createCodePreparationPass(); polly::createDeadCodeElimPass(); polly::createDependencesPass(); polly::createDOTOnlyPrinterPass(); polly::createDOTOnlyViewerPass(); polly::createDOTPrinterPass(); polly::createDOTViewerPass(); - polly::createIndependentBlocksPass(); polly::createIndVarSimplifyPass(); polly::createJSONExporterPass(); polly::createJSONImporterPass(); @@ -102,9 +95,7 @@ #ifdef CLOOG_FOUND void initializeCodeGenerationPass(llvm::PassRegistry &); #endif -void initializeCodePreparationPass(llvm::PassRegistry &); void initializeDeadCodeElimPass(llvm::PassRegistry &); -void initializeIndependentBlocksPass(llvm::PassRegistry &); void initializeJSONExporterPass(llvm::PassRegistry &); void initializeJSONImporterPass(llvm::PassRegistry &); void initializeIslAstInfoPass(llvm::PassRegistry &); Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ 
include/polly/ScopDetection.h @@ -182,13 +182,6 @@ /// @return True if all blocks in R are valid, false otherwise. bool allBlocksValid(DetectionContext &Context) const; - /// @brief Check the exit block of a region is valid. - /// - /// @param Context The context of scop detection. - /// - /// @return True if the exit of R is valid, false otherwise. - bool isValidExit(DetectionContext &Context) const; - /// @brief Check if a region is a Scop. /// /// @param Context The context of scop detection. Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -218,6 +218,15 @@ MemoryAccess(const IRAccess &Access, Instruction *AccInst, ScopStmt *Statement, const ScopArrayInfo *SAI); + /// @brief Create a read memory access for a PHI node. + /// + /// @param PHI The PHI node. + /// @param SAI The ScopArrayInfo object for this base pointer. + /// @param Domain A restricted domain for this access if applicable. + /// @param Statement The statement that contains the access. + MemoryAccess(PHINode *PHI, const ScopArrayInfo *SAI, + __isl_keep isl_set *Domain, ScopStmt *Statement); + ~MemoryAccess(); /// @brief Get the type of a memory access. @@ -413,7 +422,6 @@ /// Build the statement. //@{ - __isl_give isl_set *buildConditionSet(const Comparison &Cmp); __isl_give isl_set *addConditionsToDomain(__isl_take isl_set *Domain, TempScop &tempScop, const Region &CurRegion); @@ -421,7 +429,9 @@ TempScop &tempScop); __isl_give isl_set *buildDomain(TempScop &tempScop, const Region &CurRegion); void buildScattering(SmallVectorImpl &Scatter); - void buildAccesses(TempScop &tempScop, const Region &CurRegion); + void buildAccesses(TempScop &tempScop); + void buildPHIAccesses(PHINode *PHI, const IRAccess &Access, + TempScop &tempScop); /// @brief Detect and mark reductions in the ScopStmt void checkForReductions(); @@ -587,6 +597,9 @@ /// The statements in this Scop. 
StmtSet Stmts; + /// @brief A map from basic blocks to SCoP statements. + DenseMap StmtMap; + /// Parameters of this Scop typedef SmallVector ParamVecType; ParamVecType Parameters; @@ -746,6 +759,9 @@ /// @return The assumed context of this Scop. __isl_give isl_set *getAssumedContext() const; + /// @brief Return the stmt for the given @p BB or nullptr if none. + ScopStmt *getStmtForBasicBlock(BasicBlock *BB) const; + /// @brief Add assumptions to assumed context. /// /// The assumptions added will be assumed to hold during the execution of the @@ -799,8 +815,9 @@ //@} /// @brief Return the (possibly new) ScopArrayInfo object for @p Access. - const ScopArrayInfo *getOrCreateScopArrayInfo(const IRAccess &Access, - Instruction *AccessInst); + const ScopArrayInfo * + getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType, + const SmallVector &Sizes); /// @brief Return the cached ScopArrayInfo object for @p BasePtr. const ScopArrayInfo *getScopArrayInfo(Value *BasePtr); Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -15,6 +15,7 @@ #define POLLY_SUPPORT_IRHELPER_H namespace llvm { +class Type; class Instruction; class LoopInfo; class Loop; @@ -52,6 +53,9 @@ llvm::Value *getPointerOperand(llvm::Instruction &Inst); llvm::BasicBlock *createSingleExitEdge(llvm::Region *R, llvm::Pass *P); +/// @brief Return the type of the access. +llvm::Type *getAccessInstType(llvm::Instruction *AccInst); + /// @brief Simplify the region in a SCoP to have a single unconditional entry /// edge and a single exit edge. /// Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -499,9 +499,6 @@ // Check that the base address of the access is invariant in the current // region. 
if (!isInvariant(*BaseValue, Context.CurRegion)) - // Verification of this property is difficult as the independent blocks - // pass may introduce aliasing that we did not have when running the - // scop detection. return invalid(Context, /*Assert=*/false, BaseValue, &Inst); @@ -527,11 +524,6 @@ AccessFunction, &Inst, BaseValue); } - // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions - // created by IndependentBlocks Pass. - if (IntToPtrInst *Inst = dyn_cast(BaseValue)) - return invalid(Context, /*Assert=*/true, Inst); - if (IgnoreAliasing) return true; @@ -542,13 +534,6 @@ AliasSet &AS = Context.AST.getAliasSetForPointer( BaseValue, AliasAnalysis::UnknownSize, AATags); - // INVALID triggers an assertion in verifying mode, if it detects that a - // SCoP was detected by SCoP detection and that this SCoP was invalidated by - // a pass that stated it would preserve the SCoPs. We disable this check as - // the independent blocks pass may create memory references which seem to - // alias, if -basicaa is not available. They actually do not, but as we can - // not proof this without -basicaa we would fail. We disable this check to - // not cause irrelevant verification failures. if (!AS.isMustAlias()) { if (PollyUseRuntimeAliasChecks) { bool CanBuildRunTimeCheck = true; @@ -575,16 +560,6 @@ bool ScopDetection::isValidInstruction(Instruction &Inst, DetectionContext &Context) const { - if (PHINode *PN = dyn_cast(&Inst)) - if (!canSynthesize(PN, LI, SE, &Context.CurRegion)) { - if (SCEVCodegen) - return invalid(Context, /*Assert=*/true, - &Inst); - else - return invalid(Context, /*Assert=*/true, - &Inst); - } - // We only check the call instruction but not invoke instruction. if (CallInst *CI = dyn_cast(&Inst)) { if (isValidCallInst(*CI)) @@ -637,9 +612,8 @@ DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); // Only expand when we did not collect errors. 
- // Check the exit first (cheap) - if (isValidExit(Context) && !Context.Log.hasErrors()) { - // If the exit is valid check all blocks + if (!Context.Log.hasErrors()) { + // Check all blocks // - if true, a valid region was found => store it + keep expanding // - if false, .tbd. => stop (should this really end the loop?) if (!allBlocksValid(Context) || Context.Log.hasErrors()) @@ -782,19 +756,6 @@ return true; } -bool ScopDetection::isValidExit(DetectionContext &Context) const { - Region &R = Context.CurRegion; - - // PHI nodes are not allowed in the exit basic block. - if (BasicBlock *Exit = R.getExit()) { - BasicBlock::iterator I = Exit->begin(); - if (I != Exit->end() && isa(*I)) - return invalid(Context, /*Assert=*/true, I); - } - - return true; -} - bool ScopDetection::isValidRegion(Region &R) const { DetectionContext Context(R, *AA, false /*verifying*/); @@ -843,14 +804,6 @@ } } - // SCoP cannot contain the entry block of the function, because we need - // to insert alloca instruction there when translate scalar to array. 
- if (R.getEntry() == &(R.getEntry()->getParent()->getEntryBlock())) - return invalid(Context, /*Assert=*/true, R.getEntry()); - - if (!isValidExit(Context)) - return false; - if (!allBlocksValid(Context)) return false; Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -525,6 +525,7 @@ } Space = Statement->getDomainSpace(); + AccessRelation = isl_map_align_params(AccessRelation, isl_space_copy(Space)); AccessRelation = isl_map_set_tuple_id( AccessRelation, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set)); AccessRelation = @@ -534,6 +535,30 @@ isl_space_free(Space); } +MemoryAccess::MemoryAccess(PHINode *PHI, const ScopArrayInfo *SAI, + isl_set *Domain, ScopStmt *Statement) + : AccType(READ), Statement(Statement), Inst(PHI), + newAccessRelation(nullptr) { + + isl_ctx *Ctx = Statement->getIslCtx(); + BaseAddr = PHI; + BaseName = getIslCompatibleName("MemRef_", getBaseAddr(), ""); + + isl_id *BaseAddrId = SAI->getBasePtrId(); + + isl_space *Space = isl_space_alloc(Ctx, 0, Statement->getNumIterators(), 0); + AccessRelation = isl_map_universe(Space); + + Space = Statement->getDomainSpace(); + AccessRelation = isl_map_set_tuple_id( + AccessRelation, isl_dim_in, isl_space_get_tuple_id(Space, isl_dim_set)); + AccessRelation = + isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId); + AccessRelation = isl_map_intersect_domain(AccessRelation, Domain); + + isl_space_free(Space); +} + void MemoryAccess::realignParams() { isl_space *ParamSpace = Statement->getParent()->getParamSpace(); AccessRelation = isl_map_align_params(AccessRelation, ParamSpace); @@ -713,14 +738,138 @@ Scattering = isl_map_align_params(Scattering, Parent.getParamSpace()); } -void ScopStmt::buildAccesses(TempScop &tempScop, const Region &CurRegion) { - for (const auto &AccessPair : *tempScop.getAccessFunctions(BB)) { +static __isl_give isl_set *buildConditionSet(ScopStmt *Stmt, + 
const Comparison &Comp) { + isl_pw_aff *L = SCEVAffinator::getPwAff(Stmt, Comp.getLHS()); + isl_pw_aff *R = SCEVAffinator::getPwAff(Stmt, Comp.getRHS()); + + switch (Comp.getPred()) { + case ICmpInst::ICMP_EQ: + return isl_pw_aff_eq_set(L, R); + case ICmpInst::ICMP_NE: + return isl_pw_aff_ne_set(L, R); + case ICmpInst::ICMP_SLT: + return isl_pw_aff_lt_set(L, R); + case ICmpInst::ICMP_SLE: + return isl_pw_aff_le_set(L, R); + case ICmpInst::ICMP_SGT: + return isl_pw_aff_gt_set(L, R); + case ICmpInst::ICMP_SGE: + return isl_pw_aff_ge_set(L, R); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_UGE: + llvm_unreachable("Unsigned comparisons not yet supported"); + default: + llvm_unreachable("Non integer predicate not supported"); + } +} + +static bool isLoopCarried(__isl_take isl_map *Scattering, + __isl_take isl_map *InScattering) { + unsigned DimIn = isl_map_dim(Scattering, isl_dim_in); + unsigned InDimIn = isl_map_dim(InScattering, isl_dim_in); + unsigned MinDimIn = std::min(DimIn, InDimIn); + + isl_map *Order = isl_map_lex_gt_map(Scattering, InScattering); + for (unsigned u = 0; u < MinDimIn; u++) + Order = isl_map_equate(Order, isl_dim_in, u, isl_dim_out, u); + + bool IsLoopCarried = isl_map_is_empty(Order); + isl_map_free(Order); + return IsLoopCarried; +} + +void ScopStmt::buildPHIAccesses(PHINode *PHI, const IRAccess &Access, + TempScop &tempScop) { + const SmallVector Sizes; + + unsigned NumIncomingValues = PHI->getNumIncomingValues(); + if (NumIncomingValues == 1) { + if (auto *InValueInst = dyn_cast(PHI->getIncomingValue(0))) { + const ScopArrayInfo *SAI = getParent()->getOrCreateScopArrayInfo( + InValueInst, PHI->getType(), Sizes); + MemAccs.push_back(new MemoryAccess(PHI, SAI, getDomain(), this)); + } + return; + } + + Scop *S = getParent(); + unsigned NumIterators = getNumIterators(); + for (unsigned u = 0, e = NumIncomingValues; u < e; u++) { + BasicBlock *InBB = PHI->getIncomingBlock(u); + ScopStmt 
*InStmt = S->getStmtForBasicBlock(InBB); + if (!InStmt) + continue; + + Instruction *InValueInst = dyn_cast(PHI->getIncomingValue(u)); + if (!InValueInst) + continue; + + bool IsLC = isLoopCarried(getScattering(), InStmt->getScattering()); + + isl_set *InDomain = isl_set_reset_tuple_id(InStmt->getDomain()); + if (const BBCond *Condition = tempScop.getBBCond(InBB)) { + for (const auto &C : *Condition) { + isl_set *ConditionSet = buildConditionSet(InStmt, C); + InDomain = isl_set_intersect(InDomain, ConditionSet); + } + } + + unsigned InNumIterators = InStmt->getNumIterators(); + unsigned NumIteratorsDiff = NumIterators - InNumIterators; + assert(NumIterators >= InNumIterators && "In Stmt has more iterators!"); + + if (IsLC) { + assert(NumIteratorsDiff == 0 && + "Found loop carried PHI with different dimension"); + assert(InNumIterators > 0 && + "Found loop carried PHI without loop dimension"); + + isl_space *InDomainSpace = isl_set_get_space(InDomain); + isl_space *InDomainMapSpace = isl_space_map_from_set(InDomainSpace); + + isl_multi_aff *LCMAff = isl_multi_aff_identity(InDomainMapSpace); + + unsigned LastDim = InNumIterators - 1; + isl_aff *LCAff = isl_multi_aff_get_aff(LCMAff, LastDim); + LCAff = isl_aff_add_constant_si(LCAff, 1); + LCMAff = isl_multi_aff_set_aff(LCMAff, LastDim, LCAff); + + isl_map *LCMap = isl_map_from_multi_aff(LCMAff); + InDomain = isl_set_apply(InDomain, LCMap); + } + + if (NumIteratorsDiff) { + assert(!IsLC && "Found loop carried PHI with different dimension"); + InDomain = isl_set_add_dims(InDomain, isl_dim_set, NumIteratorsDiff); + for (unsigned dimIt = InNumIterators; dimIt < NumIterators; dimIt++) + InDomain = isl_set_fix_dim_si(InDomain, dimIt, 0); + } + + InDomain = isl_set_set_tuple_id(InDomain, getDomainId()); + const ScopArrayInfo *SAI = getParent()->getOrCreateScopArrayInfo( + InValueInst, PHI->getType(), Sizes); + MemAccs.push_back(new MemoryAccess(PHI, SAI, InDomain, this)); + } +} + +void ScopStmt::buildAccesses(TempScop 
&tempScop) { + const AccFuncSetType *AFS = tempScop.getAccessFunctions(BB); + assert(AFS && "Cannot build accesses for trivial statement"); + + for (const auto &AccessPair : *AFS) { const IRAccess &Access = AccessPair.first; Instruction *AccessInst = AccessPair.second; - const ScopArrayInfo *SAI = - getParent()->getOrCreateScopArrayInfo(Access, AccessInst); + Type *AccessType = getAccessInstType(AccessInst)->getPointerTo(); + const ScopArrayInfo *SAI = getParent()->getOrCreateScopArrayInfo( + Access.getBase(), AccessType, Access.Sizes); + MemAccs.push_back(new MemoryAccess(Access, AccessInst, this, SAI)); + if (PHINode *PHI = dyn_cast(AccessInst)) + buildPHIAccesses(PHI, Access, tempScop); // We do not track locations for scalar memory accesses at the moment. // @@ -743,33 +892,6 @@ Scattering = isl_map_align_params(Scattering, Parent.getParamSpace()); } -__isl_give isl_set *ScopStmt::buildConditionSet(const Comparison &Comp) { - isl_pw_aff *L = SCEVAffinator::getPwAff(this, Comp.getLHS()); - isl_pw_aff *R = SCEVAffinator::getPwAff(this, Comp.getRHS()); - - switch (Comp.getPred()) { - case ICmpInst::ICMP_EQ: - return isl_pw_aff_eq_set(L, R); - case ICmpInst::ICMP_NE: - return isl_pw_aff_ne_set(L, R); - case ICmpInst::ICMP_SLT: - return isl_pw_aff_lt_set(L, R); - case ICmpInst::ICMP_SLE: - return isl_pw_aff_le_set(L, R); - case ICmpInst::ICMP_SGT: - return isl_pw_aff_gt_set(L, R); - case ICmpInst::ICMP_SGE: - return isl_pw_aff_ge_set(L, R); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_UGE: - llvm_unreachable("Unsigned comparisons not yet supported"); - default: - llvm_unreachable("Non integer predicate not supported"); - } -} - __isl_give isl_set *ScopStmt::addLoopBoundsToDomain(__isl_take isl_set *Domain, TempScop &tempScop) { isl_space *Space; @@ -810,7 +932,7 @@ if (BranchingBB != CurrentRegion->getEntry()) { if (const BBCond *Condition = tempScop.getBBCond(BranchingBB)) for (const auto &C : *Condition) { 
- isl_set *ConditionSet = buildConditionSet(C); + isl_set *ConditionSet = buildConditionSet(this, C); Domain = isl_set_intersect(Domain, ConditionSet); } } @@ -857,8 +979,6 @@ Domain = buildDomain(tempScop, CurRegion); buildScattering(Scatter); - buildAccesses(tempScop, CurRegion); - checkForReductions(); } /// @brief Collect loads which might form a reduction chain with @p StoreMA @@ -1422,6 +1542,13 @@ // traversing the region tree. buildScop(tempScop, getRegion(), NestLoops, Scatter, LI); + // We delayed the building of accesses until we created all statements. + // This way we know the domains, thus can model PHI nodes accurately. + for (ScopStmt *Stmt : Stmts) { + Stmt->buildAccesses(tempScop); + Stmt->checkForReductions(); + } + realignParams(); addParameterBounds(); simplifyAssumedContext(); @@ -1434,8 +1561,8 @@ isl_set_free(AssumedContext); // Free the statements; - for (ScopStmt *Stmt : *this) - delete Stmt; + for (const auto &StmtMapPair : StmtMap) + delete StmtMapPair.second; // Free the ScopArrayInfo objects. for (auto &ScopArrayInfoPair : ScopArrayInfoMap) @@ -1451,14 +1578,12 @@ } } -const ScopArrayInfo *Scop::getOrCreateScopArrayInfo(const IRAccess &Access, - Instruction *AccessInst) { - Value *BasePtr = Access.getBase(); +const ScopArrayInfo * +Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType, + const SmallVector &Sizes) { const ScopArrayInfo *&SAI = ScopArrayInfoMap[BasePtr]; - if (!SAI) { - Type *AccessType = getPointerOperand(*AccessInst)->getType(); - SAI = new ScopArrayInfo(BasePtr, AccessType, getIslCtx(), Access.Sizes); - } + if (!SAI) + SAI = new ScopArrayInfo(BasePtr, AccessType, getIslCtx(), Sizes); return SAI; } @@ -1709,11 +1834,17 @@ else { BasicBlock *BB = I->getNodeAs(); + ScopStmt *Stmt = + new ScopStmt(*this, tempScop, CurRegion, *BB, NestLoops, Scatter); + + // Insert all statements (trivial or not) into the statement map. 
+ StmtMap[BB] = Stmt; + if (isTrivialBB(BB, tempScop)) continue; - Stmts.push_back( - new ScopStmt(*this, tempScop, CurRegion, *BB, NestLoops, Scatter)); + // But only non-trivial statements go into the statement vector. + Stmts.push_back(Stmt); // Increasing the Scattering function is OK for the moment, because // we are using a depth first iterator and the program is well structured. @@ -1729,6 +1860,13 @@ ++Scatter[loopDepth - 1]; } +ScopStmt *Scop::getStmtForBasicBlock(BasicBlock *BB) const { + const auto &StmtMapIt = StmtMap.find(BB); + if (StmtMapIt == StmtMap.end()) + return nullptr; + return StmtMapIt->second; +} + //===----------------------------------------------------------------------===// ScopInfo::ScopInfo() : RegionPass(ID), scop(0) { ctx = isl_ctx_alloc(); Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -168,7 +168,9 @@ if (!R->contains(UseParent)) continue; - assert(!isa(UI) && "Non synthesizable PHINode found in a SCoP!"); + // Skip PHI users as they are treated differently later on. 
+ if (isa(UI)) + continue; SmallVector Subscripts, Sizes; @@ -408,7 +410,6 @@ AU.addRequiredTransitive(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); - AU.addRequiredID(IndependentBlocksID); AU.addRequired(); AU.setPreservesAll(); } Index: lib/CMakeLists.txt =================================================================== --- lib/CMakeLists.txt +++ lib/CMakeLists.txt @@ -49,9 +49,7 @@ Support/ScopHelper.cpp ${POLLY_JSON_FILES} Transform/Canonicalization.cpp - Transform/CodePreparation.cpp Transform/DeadCodeElimination.cpp - Transform/IndependentBlocks.cpp Transform/IndVarSimplify.cpp Transform/ScheduleOptimizer.cpp ${POLLY_PLUTO_FILES} Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -43,7 +43,7 @@ static cl::opt SCEVCodegenF("polly-codegen-scev", cl::desc("Use SCEV based code generation."), cl::Hidden, - cl::location(SCEVCodegen), cl::init(false), cl::ZeroOrMore, + cl::location(SCEVCodegen), cl::init(true), cl::ZeroOrMore, cl::cat(PollyCategory)); bool polly::SCEVCodegen; Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -1092,7 +1092,6 @@ AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); - AU.addPreservedID(IndependentBlocksID); } }; } Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -655,7 +655,6 @@ AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); - AU.addPreservedID(IndependentBlocksID); } }; } Index: lib/Makefile =================================================================== --- lib/Makefile +++ lib/Makefile @@ -58,9 +58,7 @@ CodeGen/RuntimeDebugBuilder.cpp \ Exchange/JSONExporter.cpp \ Transform/Canonicalization.cpp \ - 
Transform/CodePreparation.cpp \ Transform/DeadCodeElimination.cpp \ - Transform/IndependentBlocks.cpp \ Transform/IndVarSimplify.cpp \ Transform/ScheduleOptimizer.cpp \ ${GPGPU_FILES} \ Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -155,10 +155,8 @@ initializeCodeGenerationPass(Registry); #endif initializeIslCodeGenerationPass(Registry); - initializeCodePreparationPass(Registry); initializeDeadCodeElimPass(Registry); initializeDependencesPass(Registry); - initializeIndependentBlocksPass(Registry); initializeJSONExporterPass(Registry); initializeJSONImporterPass(Registry); initializeIslAstInfoPass(Registry); Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -66,6 +66,12 @@ return 0; } +Type *polly::getAccessInstType(Instruction *AccInst) { + if (StoreInst *Store = dyn_cast(AccInst)) + return Store->getValueOperand()->getType(); + return AccInst->getType(); +} + bool polly::hasInvokeEdge(const PHINode *PN) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) if (InvokeInst *II = dyn_cast(PN->getIncomingValue(i))) @@ -88,11 +94,8 @@ static void replaceScopAndRegionEntry(polly::Scop *S, BasicBlock *OldEntry, BasicBlock *NewEntry) { - for (polly::ScopStmt *Stmt : *S) - if (Stmt->getBasicBlock() == OldEntry) { - Stmt->setBasicBlock(NewEntry); - break; - } + if (polly::ScopStmt *Stmt = S->getStmtForBasicBlock(OldEntry)) + Stmt->setBasicBlock(NewEntry); S->getRegion().replaceEntryRecursive(NewEntry); } Index: lib/Transform/Canonicalization.cpp =================================================================== --- lib/Transform/Canonicalization.cpp +++ lib/Transform/Canonicalization.cpp @@ -33,8 +33,6 @@ if (!SCEVCodegen) PM.add(polly::createIndVarSimplifyPass()); - - PM.add(polly::createCodePreparationPass()); } 
namespace { Index: lib/Transform/CodePreparation.cpp =================================================================== --- lib/Transform/CodePreparation.cpp +++ /dev/null @@ -1,248 +0,0 @@ -//===---- CodePreparation.cpp - Code preparation for Scop Detection -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The Polly code preparation pass is executed before SCoP detection. Its only -// use is to translate all PHI nodes that can not be expressed by the code -// generator into explicit memory dependences. Depending of the code generation -// strategy different PHI nodes are translated: -// -// - indvars based code generation: -// -// The indvars based code generation requires explicit canonical induction -// variables. Such variables are generated before scop detection and -// also before the code preparation pass. All PHI nodes that are not canonical -// induction variables are not supported by the indvars based code generation -// and are consequently translated into explicit memory accesses. -// -// - scev based code generation: -// -// The scev based code generation can code generate all PHI nodes that do not -// reference parameters within the scop. As the code preparation pass is run -// before scop detection, we can not check this condition, because without -// a detected scop, we do not know SCEVUnknowns that appear in the SCEV of -// a PHI node may later be within or outside of the SCoP. Hence, we follow a -// heuristic and translate all PHI nodes that are either directly SCEVUnknown -// or SCEVCouldNotCompute. This will hopefully get most of the PHI nodes that -// are introduced due to conditional control flow, but not the ones that are -// referencing loop counters. 
-// -// XXX: In the future, we should remove the need for this pass entirely and -// instead add support for scalar dependences to ScopInfo and code generation. -// -//===----------------------------------------------------------------------===// - -#include "polly/LinkAllPasses.h" -#include "polly/CodeGen/BlockGenerators.h" -#include "polly/Support/ScopHelper.h" -#include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/Local.h" - -using namespace llvm; -using namespace polly; - -namespace { - -// Helper function which (for a given PHI node): -// -// 1) Remembers all incoming values and the associated basic blocks -// 2) Demotes the phi node to the stack -// 3) Remembers the correlation between the PHI node and the new alloca -// -// When we combine the information from 1) and 3) we know the values stored -// in this alloca at the end of the predecessor basic blocks of the PHI. -static void DemotePHI( - PHINode *PN, DenseMap &PNallocMap, - DenseMap, PHINode *> &ValueLocToPhiMap) { - - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - auto *InVal = PN->getIncomingValue(i); - auto *InBB = PN->getIncomingBlock(i); - ValueLocToPhiMap[std::make_pair(InVal, InBB)] = PN; - } - - PNallocMap[PN] = DemotePHIToStack(PN); -} - -/// @brief Prepare the IR for the scop detection. -/// -class CodePreparation : public FunctionPass { - CodePreparation(const CodePreparation &) LLVM_DELETED_FUNCTION; - const CodePreparation & - operator=(const CodePreparation &) LLVM_DELETED_FUNCTION; - - LoopInfo *LI; - ScalarEvolution *SE; - - void clear(); - - bool eliminatePHINodes(Function &F); - -public: - static char ID; - - explicit CodePreparation() : FunctionPass(ID) {} - ~CodePreparation(); - - /// @name FunctionPass interface. 
- //@{ - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - virtual bool runOnFunction(Function &F); - virtual void print(raw_ostream &OS, const Module *) const; - //@} -}; -} - -void CodePreparation::clear() {} - -CodePreparation::~CodePreparation() { clear(); } - -bool CodePreparation::eliminatePHINodes(Function &F) { - // The PHINodes that will be demoted. - std::vector PNtoDemote; - // The PHINodes that will be deleted (stack slot sharing). - std::vector PNtoDelete; - // The PHINodes that will be preserved. - std::vector PNtoPreserve; - // Map to remember values stored in PHINodes at the end of basic blocks. - DenseMap, PHINode *> ValueLocToPhiMap; - // Map from PHINodes to their alloca (after demotion) counterpart. - DenseMap PNallocMap; - - // Scan the PHINodes in this function and categorize them to be either: - // o Preserved, if they are (canonical) induction variables or can be - // synthesized during code generation ('SCEVable') - // o Deleted, if they are trivial PHI nodes (one incoming value) and the - // incoming value is a PHI node we will demote - // o Demoted, if they do not fit any of the previous categories - for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) - for (BasicBlock::iterator II = BI->begin(), IE = BI->getFirstNonPHI(); - II != IE; ++II) { - PHINode *PN = cast(II); - if (SCEVCodegen) { - if (SE->isSCEVable(PN->getType())) { - const SCEV *S = SE->getSCEV(PN); - if (!isa(S) && !isa(S)) { - PNtoPreserve.push_back(PN); - continue; - } - } - } else { - if (Loop *L = LI->getLoopFor(BI)) { - // Induction variables will be preserved. - if (L->getCanonicalInductionVariable() == PN) { - PNtoPreserve.push_back(PN); - continue; - } - } - } - - // As DemotePHIToStack does not support invoke edges, we preserve - // PHINodes that have invoke edges. 
- if (hasInvokeEdge(PN)) { - PNtoPreserve.push_back(PN); - } else { - if (PN->getNumIncomingValues() == 1) - PNtoDelete.push_back(PN); - else - PNtoDemote.push_back(PN); - } - } - - if (PNtoDemote.empty() && PNtoDelete.empty()) - return false; - - while (!PNtoDemote.empty()) { - PHINode *PN = PNtoDemote.back(); - PNtoDemote.pop_back(); - DemotePHI(PN, PNallocMap, ValueLocToPhiMap); - } - - // For each trivial PHI we encountered (and we want to delete) we try to find - // the value it will hold in a alloca we already created by PHI demotion. If - // we succeed (the incoming value is stored in an alloca at the predecessor - // block), we can replace the trivial PHI by the value stored in the alloca. - // If not, we will demote this trivial PHI as any other one. - for (auto PNIt = PNtoDelete.rbegin(), PNEnd = PNtoDelete.rend(); - PNIt != PNEnd; ++PNIt) { - PHINode *TrivPN = *PNIt; - assert(TrivPN->getNumIncomingValues() == 1 && "Assumed trivial PHI"); - - auto *InVal = TrivPN->getIncomingValue(0); - auto *InBB = TrivPN->getIncomingBlock(0); - const auto &ValLocIt = ValueLocToPhiMap.find(std::make_pair(InVal, InBB)); - if (ValLocIt != ValueLocToPhiMap.end()) { - PHINode *InPHI = ValLocIt->second; - assert(PNallocMap.count(InPHI) && - "Inconsitent state, PN was not demoted!"); - auto *InPHIAlloca = PNallocMap[InPHI]; - PNallocMap[TrivPN] = InPHIAlloca; - LoadInst *LI = new LoadInst(InPHIAlloca, "", - TrivPN->getParent()->getFirstInsertionPt()); - TrivPN->replaceAllUsesWith(LI); - TrivPN->eraseFromParent(); - continue; - } - - DemotePHI(TrivPN, PNallocMap, ValueLocToPhiMap); - } - - // Move preserved PHINodes to the beginning of the BasicBlock. 
- while (!PNtoPreserve.empty()) { - PHINode *PN = PNtoPreserve.back(); - PNtoPreserve.pop_back(); - - BasicBlock *BB = PN->getParent(); - if (PN == BB->begin()) - continue; - - PN->moveBefore(BB->begin()); - } - - return true; -} - -void CodePreparation::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); -} - -bool CodePreparation::runOnFunction(Function &F) { - LI = &getAnalysis(); - SE = &getAnalysis(); - - splitEntryBlockForAlloca(&F.getEntryBlock(), this); - - eliminatePHINodes(F); - - return false; -} - -void CodePreparation::releaseMemory() { clear(); } - -void CodePreparation::print(raw_ostream &OS, const Module *) const {} - -char CodePreparation::ID = 0; -char &polly::CodePreparationID = CodePreparation::ID; - -Pass *polly::createCodePreparationPass() { return new CodePreparation(); } - -INITIALIZE_PASS_BEGIN(CodePreparation, "polly-prepare", - "Polly - Prepare code for polly", false, false) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_END(CodePreparation, "polly-prepare", - "Polly - Prepare code for polly", false, false) Index: lib/Transform/IndependentBlocks.cpp =================================================================== --- lib/Transform/IndependentBlocks.cpp +++ /dev/null @@ -1,566 +0,0 @@ -//===------ IndependentBlocks.cpp - Create Independent Blocks in Regions --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Create independent blocks in the regions detected by ScopDetection. 
-// -//===----------------------------------------------------------------------===// -// -#include "polly/LinkAllPasses.h" -#include "polly/Options.h" -#include "polly/CodeGen/BlockGenerators.h" -#include "polly/CodeGen/Cloog.h" -#include "polly/ScopDetection.h" -#include "polly/Support/ScopHelper.h" -#include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" - -#include - -using namespace polly; -using namespace llvm; - -#define DEBUG_TYPE "polly-independent" - -static cl::opt DisableIntraScopScalarToArray( - "disable-polly-intra-scop-scalar-to-array", - cl::desc("Do not rewrite scalar to array to generate independent blocks"), - cl::Hidden, cl::init(false), cl::cat(PollyCategory)); - -namespace { -struct IndependentBlocks : public FunctionPass { - RegionInfo *RI; - ScalarEvolution *SE; - ScopDetection *SD; - LoopInfo *LI; - - BasicBlock *AllocaBlock; - - static char ID; - - IndependentBlocks() : FunctionPass(ID) {} - - // Create new code for every instruction operator that can be expressed by a - // SCEV. Like this there are just two types of instructions left: - // - // 1. Instructions that only reference loop ivs or parameters outside the - // region. - // - // 2. Instructions that are not used for any memory modification. (These - // will be ignored later on.) - // - // Blocks containing only these kind of instructions are called independent - // blocks as they can be scheduled arbitrarily. - bool createIndependentBlocks(BasicBlock *BB, const Region *R); - bool createIndependentBlocks(const Region *R); - - // Elimination on the Scop to eliminate the scalar dependences come with - // trivially dead instructions. 
- bool eliminateDeadCode(const Region *R); - - //===--------------------------------------------------------------------===// - /// Non trivial scalar dependences checking functions. - /// Non trivial scalar dependences occur when the def and use are located in - /// different BBs and we can not move them into the same one. This will - /// prevent use from schedule BBs arbitrarily. - /// - /// @brief This function checks if a scalar value that is part of the - /// Scop is used outside of the Scop. - /// - /// @param Use The use of the instruction. - /// @param R The maximum region in the Scop. - /// - /// @return Return true if the Use of an instruction and the instruction - /// itself form a non trivial scalar dependence. - static bool isEscapeUse(const Value *Use, const Region *R); - - /// @brief This function just checks if a Value is either defined in the same - /// basic block or outside the region, such that there are no scalar - /// dependences between basic blocks that are both part of the same - /// region. - /// - /// @param Operand The operand of the instruction. - /// @param CurBB The BasicBlock that contains the instruction. - /// @param R The maximum region in the Scop. - /// - /// @return Return true if the Operand of an instruction and the instruction - /// itself form a non trivial scalar (true) dependence. - bool isEscapeOperand(const Value *Operand, const BasicBlock *CurBB, - const Region *R) const; - - //===--------------------------------------------------------------------===// - /// Operand tree moving functions. - /// Trivial scalar dependences can eliminate by move the def to the same BB - /// that containing use. - /// - /// @brief Check if the instruction can be moved to another place safely. - /// - /// @param Inst The instruction. - /// - /// @return Return true if the instruction can be moved safely, false - /// otherwise. 
- static bool isSafeToMove(Instruction *Inst); - - typedef std::map ReplacedMapType; - - /// @brief Move all safe to move instructions in the Operand Tree (DAG) to - /// eliminate trivial scalar dependences. - /// - /// @param Inst The root of the operand Tree. - /// @param R The maximum region in the Scop. - /// @param ReplacedMap The map that mapping original instruction to the moved - /// instruction. - /// @param InsertPos The insert position of the moved instructions. - void moveOperandTree(Instruction *Inst, const Region *R, - ReplacedMapType &ReplacedMap, Instruction *InsertPos); - - bool isIndependentBlock(const Region *R, BasicBlock *BB) const; - bool areAllBlocksIndependent(const Region *R) const; - - // Split the exit block to hold load instructions. - bool splitExitBlock(Region *R); - bool onlyUsedInRegion(Instruction *Inst, const Region *R); - bool translateScalarToArray(BasicBlock *BB, const Region *R); - bool translateScalarToArray(Instruction *Inst, const Region *R); - bool translateScalarToArray(const Region *R); - - bool runOnFunction(Function &F); - void verifyAnalysis() const; - void verifyScop(const Region *R) const; - void getAnalysisUsage(AnalysisUsage &AU) const; -}; -} - -bool IndependentBlocks::isSafeToMove(Instruction *Inst) { - if (Inst->mayReadFromMemory() || Inst->mayWriteToMemory()) - return false; - - return isSafeToSpeculativelyExecute(Inst); -} - -void IndependentBlocks::moveOperandTree(Instruction *Inst, const Region *R, - ReplacedMapType &ReplacedMap, - Instruction *InsertPos) { - BasicBlock *CurBB = Inst->getParent(); - - // Depth first traverse the operand tree (or operand dag, because we will - // stop at PHINodes, so there are no cycle). 
- typedef Instruction::op_iterator ChildIt; - std::vector> WorkStack; - - WorkStack.push_back(std::make_pair(Inst, Inst->op_begin())); - DenseSet VisitedSet; - - while (!WorkStack.empty()) { - Instruction *CurInst = WorkStack.back().first; - ChildIt It = WorkStack.back().second; - DEBUG(dbgs() << "Checking Operand of Node:\n" << *CurInst << "\n------>\n"); - if (It == CurInst->op_end()) { - // Insert the new instructions in topological order. - if (!CurInst->getParent()) { - CurInst->insertBefore(InsertPos); - SE->forgetValue(CurInst); - } - - WorkStack.pop_back(); - } else { - // for each node N, - Instruction *Operand = dyn_cast(*It); - ++WorkStack.back().second; - - // Can not move no instruction value. - if (Operand == 0) - continue; - - DEBUG(dbgs() << "For Operand:\n" << *Operand << "\n--->"); - - // If the Scop Region does not contain N, skip it and all its operands and - // continue: because we reach a "parameter". - // FIXME: we must keep the predicate instruction inside the Scop, - // otherwise it will be translated to a load instruction, and we can not - // handle load as affine predicate at this moment. - if (!R->contains(Operand) && !isa(CurInst)) { - DEBUG(dbgs() << "Out of region.\n"); - continue; - } - - if (canSynthesize(Operand, LI, SE, R)) { - DEBUG(dbgs() << "is IV.\n"); - continue; - } - - // We can not move the operand, a non trivial scalar dependence found! - if (!isSafeToMove(Operand)) { - DEBUG(dbgs() << "Can not move!\n"); - continue; - } - - // Do not need to move instruction if it is contained in the same BB with - // the root instruction. - if (Operand->getParent() == CurBB) { - DEBUG(dbgs() << "No need to move.\n"); - // Try to move its operand, but do not visit an instuction twice. - if (VisitedSet.insert(Operand).second) - WorkStack.push_back(std::make_pair(Operand, Operand->op_begin())); - continue; - } - - // Now we need to move Operand to CurBB. - // Check if we already moved it. 
- ReplacedMapType::iterator At = ReplacedMap.find(Operand); - if (At != ReplacedMap.end()) { - DEBUG(dbgs() << "Moved.\n"); - Instruction *MovedOp = At->second; - It->set(MovedOp); - SE->forgetValue(MovedOp); - } else { - // Note that NewOp is not inserted in any BB now, we will insert it when - // it popped form the work stack, so it will be inserted in topological - // order. - Instruction *NewOp = Operand->clone(); - NewOp->setName(Operand->getName() + ".moved.to." + CurBB->getName()); - DEBUG(dbgs() << "Move to " << *NewOp << "\n"); - It->set(NewOp); - ReplacedMap.insert(std::make_pair(Operand, NewOp)); - SE->forgetValue(Operand); - - // Process its operands, but do not visit an instuction twice. - if (VisitedSet.insert(NewOp).second) - WorkStack.push_back(std::make_pair(NewOp, NewOp->op_begin())); - } - } - } - - SE->forgetValue(Inst); -} - -bool IndependentBlocks::createIndependentBlocks(BasicBlock *BB, - const Region *R) { - std::vector WorkList; - for (Instruction &Inst : *BB) - if (!isSafeToMove(&Inst) && !canSynthesize(&Inst, LI, SE, R)) - WorkList.push_back(&Inst); - - ReplacedMapType ReplacedMap; - Instruction *InsertPos = BB->getFirstNonPHIOrDbg(); - - for (Instruction *Inst : WorkList) - moveOperandTree(Inst, R, ReplacedMap, InsertPos); - - // The BB was changed if we replaced any operand. - return !ReplacedMap.empty(); -} - -bool IndependentBlocks::createIndependentBlocks(const Region *R) { - bool Changed = false; - - for (BasicBlock *BB : R->blocks()) - Changed |= createIndependentBlocks(BB, R); - - return Changed; -} - -bool IndependentBlocks::eliminateDeadCode(const Region *R) { - std::vector WorkList; - - // Find all trivially dead instructions. - for (BasicBlock *BB : R->blocks()) - for (Instruction &Inst : *BB) - if (isInstructionTriviallyDead(&Inst)) - WorkList.push_back(&Inst); - - if (WorkList.empty()) - return false; - - // Delete them so the cross BB scalar dependences come with them will - // also be eliminated. 
- while (!WorkList.empty()) { - RecursivelyDeleteTriviallyDeadInstructions(WorkList.back()); - WorkList.pop_back(); - } - - return true; -} - -bool IndependentBlocks::isEscapeUse(const Value *Use, const Region *R) { - // Non-instruction user will never escape. - if (!isa(Use)) - return false; - - return !R->contains(cast(Use)); -} - -bool IndependentBlocks::isEscapeOperand(const Value *Operand, - const BasicBlock *CurBB, - const Region *R) const { - const Instruction *OpInst = dyn_cast(Operand); - - // Non-instruction operand will never escape. - if (OpInst == 0) - return false; - - // Induction variables are valid operands. - if (canSynthesize(OpInst, LI, SE, R)) - return false; - - // A value from a different BB is used in the same region. - return R->contains(OpInst) && (OpInst->getParent() != CurBB); -} - -bool IndependentBlocks::splitExitBlock(Region *R) { - // Split the exit BB to place the load instruction of escaped users. - BasicBlock *ExitBB = R->getExit(); - Region *ExitRegion = RI->getRegionFor(ExitBB); - - if (ExitBB != ExitRegion->getEntry()) - return false; - - BasicBlock *NewExit = createSingleExitEdge(R, this); - - std::vector toUpdate; - toUpdate.push_back(R); - - while (!toUpdate.empty()) { - Region *R = toUpdate.back(); - toUpdate.pop_back(); - - for (auto &&SubRegion : *R) - if (SubRegion->getExit() == ExitBB) - toUpdate.push_back(SubRegion.get()); - - R->replaceExit(NewExit); - } - - RI->setRegionFor(NewExit, R->getParent()); - return true; -} - -bool IndependentBlocks::translateScalarToArray(const Region *R) { - bool Changed = false; - - for (BasicBlock *BB : R->blocks()) - Changed |= translateScalarToArray(BB, R); - - return Changed; -} - -// Returns true when Inst is only used inside region R. 
-bool IndependentBlocks::onlyUsedInRegion(Instruction *Inst, const Region *R) { - for (User *U : Inst->users()) - if (Instruction *UI = dyn_cast(U)) - if (isEscapeUse(UI, R)) - return false; - - return true; -} - -bool IndependentBlocks::translateScalarToArray(Instruction *Inst, - const Region *R) { - if (canSynthesize(Inst, LI, SE, R) && onlyUsedInRegion(Inst, R)) - return false; - - SmallVector LoadInside, LoadOutside; - for (User *U : Inst->users()) - // Inst is referenced outside or referenced as an escaped operand. - if (Instruction *UI = dyn_cast(U)) { - if (isEscapeUse(UI, R)) - LoadOutside.push_back(UI); - - if (DisableIntraScopScalarToArray) - continue; - - if (canSynthesize(UI, LI, SE, R)) - continue; - - BasicBlock *UParent = UI->getParent(); - if (R->contains(UParent) && isEscapeOperand(Inst, UParent, R)) - LoadInside.push_back(UI); - } - - if (LoadOutside.empty() && LoadInside.empty()) - return false; - - // Create the alloca. - AllocaInst *Slot = new AllocaInst( - Inst->getType(), 0, Inst->getName() + ".s2a", AllocaBlock->begin()); - assert(!isa(Inst) && "Unexpect Invoke in Scop!"); - - // Store right after Inst, and make sure the position is after all phi nodes. 
- BasicBlock::iterator StorePos; - if (isa(Inst)) { - StorePos = Inst->getParent()->getFirstNonPHI(); - } else { - StorePos = Inst; - StorePos++; - } - (void)new StoreInst(Inst, Slot, StorePos); - - if (!LoadOutside.empty()) { - LoadInst *ExitLoad = new LoadInst(Slot, Inst->getName() + ".loadoutside", - false, R->getExit()->getFirstNonPHI()); - - while (!LoadOutside.empty()) { - Instruction *U = LoadOutside.pop_back_val(); - SE->forgetValue(U); - U->replaceUsesOfWith(Inst, ExitLoad); - } - } - - while (!LoadInside.empty()) { - Instruction *U = LoadInside.pop_back_val(); - assert(!isa(U) && "Can not handle PHI node inside!"); - SE->forgetValue(U); - LoadInst *L = new LoadInst(Slot, Inst->getName() + ".loadarray", false, U); - U->replaceUsesOfWith(Inst, L); - } - - return true; -} - -bool IndependentBlocks::translateScalarToArray(BasicBlock *BB, - const Region *R) { - bool changed = false; - - SmallVector Insts; - for (BasicBlock::iterator II = BB->begin(), IE = --BB->end(); II != IE; ++II) - Insts.push_back(II); - - while (!Insts.empty()) { - Instruction *Inst = Insts.pop_back_val(); - changed |= translateScalarToArray(Inst, R); - } - - return changed; -} - -bool IndependentBlocks::isIndependentBlock(const Region *R, - BasicBlock *BB) const { - for (Instruction &Inst : *BB) { - if (canSynthesize(&Inst, LI, SE, R)) - continue; - - // A value inside the Scop is referenced outside. 
- for (User *U : Inst.users()) { - if (isEscapeUse(U, R)) { - DEBUG(dbgs() << "Instruction not independent:\n"); - DEBUG(dbgs() << "Instruction used outside the Scop!\n"); - DEBUG(Inst.print(dbgs())); - DEBUG(dbgs() << "\n"); - return false; - } - } - - if (DisableIntraScopScalarToArray) - continue; - - for (Value *Op : Inst.operands()) { - if (isEscapeOperand(Op, BB, R)) { - DEBUG(dbgs() << "Instruction in function '"; - BB->getParent()->printAsOperand(dbgs(), false); - dbgs() << "' not independent:\n"); - DEBUG(dbgs() << "Uses invalid operator\n"); - DEBUG(Inst.print(dbgs())); - DEBUG(dbgs() << "\n"); - DEBUG(dbgs() << "Invalid operator is: "; - Op->printAsOperand(dbgs(), false); dbgs() << "\n"); - return false; - } - } - } - - return true; -} - -bool IndependentBlocks::areAllBlocksIndependent(const Region *R) const { - for (BasicBlock *BB : R->blocks()) - if (!isIndependentBlock(R, BB)) - return false; - - return true; -} - -void IndependentBlocks::getAnalysisUsage(AnalysisUsage &AU) const { - // FIXME: If we set preserves cfg, the cfg only passes do not need to - // be "addPreserved"? - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); -#ifdef CLOOG_FOUND - AU.addPreserved(); -#endif -} - -bool IndependentBlocks::runOnFunction(llvm::Function &F) { - bool Changed = false; - - RI = &getAnalysis().getRegionInfo(); - LI = &getAnalysis(); - SD = &getAnalysis(); - SE = &getAnalysis(); - - AllocaBlock = &F.getEntryBlock(); - - DEBUG(dbgs() << "Run IndepBlock on " << F.getName() << '\n'); - - for (const Region *R : *SD) { - Changed |= createIndependentBlocks(R); - Changed |= eliminateDeadCode(R); - // This may change the RegionTree. 
- Changed |= splitExitBlock(const_cast(R)); - } - - DEBUG(dbgs() << "Before Scalar to Array------->\n"); - DEBUG(F.dump()); - - for (const Region *R : *SD) - Changed |= translateScalarToArray(R); - - DEBUG(dbgs() << "After Independent Blocks------------->\n"); - DEBUG(F.dump()); - - verifyAnalysis(); - - return Changed; -} - -void IndependentBlocks::verifyAnalysis() const { - for (const Region *R : *SD) - verifyScop(R); -} - -void IndependentBlocks::verifyScop(const Region *R) const { - assert(areAllBlocksIndependent(R) && "Cannot generate independent blocks"); -} - -char IndependentBlocks::ID = 0; -char &polly::IndependentBlocksID = IndependentBlocks::ID; - -Pass *polly::createIndependentBlocksPass() { return new IndependentBlocks(); } - -INITIALIZE_PASS_BEGIN(IndependentBlocks, "polly-independent", - "Polly - Create independent blocks", false, false); -INITIALIZE_PASS_DEPENDENCY(LoopInfo); -INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution); -INITIALIZE_PASS_DEPENDENCY(ScopDetection); -INITIALIZE_PASS_END(IndependentBlocks, "polly-independent", - "Polly - Create independent blocks", false, false) Index: test/Cloog/CodeGen/constant_condition.ll =================================================================== --- test/Cloog/CodeGen/constant_condition.ll +++ test/Cloog/CodeGen/constant_condition.ll @@ -1,4 +1,4 @@ -;RUN: opt %loadPolly -polly-prepare -polly-detect-scops-in-regions-without-loops -polly-detect-scops-in-functions-without-loops -polly-cloog -analyze < %s | FileCheck %s +;RUN: opt %loadPolly -polly-detect-scops-in-regions-without-loops -polly-detect-scops-in-functions-without-loops -polly-cloog -analyze < %s | FileCheck %s ;#include ;int A[1]; Index: test/CodePreparation/if_condition.ll =================================================================== --- test/CodePreparation/if_condition.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: opt %loadPolly -polly-prepare -S < %s | FileCheck %s -; RUN: opt %loadPolly 
-polly-prepare -S -polly-codegen-scev < %s | FileCheck %s - -; void f(long A[], long N) { -; long i; -; for (i = 0; i < N; ++i) -; A[i] = i; -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @f(i64* %A, i64 %N) nounwind { -entry: - fence seq_cst - br label %for.i -; CHECK: entry: -; CHECK: %value.reg2mem = alloca i64 -; CHECK: br label %entry.split - -for.i: - %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %merge ] - %scevgep = getelementptr i64* %A, i64 %indvar - %cmp = icmp eq i64 %indvar, 3 - br i1 %cmp, label %then, label %else - -then: - %add_two = add i64 %indvar, 2 - br label %merge -; CHECK: then: -; CHECK: %add_two = add i64 %indvar, 2 -; CHECK: store i64 %add_two, i64* %value.reg2mem -; CHECK: br label %merge - -else: - %add_three = add i64 %indvar, 4 - br label %merge -; CHECK: else: -; CHECK: %add_three = add i64 %indvar, 4 -; CHECK: store i64 %add_three, i64* %value.reg2mem -; CHECK: br label %merge - -merge: - %value = phi i64 [ %add_two, %then ], [ %add_three, %else ] - store i64 %value, i64* %scevgep - %indvar.next = add nsw i64 %indvar, 1 - %exitcond = icmp eq i64 %indvar.next, %N - br i1 %exitcond, label %return, label %for.i - -return: - fence seq_cst - ret void -} Index: test/CodePreparation/multiple_loops_trivial_phis.ll =================================================================== --- test/CodePreparation/multiple_loops_trivial_phis.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt %loadPolly -S -polly-prepare < %s | FileCheck %s -; ModuleID = 'multiple_loops_trivial_phis.ll' -; -; int f(int * __restrict__ A) { -; int i, j, sum = 0; -; for (i = 0; i < 100; i++) { -; sum *= 2; -; for (j = 0; j < 100; j++) { -; sum += A[i+j]; -; } -; } -; return sum; -; } - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function 
Attrs: nounwind uwtable -define i32 @f(i32* noalias %A) #0 { -entry: - br label %for.body - -for.body: ; preds = %entry, %for.inc5 - %sum.04 = phi i32 [ 0, %entry ], [ %add4.lcssa, %for.inc5 ] - %indvars.iv23 = phi i64 [ 0, %entry ], [ %2, %for.inc5 ] - %mul = shl nsw i32 %sum.04, 1 - br label %for.inc - -for.inc: ; preds = %for.body, %for.inc - %sum.12 = phi i32 [ %mul, %for.body ], [ %add4, %for.inc ] - %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ] - %0 = add i64 %indvars.iv23, %indvars.iv1 - %arrayidx = getelementptr i32* %A, i64 %0 - %tmp5 = load i32* %arrayidx, align 4 - %add4 = add nsw i32 %tmp5, %sum.12 - %1 = add nuw nsw i64 %indvars.iv1, 1 - %exitcond5 = icmp eq i64 %1, 100 - br i1 %exitcond5, label %for.inc5, label %for.inc - -for.inc5: ; preds = %for.inc - %add4.lcssa = phi i32 [ %add4, %for.inc ] - %2 = add nuw nsw i64 %indvars.iv23, 1 - %exitcond = icmp eq i64 %2, 100 - br i1 %exitcond, label %for.end7, label %for.body - -for.end7: ; preds = %for.inc5 - %add4.lcssa.lcssa = phi i32 [ %add4.lcssa, %for.inc5 ] - ret i32 %add4.lcssa.lcssa -} - -attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -; Verify that only two allocas are created. (instead of 4!) 
-; CHECK: alloca -; CHECK: alloca -; CHECK-NOT: alloca Index: test/CodePreparation/single_loop_trivial_phi.ll =================================================================== --- test/CodePreparation/single_loop_trivial_phi.ll +++ /dev/null @@ -1,50 +0,0 @@ -; RUN: opt %loadPolly -S -polly-prepare < %s | FileCheck %s -; ModuleID = 'single_loop_trivial_phi.ll' -; -; int f(int *A, int N) { -; int i, sum = 0; -; for (i = 0; i < N; i++) -; sum += A[i]; -; return sum; -; } -; ModuleID = 'stack-slots.ll' -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: nounwind uwtable -define i32 @f(i32* %A, i32 %N) #0 { -entry: - %cmp1 = icmp sgt i32 %N, 0 - br i1 %cmp1, label %for.inc.lr.ph, label %for.end - -for.inc.lr.ph: ; preds = %entry - %0 = zext i32 %N to i64 - br label %for.inc - -for.inc: ; preds = %for.inc.lr.ph, %for.inc - %sum.03 = phi i32 [ 0, %for.inc.lr.ph ], [ %add, %for.inc ] - %indvars.iv2 = phi i64 [ 0, %for.inc.lr.ph ], [ %indvars.iv.next, %for.inc ] - %arrayidx = getelementptr i32* %A, i64 %indvars.iv2 - %tmp1 = load i32* %arrayidx, align 4 - %add = add nsw i32 %tmp1, %sum.03 - %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1 - %exitcond = icmp ne i64 %indvars.iv.next, %0 - br i1 %exitcond, label %for.inc, label %for.cond.for.end_crit_edge - -for.cond.for.end_crit_edge: ; preds = %for.inc - %add.lcssa = phi i32 [ %add, %for.inc ] - br label %for.end - -for.end: ; preds = %for.cond.for.end_crit_edge, %entry - %sum.0.lcssa = phi i32 [ %add.lcssa, %for.cond.for.end_crit_edge ], [ 0, %entry ] - ret i32 %sum.0.lcssa -} - -attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -; Verify that only two allocas are created. 
-; Both are needed for the %sum.0 PHI node and none should be created for the -; %sum.0.lcssa PHI node -; CHECK: alloca -; CHECK: alloca -; CHECK-NOT: alloca Index: test/IndependentBlocks/inter_bb_scalar_dep.ll =================================================================== --- test/IndependentBlocks/inter_bb_scalar_dep.ll +++ /dev/null @@ -1,58 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -polly-codegen-scev -S < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -disable-polly-intra-scop-scalar-to-array -S < %s | FileCheck %s -check-prefix=SCALARACCESS -; RUN: opt %loadPolly -basicaa -polly-independent -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -S < %s | FileCheck %s -check-prefix=SCALARACCESS - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; for (i = 0; i < N; ++i) { -; A[i] = init + 2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; CHECK: entry -; CHECK: %init.s2a = alloca i64 -; CHECK: br label %for.i - -; SCALARACCESS-NOT: alloca - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - %init = load i64* %init_ptr -; SCALARACCESS-NOT: store - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init_plus_two = add i64 %init, 2 -; CHECK: %init.loadarray = load i64* %init.s2a -; CHECK: %init_plus_two = add i64 %init.loadarray, 2 -; SCALARACCESS: %init_plus_two = add i64 %init, 2 - %scevgep = getelementptr i64* %A, i64 %indvar.j - store i64 
%init_plus_two, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll =================================================================== --- test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll +++ /dev/null @@ -1,67 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -polly-codegen-scev -S < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -disable-polly-intra-scop-scalar-to-array -S < %s | FileCheck %s -check-prefix=SCALARACCESS -; RUN: opt %loadPolly -basicaa -polly-independent -disable-polly-intra-scop-scalar-to-array -polly-codegen-scev -S < %s | FileCheck %s -check-prefix=SCALARACCESS - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; for (i = 0; i < N; ++i) { -; init2 = *init_ptr; -; A[i] = init + init2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; CHECK: entry -; CHECK: %init.s2a = alloca i64 -; CHECK: br label %for.i - -; SCALARACCESS-NOT: alloca - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - %init = load i64* %init_ptr -; SCALARACCESS-NOT: store - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init_2 = load i64* %init_ptr - %init_sum = add i64 %init, %init_2 
-; CHECK: %init_2 = load i64* %init_ptr -; CHECK: %init.loadarray = load i64* %init.s2a -; CHECK: %init_sum = add i64 %init.loadarray, %init_2 - -; The SCEV of %init_sum is (%init + %init_2). It is referring to both an -; UnknownValue in the same and in a different basic block. We want only the -; reference to the different basic block to be replaced. - -; SCALARACCESS: %init_2 = load i64* %init_ptr -; SCALARACCESS: %init_sum = add i64 %init, %init_2 - %scevgep = getelementptr i64* %A, i64 %indvar.j - store i64 %init_sum, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/intra_bb_scalar_dep.ll =================================================================== --- test/IndependentBlocks/intra_bb_scalar_dep.ll +++ /dev/null @@ -1,54 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -polly-codegen-scev -S < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -disable-polly-intra-scop-scalar-to-array -S < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -disable-polly-intra-scop-scalar-to-array -polly-codegen-scev -S < %s | FileCheck %s - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; A[i] = init + 2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; CHECK: entry -; CHECK: br label %for.i - br label %for.i - -for.i: - %indvar.i = phi i64 [ 
0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init = load i64* %init_ptr - %init_plus_two = add i64 %init, 2 -; The scalar evolution of %init_plus_two is (2 + %init). So we have a -; non-trivial scalar evolution referring to a value in the same basic block. -; We want to ensure that this scalar is not translated into a memory copy. - %scevgep = getelementptr i64* %A, i64 %indvar.j - store i64 %init_plus_two, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/phi_outside_scop.ll =================================================================== --- test/IndependentBlocks/phi_outside_scop.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -polly-codegen-scev -S < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define void @phi_nodes_outside() { -entry: - br label %for.i.1 - -for.i.1: - %i.1 = phi i32 [ %i.1.next, %for.i.1 ], [ 0, %entry ] - %i.1.next = add nsw i32 %i.1, 1 - br i1 false, label %for.i.1 , label %for.i.2.preheader - -for.i.2.preheader: - br label %for.i.2 - -for.i.2: -; The value of %i.1.next is used outside of the scop in a PHI node. 
- %i.2 = phi i32 [ %i.2.next , %for.i.2 ], [ %i.1.next, %for.i.2.preheader ] - %i.2.next = add nsw i32 %i.2, 1 - fence seq_cst - br i1 false, label %for.i.2, label %cleanup - -cleanup: - ret void -} - -; CHECK: store i32 %i.1.next, i32* %i.1.next.s2a - -; CHECK: for.i.2.preheader: -; CHECK: %i.1.next.loadoutside = load i32* %i.1.next.s2a - -; CHECK: for.i.2: -; CHECK: %i.2 = phi i32 [ %i.2.next, %for.i.2 ], [ %i.1.next.loadoutside, %for.i.2.preheader ] - Index: test/IndependentBlocks/scalar_to_array.ll =================================================================== --- test/IndependentBlocks/scalar_to_array.ll +++ /dev/null @@ -1,270 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent < %s -S | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -polly-codegen-scev < %s -S | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-independent -disable-polly-intra-scop-scalar-to-array -S < %s | FileCheck %s -check-prefix=SCALARACCESS -; RUN: opt %loadPolly -basicaa -polly-independent -disable-polly-intra-scop-scalar-to-array -polly-codegen-scev < %s -S | FileCheck %s -check-prefix=SCALARACCESS - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -@A = common global [1024 x float] zeroinitializer, align 8 - -define i32 @empty() nounwind { -entry: - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - - -; CHECK: @array_access() -define i32 @array_access() nounwind { -entry: - fence seq_cst - br label %for.cond -; CHECK: entry: -; CHECK-NOT: alloca -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca - 
-for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar - %float = uitofp i64 %indvar to float - store float %float, float* %arrayidx - br label %for.inc - -; CHECK: for.body: -; CHECK: %float = uitofp i64 %indvar to float -; CHECK: store float %float, float* %arrayidx -; SCALARACCESS: for.body: -; SCALARACCESS: %float = uitofp i64 %indvar to float -; SCALARACCESS: store float %float, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; CHECK: @intra_scop_dep() -define i32 @intra_scop_dep() nounwind { -entry: - fence seq_cst - br label %for.cond - -; CHECK: entry: -; CHECK: %scalar.s2a = alloca float -; CHECK: fence -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca -; SCALARACCESS: fence - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body.a, label %return - -for.body.a: - %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar - %scalar = load float* %arrayidx - br label %for.body.b - -; CHECK: for.body.a: -; CHECK: %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar -; CHECK: %scalar = load float* %arrayidx -; CHECK: store float %scalar, float* %scalar.s2a -; CHECK: br label %for.body.b - -; SCALARACCESS: for.body.a: -; SCALARACCESS: %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar -; SCALARACCESS: %scalar = load float* %arrayidx -; SCALARACCESS-NOT: store -; SCALARACCESS: br label %for.body.b - -for.body.b: - %arrayidx2 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar - %float = uitofp i64 %indvar to float - %sum = fadd float %scalar, %float - store float %sum, float* %arrayidx2 - br label %for.inc - -; CHECK: for.body.b: -; CHECK: %arrayidx2 = getelementptr 
[1024 x float]* @A, i64 0, i64 %indvar -; CHECK: %float = uitofp i64 %indvar to float -; CHECK: %scalar.loadarray = load float* %scalar.s2a -; CHECK: %sum = fadd float %scalar.loadarray, %float -; CHECK: store float %sum, float* %arrayidx2 -; CHECK: br label %for.inc - -; SCALARACCESS: for.body.b: -; SCALARACCESS: %arrayidx2 = getelementptr [1024 x float]* @A, i64 0, i64 %indvar -; SCALARACCESS: %float = uitofp i64 %indvar to float -; SCALARACCESS-NOT: load -; SCALARACCESS: %sum = fadd float %scalar, %float -; SCALARACCESS: store float %sum, float* %arrayidx2 -; SCALARACCESS: br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; It is not possible to have a scop which accesses a scalar element that is -; a global variable. All global variables are pointers containing possibly -; a single element. Hence they do not need to be handled anyways. -; Please note that this is still required when scalar to array rewritting is -; disabled. 
- -; CHECK: @use_after_scop() -define i32 @use_after_scop() nounwind { -entry: - fence seq_cst - br label %for.head -; CHECK: entry: -; CHECK: %scalar.s2a = alloca float -; CHECK: fence - -; SCALARACCESS: entry: -; SCALARACCESS: %scalar.s2a = alloca float -; SCALARACCESS: fence - -for.head: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - br label %for.body - -for.body: - %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar - %scalar = load float* %arrayidx - br label %for.inc - -; CHECK: for.body: -; CHECK: %scalar = load float* %arrayidx -; CHECK: store float %scalar, float* %scalar.s2a - -; SCALARACCESS: for.body: -; SCALARACCESS: %scalar = load float* %arrayidx -; SCALARACCESS: store float %scalar, float* %scalar.s2a - -for.inc: - %indvar.next = add i64 %indvar, 1 - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.head, label %for.after - -for.after: - fence seq_cst - %return_value = fptosi float %scalar to i32 - br label %return - -; CHECK: for.after: -; CHECK: %scalar.loadoutside = load float* %scalar.s2a -; CHECK: fence seq_cst -; CHECK: %return_value = fptosi float %scalar.loadoutside to i32 - -; SCALARACCESS: for.after: -; SCALARACCESS: %scalar.loadoutside = load float* %scalar.s2a -; SCALARACCESS: fence seq_cst -; SCALARACCESS: %return_value = fptosi float %scalar.loadoutside to i32 - -return: - ret i32 %return_value -} - -; We currently do not transform scalar references, that have only read accesses -; in the scop. There are two reasons for this: -; -; o We don't introduce additional memory references which may yield to compile -; time overhead. -; o For integer values, such a translation may block the use of scalar -; evolution on those values. 
-; -; CHECK: @before_scop() -define i32 @before_scop() nounwind { -entry: - br label %preheader - -preheader: - %scalar = fadd float 4.0, 5.0 - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar - store float %scalar, float* %arrayidx - br label %for.inc - -; CHECK: for.body: -; CHECK: store float %scalar, float* %arrayidx - -; SCALARACCESS: for.body: -; SCALARACCESS: store float %scalar, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; Currently not working -; CHECK: @param_before_scop( -define i32 @param_before_scop(float %scalar) nounwind { -entry: - fence seq_cst - br label %for.cond -; CHECK: entry: -; CHECK: fence - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 %indvar - store float %scalar, float* %arrayidx - br label %for.inc - -; CHECK: for.body: -; CHECK: store float %scalar, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} Index: test/IndependentBlocks/scev-invalidated.ll =================================================================== --- test/IndependentBlocks/scev-invalidated.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: opt %loadPolly -polly-codegen-scev -polly-independent < %s -target datalayout ="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define void @arc_either() { -entry: - %ang2.2.reg2mem = alloca i64 - br i1 undef, label 
%return, label %if.then6 - -if.then6: - %rem7 = srem i64 undef, 1474560 - br i1 false, label %if.else, label %return - -if.else: - %add16 = add nsw i64 %rem7, 1474560 - %rem7.add16 = select i1 undef, i64 %rem7, i64 %add16 - store i64 %rem7.add16, i64* %ang2.2.reg2mem - br label %return - -return: - ret void -} Index: test/Isl/Ast/dependence_distance_parametric_expr.ll =================================================================== --- test/Isl/Ast/dependence_distance_parametric_expr.ll +++ test/Isl/Ast/dependence_distance_parametric_expr.ll @@ -3,7 +3,7 @@ ; void f(int *A, int N, int c, int v) { ; CHECK: #pragma minimal dependence distance: 1 ; for (int j = 0; j < N; j++) -; CHECK: #pragma minimal dependence distance: c + v >= 1 ? c + v : -c - v +; CHECK: #pragma minimal dependence distance: v + c >= 1 ? v + c : -v - c ; for (int i = 0; i < N; i++) ; A[i + c + v] = A[i] + 1; ; } Index: test/Isl/CodeGen/constant_condition.ll =================================================================== --- test/Isl/CodeGen/constant_condition.ll +++ test/Isl/CodeGen/constant_condition.ll @@ -1,4 +1,4 @@ -;RUN: opt %loadPolly -polly-prepare -polly-detect-scops-in-regions-without-loops -polly-detect-scops-in-functions-without-loops -polly-ast -analyze < %s | FileCheck %s +;RUN: opt %loadPolly -polly-detect-scops-in-regions-without-loops -polly-detect-scops-in-functions-without-loops -polly-ast -analyze < %s | FileCheck %s ;#include ;int A[1]; Index: test/Isl/CodeGen/loop_with_condition_nested.ll =================================================================== --- test/Isl/CodeGen/loop_with_condition_nested.ll +++ test/Isl/CodeGen/loop_with_condition_nested.ll @@ -215,4 +215,4 @@ ; LOOPS: Printing analysis 'Natural Loop Information' for function 'loop_with_condition': ; LOOPS: Loop at depth 1 containing: %1
,%2,%4,%7,%6,%8,%9,%10 ; LOOPS: Loop at depth 1 containing: -; LOOPS: %polly.loop_header
,%polly.cond,%polly.merge,%polly.then,%polly.else,%polly.stmt.,%polly.cond3,%polly.merge4,%polly.then5,%polly.else6,%polly.stmt.7,%polly.stmt.8 +; LOOPS: %polly.loop_header
,%polly.cond,%polly.merge,%polly.then,%polly.else,%polly.stmt.,%polly.cond3,%polly.merge4,%polly.then5,%polly.else6,%polly.stmt.7,%polly.stmt.9 Index: test/ScopDetect/simple_loop_two_phi_nodes.ll =================================================================== --- test/ScopDetect/simple_loop_two_phi_nodes.ll +++ test/ScopDetect/simple_loop_two_phi_nodes.ll @@ -1,5 +1,4 @@ ; RUN: opt %loadPolly -polly-detect -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -polly-detect -polly-codegen-scev -analyze < %s | FileCheck %s -check-prefix=CHECK-SCEV ; void f(long A[], long N) { ; long i; @@ -32,5 +31,4 @@ ret void } -; CHECK-NOT: Valid Region for Scop -; CHECK-SCEV: Valid Region for Scop: for.i => return +; CHECK: Valid Region for Scop: for.i => return Index: test/ScopInfo/bug_2011_1_5.ll =================================================================== --- test/ScopInfo/bug_2011_1_5.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: opt %loadPolly -polly-analyze-ir -analyze < %s - -; Bug description: Alias Analysis thinks IntToPtrInst aliases with alloca instructions created by IndependentBlocks Pass. -; This will trigger the assertion when we are verifying the SCoP after IndependentBlocks. 
- -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-unknown-linux-gnu" - -%struct.precisionType = type { i16, i16, i16, i8, [1 x i16] } - -define void @main() nounwind { -entry: - br label %bb1.i198.i - -bb1.i198.i: ; preds = %bb.i197.i, %psetq.exit196.i - %tmp51.i = inttoptr i64 0 to %struct.precisionType* - br i1 undef, label %bb1.i210.i, label %bb.i209.i - -bb.i209.i: ; preds = %bb1.i198.i - br label %bb1.i210.i - -bb1.i210.i: ; preds = %bb.i209.i, %bb1.i198.i - %0 = icmp eq i64 0, 0 - br i1 %0, label %bb1.i216.i, label %bb.i215.i - -bb.i215.i: ; preds = %bb1.i210.i - %1 = getelementptr inbounds %struct.precisionType* %tmp51.i, i64 0, i32 0 - store i16 undef, i16* %1, align 2 - br label %bb1.i216.i - -bb1.i216.i: ; preds = %bb.i215.i, %bb1.i210.i - br i1 undef, label %psetq.exit220.i, label %bb2.i217.i - -bb2.i217.i: ; preds = %bb1.i216.i - br i1 undef, label %bb3.i218.i, label %psetq.exit220.i - -bb3.i218.i: ; preds = %bb2.i217.i - br label %psetq.exit220.i - -psetq.exit220.i: ; preds = %bb3.i218.i, %bb2.i217.i, %bb1.i216.i - br i1 undef, label %bb14.i76, label %bb15.i77 - -bb14.i76: ; preds = %psetq.exit220.i - unreachable - -bb15.i77: ; preds = %psetq.exit220.i - br i1 %0, label %psetq.exit238.i, label %bb2.i235.i - -bb2.i235.i: ; preds = %bb15.i77 - br i1 undef, label %bb3.i236.i, label %psetq.exit238.i - -bb3.i236.i: ; preds = %bb2.i235.i - unreachable - -psetq.exit238.i: ; preds = %bb2.i235.i, %bb15.i77 - unreachable - -bb56.i.loopexit: ; preds = %psetq.exit172.i - unreachable -} Index: test/ScopInfo/loop_carry.ll =================================================================== --- test/ScopInfo/loop_carry.ll +++ test/ScopInfo/loop_carry.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -basicaa -polly-prepare -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-linux-gnu" Index: test/ScopInfo/phi_not_grouped_at_top.ll =================================================================== --- test/ScopInfo/phi_not_grouped_at_top.ll +++ test/ScopInfo/phi_not_grouped_at_top.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-prepare -analyze < %s +; RUN: opt %loadPolly -analyze < %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-linux-gnu" Index: test/ScopInfo/phi_scalar_simple_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/phi_scalar_simple_1.ll @@ -0,0 +1,80 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; int jd(int *restrict A, int x) { +; for (int i = 0; i < 32; i++) +; for (int j = 0; j < 32; j++) +; x += A[i]; +; return x; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define i32 @jd(i32* noalias %A, i32 %x) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry +; CHECK: Domain := +; CHECK: { Stmt_for_cond[i0] : i0 >= 0 and i0 <= 32 }; + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc4 ], [ 0, %entry ] + %x.addr.0 = phi i32 [ %x, %entry ], [ %x.addr.1.lcssa, %for.inc4 ] +; CHECK: MustWriteAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_cond[i0] -> MemRef_x_addr_0[] }; +; CHECK: ReadAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_cond[i0] -> MemRef_x_addr_1_lcssa[] : i0 >= 1 and i0 <= 32 }; + %cmp = icmp slt i64 %indvars.iv, 32 + br i1 %cmp, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body +; CHECK: Domain := +; CHECK: { Stmt_for_cond1[i0, i1] : i0 >= 0 and i0 <= 31 and i1 >= 0 and i1 
<= 32 }; + %x.addr.1 = phi i32 [ %x.addr.0, %for.body ], [ %add, %for.inc ] +; CHECK: MustWriteAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_cond1[i0, i1] -> MemRef_x_addr_1[] }; +; CHECK: ReadAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_cond1[i0, 0] -> MemRef_x_addr_0[] : i0 >= 0 and i0 <= 31 }; +; CHECK: ReadAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_cond1[i0, i1] -> MemRef_add[] : i0 >= 0 and i0 <= 31 and i1 >= 1 and i1 <= 32 }; + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 32 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + br label %for.inc + +for.inc: ; preds = %for.body3 +; CHECK: Domain := +; CHECK: { Stmt_for_inc[i0, i1] : i0 >= 0 and i0 <= 31 and i1 >= 0 and i1 <= 31 }; + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp1 = load i32* %arrayidx, align 4 +; CHECK-DAG: ReadAccess := [Reduction Type: NONE] +; CHECK-DAG: { Stmt_for_inc[i0, i1] -> MemRef_A[i0] }; + %add = add nsw i32 %x.addr.1, %tmp1 +; CHECK-DAG: ReadAccess := [Reduction Type: NONE] +; CHECK-DAG: { Stmt_for_inc[i0, i1] -> MemRef_x_addr_1[] }; +; CHECK-DAG: MustWriteAccess := [Reduction Type: NONE] +; CHECK-DAG: { Stmt_for_inc[i0, i1] -> MemRef_add[] }; + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 +; CHECK: Domain := +; CHECK: { Stmt_for_end[i0] : i0 >= 0 and i0 <= 31 }; + %x.addr.1.lcssa = phi i32 [ %x.addr.1, %for.cond1 ] +; CHECK: MustWriteAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_end[i0] -> MemRef_x_addr_1_lcssa[] }; +; CHECK: ReadAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_end[i0] -> MemRef_x_addr_1[] : i0 >= 0 and i0 <= 31 }; + br label %for.inc4 + +for.inc4: ; preds = %for.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + %x.addr.0.lcssa = phi i32 [ %x.addr.0, %for.cond ] + ret i32 %x.addr.0.lcssa +} Index: test/ScopInfo/phi_with_invoke_edge.ll 
=================================================================== --- test/ScopInfo/phi_with_invoke_edge.ll +++ test/ScopInfo/phi_with_invoke_edge.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-prepare -polly-detect -analyze < %s +; RUN: opt %loadPolly -polly-detect -analyze < %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-linux-gnu" Index: test/ScopInfo/scalar.ll =================================================================== --- test/ScopInfo/scalar.ll +++ test/ScopInfo/scalar.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-scops -analyze -disable-polly-intra-scop-scalar-to-array < %s | FileCheck %s +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" Index: test/TempScop/inter_bb_scalar_dep.ll =================================================================== --- test/TempScop/inter_bb_scalar_dep.ll +++ test/TempScop/inter_bb_scalar_dep.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -analyze < %s | FileCheck %s ; void f(long A[], int N, int *init_ptr) { ; long i, j; Index: test/TempScop/intra_and_inter_bb_scalar_dep.ll =================================================================== --- test/TempScop/intra_and_inter_bb_scalar_dep.ll +++ test/TempScop/intra_and_inter_bb_scalar_dep.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -basicaa 
-polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -analyze < %s | FileCheck %s ; void f(long A[], int N, int *init_ptr) { ; long i, j; Index: test/TempScop/intra_bb_scalar_dep.ll =================================================================== --- test/TempScop/intra_bb_scalar_dep.ll +++ test/TempScop/intra_bb_scalar_dep.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -analyze < %s | FileCheck %s ; void f(long A[], int N, int *init_ptr) { ; long i, j; Index: test/TempScop/scalar_to_array.ll =================================================================== --- test/TempScop/scalar_to_array.ll +++ test/TempScop/scalar_to_array.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-analyze-ir -disable-polly-intra-scop-scalar-to-array -polly-codegen-scev -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-analyze-ir -polly-codegen-scev -analyze < %s | FileCheck %s ; ModuleID = 'scalar_to_array.ll' target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" Index: test/TempScop/tempscop-printing.ll =================================================================== --- test/TempScop/tempscop-printing.ll +++ test/TempScop/tempscop-printing.ll @@ -1,5 +1,4 @@ ; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -basicaa -polly-analyze-ir -analyze -disable-polly-intra-scop-scalar-to-array < %s | FileCheck %s -check-prefix=SCALARACCESS ; void f(long A[], int N, int *init_ptr) { ; long i, j; @@ -26,23 +25,16 @@ entry.next: ; CHECK: BB: entry.next -; SCALARACCESS: BB: entry.next %init = load i64* %init_ptr -; CHECK: Read init_ptr[0] -; CHECK: Write init.s2a[0] -; SCALARACCESS: Read init_ptr[0] -; SCALARACCESS: Write init[0] +; CHECK: Read init_ptr[0] +; CHECK: Write init[0] br label %for.j for.j: %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] ; CHECK: BB: for.j -; CHECK: Read init.s2a[0] +; CHECK: Read init ; CHECK: Write A[{0,+,8}<%for.j>] - -; SCALARACCESS: BB: for.j -; SCALARACCESS: Read init -; SCALARACCESS: Write A[{0,+,8}<%for.j>] %init_plus_two = add i64 %init, 2 %scevgep = getelementptr i64* %A, i64 %indvar.j store i64 %init_plus_two, i64* %scevgep @@ -68,19 +60,19 @@ br label %entry.next entry.next: -; SCALARACCESS: BB: entry.next +; CHECK: BB: entry.next %init = load i64* %init_ptr -; SCALARACCESS: Read init_ptr[0] -; SCALARACCESS: Write init[0] +; CHECK: Read init_ptr[0] +; CHECK: Write init[0] br label %for.j for.j: -; SCALARACCESS: BB: for.j +; CHECK: BB: for.j %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] %scevgep = getelementptr i64* %A, i64 %indvar.j store i64 %init, i64* %scevgep -; SCALARACCESS: Read init -; SCALARACCESS: Write A[{0,+,8}<%for.j>] +; CHECK: Read init +; CHECK: Write A[{0,+,8}<%for.j>] %indvar.j.next = add nsw i64 %indvar.j, 1 %exitcond.j = icmp eq i64 
%indvar.j.next, %N br i1 %exitcond.j, label %for.i.end, label %for.j Index: www/documentation/passes.html =================================================================== --- www/documentation/passes.html +++ www/documentation/passes.html @@ -20,7 +20,6 @@

Front End

    -
  • polly-prepare Prepare code for Polly
  • polly-detect Detect SCoPs in functions
  • polly-analyze-ir Analyse the LLVM-IR in the detected SCoPs
  • polly-independent Create independent blocks