Index: include/polly/CodeGen/BlockGenerators.h
===================================================================
--- include/polly/CodeGen/BlockGenerators.h
+++ include/polly/CodeGen/BlockGenerators.h
@@ -387,6 +387,17 @@
   /// the original value in the non-optimized SCoP.
   void createScalarFinalization(Region &R);
 
+  /// @brief Recompute all scalars needed in this statement.
+  ///
+  /// During SCoP creation scalars can be virtually moved to simplify the SCoP
+  /// description as well as the dependences. However, they are only moved if
+  /// we can recompute them in the statements in which they are used. This
+  /// method will perform the recomputation before we clone the original
+  /// statement into the new, optimized region, thus ensuring all scalars are
+  /// available.
+  void recomputeDependentScalars(ScopStmt &Stmt, ValueMapT &BBMap,
+                                 LoopToScevMapT &LTS,
+                                 isl_id_to_ast_expr *NewAccesses);
+
   /// @brief Try to synthesize a new value
   ///
   /// Given an old value, we try to synthesize it in a new context from its
@@ -409,7 +420,7 @@
   /// @returns o A newly synthesized value.
   ///          o NULL, if synthesizing the value failed.
   Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                               LoopToScevMapT &LTS, Loop *L) const;
+                               LoopToScevMapT &LTS, Loop *L);
 
   /// @brief Get the new version of a value.
   ///
@@ -431,15 +442,18 @@
   /// @param L       The loop that surrounded the instruction that referenced
   ///                this value in the original code. This loop is used to
   ///                evaluate the scalar evolution at the right scope.
+  /// @param TryOnly Flag to indicate that nullptr is a valid return value
+  ///                if no new value was found.
   ///
   /// @returns o The old value, if it is still valid.
   ///          o The new value, if available.
-  ///          o NULL, if no value is found.
+  ///          o NULL, if no value is found and TryOnly is set.
+  ///          o Otherwise, a trap is triggered.
   Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                     LoopToScevMapT &LTS, Loop *L) const;
+                     LoopToScevMapT &LTS, Loop *L, bool TryOnly = false);
 
   void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
-                      LoopToScevMapT &LTS);
+                      LoopToScevMapT &LTS, bool Recompute = false);
 
   /// @brief Get the innermost loop that surrounds an instruction.
   ///
@@ -500,8 +514,13 @@
   /// @param NewAccesses A map from memory access ids to new ast expressions,
   ///                    which may contain new access expressions for certain
   ///                    memory accesses.
+  /// @param Recompute   Flag to indicate that the instruction is a scalar
+  ///                    that needs to be recomputed in this statement. It
+  ///                    basically forces us to copy not only the instruction
+  ///                    but also all operands if we cannot find a local or
+  ///                    global mapping.
   void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
-                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
+                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses,
+                       bool Recompute = false);
 
   /// @brief Helper to get the newest version of @p ScalarValue.
  ///
Index: include/polly/LinkAllPasses.h
===================================================================
--- include/polly/LinkAllPasses.h
+++ include/polly/LinkAllPasses.h
@@ -33,7 +33,6 @@
 llvm::Pass *createDOTOnlyViewerPass();
 llvm::Pass *createDOTPrinterPass();
 llvm::Pass *createDOTViewerPass();
-llvm::Pass *createIndependentBlocksPass();
 llvm::Pass *createJSONExporterPass();
 llvm::Pass *createJSONImporterPass();
 llvm::Pass *createPollyCanonicalizePass();
@@ -43,7 +42,6 @@
 llvm::Pass *createCodeGenerationPass();
 llvm::Pass *createIslScheduleOptimizerPass();
 
-extern char &IndependentBlocksID;
 extern char &CodePreparationID;
 }
 
@@ -64,7 +62,6 @@
       polly::createDOTOnlyViewerPass();
       polly::createDOTPrinterPass();
       polly::createDOTViewerPass();
-      polly::createIndependentBlocksPass();
       polly::createJSONExporterPass();
       polly::createJSONImporterPass();
       polly::createScopDetectionPass();
@@ -81,7 +78,6 @@
 class PassRegistry;
 void initializeCodePreparationPass(llvm::PassRegistry &);
 void initializeDeadCodeElimPass(llvm::PassRegistry &);
-void initializeIndependentBlocksPass(llvm::PassRegistry &);
 void initializeJSONExporterPass(llvm::PassRegistry &);
 void initializeJSONImporterPass(llvm::PassRegistry &);
 void initializeIslAstInfoPass(llvm::PassRegistry &);
Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -511,6 +511,12 @@
   /// @param SAI Info object for the accessed array.
   void buildAccessRelation(const ScopArrayInfo *SAI);
 
+  /// @brief Copy this memory access into the given statement @p Stmt.
+  ///
+  /// @param AccList The list that contains all accesses for @p Stmt.
+  /// @param Stmt    The statement the copied access should reside in.
+  MemoryAccess *copy(AccFuncSetType &AccList, ScopStmt *Stmt) const;
+
 public:
   /// @brief Create a new MemoryAccess.
   ///
@@ -534,7 +540,7 @@
   ~MemoryAccess();
 
   /// @brief Get the type of a memory access.
-  enum AccessType getType() { return AccType; }
+  enum AccessType getType() const { return AccType; }
 
   /// @brief Is this a reduction like access?
   bool isReductionLike() const { return RedType != RT_NONE; }
@@ -772,6 +778,9 @@
 
   std::string BaseName;
 
+  /// @brief Set of scalar values that need to be recomputed in this statement.
+  SetVector<Instruction *> DependentScalars;
+
   /// Build the statement.
   //@{
   void buildDomain();
@@ -910,7 +919,18 @@
   }
 
   /// @brief Add @p Access to this statement's list of accesses.
-  void addAccess(MemoryAccess *Access);
+  ///
+  /// @param Access The access to add.
+  /// @param Front  Flag to indicate whether the access should be added at
+  ///               the front of the access list.
+  void addAccess(MemoryAccess *Access, bool Front = false);
+
+  /// @brief Remove the memory access @p MA from this statement.
+  ///
+  /// @param MA     The access to remove.
+  /// @param OnlyMA Flag to indicate whether only @p MA should be removed
+  ///               (true) or all accesses caused by the access instruction
+  ///               of @p MA (false).
+  void removeMemoryAccess(MemoryAccess *MA, bool OnlyMA);
 
   /// @brief Move the memory access in @p InvMAs to @p InvariantEquivClasses.
   ///
@@ -941,6 +961,14 @@
   /// @brief Get the isl AST build.
   __isl_keep isl_ast_build *getAstBuild() const { return Build; }
 
+  /// @brief Add a scalar that needs to be recomputed in this statement.
+  void addDependentScalar(Instruction *Inst) { DependentScalars.insert(Inst); }
+
+  /// @brief Return the scalars that need to be recomputed in this statement.
+  const SetVector<Instruction *> &getDependentScalars() const {
+    return DependentScalars;
+  }
+
   /// @brief Restrict the domain of the statement.
  ///
  /// @param NewDomain The new statement domain.
@@ -1225,6 +1253,25 @@
   /// Required inv. loads: LB[0], LB[1], (V, if it may alias with A or LB)
   void hoistInvariantLoads();
 
+  /// @brief Check if we can recompute all instructions in @p Stmt.
+  ///
+  /// @param Stmt  The statement we want to recompute @p Insts in.
+  /// @param Insts The instructions we need to recompute.
+  ///
+  /// @returns True, if all instructions can be recomputed in @p Stmt.
+  bool canRecomputeInStmt(ScopStmt &Stmt,
+                          SmallPtrSet<Instruction *, 8> &Insts);
+
+  /// @brief Simplify the scalar accesses in this SCoP.
+  ///
+  /// Scalar accesses are often not needed and are only caused by the
+  /// placement of instructions in the code. Additionally, it is sometimes
+  /// possible to recompute scalars to avoid communication. As scalars
+  /// basically sequentialize all loops they are in, we try to avoid scalar
+  /// accesses as much as possible. To this end, we virtually move them here
+  /// and later recompute them during code generation. This allows more
+  /// freedom for the scheduler while we do not need to change the original
+  /// code region at all.
+  void simplifyScalarAccesses();
+
   /// @brief Build the Context of the Scop.
   void buildContext();
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -58,6 +58,8 @@
 
 STATISTIC(ScopFound, "Number of valid Scops");
 STATISTIC(RichScopFound, "Number of Scops containing a loop");
+STATISTIC(ScalarsEliminated, "Number of scalars eliminated (moved/recomputed)");
+STATISTIC(StatementsEliminated, "Number of statements eliminated");
 
 static cl::opt<bool> ModelReadOnlyScalars(
     "polly-analyze-read-only-scalars",
@@ -652,6 +654,17 @@
       Subscripts(Subscripts.begin(), Subscripts.end()),
       AccessRelation(nullptr), NewAccessRelation(nullptr) {}
 
+MemoryAccess *MemoryAccess::copy(AccFuncSetType &AccList,
+                                 ScopStmt *Stmt) const {
+  AccList.emplace_back(Stmt, getAccessInstruction(), getId(), getType(),
+                       getBaseAddr(), getElemSizeInBytes(), isAffine(),
+                       Subscripts, Sizes, getAccessValue(), Origin,
+                       getBaseName());
+  MemoryAccess *CopyMA = &AccList.back();
+  CopyMA->buildAccessRelation(getScopArrayInfo());
+  return CopyMA;
+}
+
 void MemoryAccess::realignParams() {
   isl_space *ParamSpace = Statement->getParent()->getParamSpace();
   AccessRelation = isl_map_align_params(AccessRelation, ParamSpace);
@@ -825,14 +838,17 @@
   }
 }
 
-void ScopStmt::addAccess(MemoryAccess *Access) {
+void ScopStmt::addAccess(MemoryAccess *Access, bool Front) {
   Instruction *AccessInst = Access->getAccessInstruction();
 
   MemoryAccessList *&MAL = InstructionToAccess[AccessInst];
   if (!MAL)
     MAL = new MemoryAccessList();
   MAL->emplace_front(Access);
-  MemAccs.push_back(MAL->front());
+  if (Front)
+    MemAccs.insert(MemAccs.begin(), MAL->front());
+  else
+    MemAccs.push_back(MAL->front());
 }
 
 void ScopStmt::realignParams() {
@@ -1354,6 +1370,38 @@
 
 void ScopStmt::dump() const { print(dbgs()); }
 
+void ScopStmt::removeMemoryAccess(MemoryAccess *MA, bool OnlyMA) {
+  auto &MAL = *lookupAccessesFor(MA->getAccessInstruction());
+  MAL.reverse();
+
+  auto MALIt = MAL.begin();
+  auto MALLastIt = MAL.before_begin();
+  auto MALEnd = MAL.end();
+  auto MemAccsIt = MemAccs.begin();
+  while (true) {
+
+    while (OnlyMA && MALIt != MALEnd && (MA != *MALIt)) {
+      MALLastIt++;
+      MALIt++;
+    }
+
+    if (MALIt == MALEnd)
+      break;
+
+    while (*MemAccsIt != *MALIt)
+      MemAccsIt++;
+
+    MemAccs.erase(MemAccsIt);
+    MALIt = MAL.erase_after(MALLastIt);
+  }
+
+  if (!MAL.empty())
+    return;
+
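+  // All accesses of the access instruction have been removed above, so we
+  // can also drop the now empty access list and its entry in the
+  // instruction-to-access map.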
+  InstructionToAccess.erase(MA->getAccessInstruction());
+  delete &MAL;
+}
+
 void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
                                    InvariantAccessesTy &InvariantEquivClasses) {
 
@@ -1362,24 +1410,8 @@
   // order uses is needed because the MemAccs is a vector and the order in
   // which the accesses of each memory access list (MAL) are stored in this
   // vector is reversed.
-  for (MemoryAccess *MA : InvMAs) {
-    auto &MAL = *lookupAccessesFor(MA->getAccessInstruction());
-    MAL.reverse();
-
-    auto MALIt = MAL.begin();
-    auto MALEnd = MAL.end();
-    auto MemAccsIt = MemAccs.begin();
-    while (MALIt != MALEnd) {
-      while (*MemAccsIt != *MALIt)
-        MemAccsIt++;
-
-      MALIt++;
-      MemAccs.erase(MemAccsIt);
-    }
-
-    InstructionToAccess.erase(MA->getAccessInstruction());
-    delete &MAL;
-  }
+  for (MemoryAccess *MA : InvMAs)
+    removeMemoryAccess(MA, false);
 
   // Get the context under which this statement, hence the memory accesses, are
   // executed.
@@ -2449,6 +2481,7 @@
   buildAliasChecks(AA);
 
   hoistInvariantLoads();
+  simplifyScalarAccesses();
   simplifySCoP(false);
 }
 
@@ -2503,6 +2536,7 @@
 
       StmtMap.erase(Stmt.getBasicBlock());
       StmtIt = Stmts.erase(StmtIt);
+      StatementsEliminated++;
       continue;
     }
 
@@ -2650,6 +2684,207 @@
                 compareInvariantAccesses);
 }
 
+bool Scop::canRecomputeInStmt(ScopStmt &Stmt,
+                              SmallPtrSet<Instruction *, 8> &Insts) {
+  if (Insts.empty())
+    return true;
+
+  // TODO: Check if we can actually move the instructions.
+  return false;
+}
+
+void Scop::simplifyScalarAccesses() {
+  using OutsideOperandsSetTy =
+      SmallVector<std::pair<Instruction *, Instruction *>, 4>;
+  using InstructionSetTy = SmallPtrSet<Instruction *, 8>;
+  using NonTrivialOperandsPairTy =
+      std::pair<InstructionSetTy, OutsideOperandsSetTy>;
+  DenseMap<Instruction *, NonTrivialOperandsPairTy> NonTrivialOperandsMap;
+
+  // First, iterate over all implicit write accesses, hence scalar
+  // definitions, and collect all operands that might have side effects or
+  // read memory, as well as all operands that are defined outside the SCoP.
+  // The former are needed to decide if we can recompute the scalar
+  // definition in another statement. The latter are needed to add read-only
+  // scalar accesses to the statement in which the scalar is recomputed. This
+  // allows us to identify values that are needed, e.g., for parallel code
+  // generation.
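+  //
+  // As a hypothetical illustration, for
+  //   %out = ...                    ; defined before the SCoP
+  //   %ld  = load float, float* %P  ; defined inside the SCoP
+  //   %def = fadd float %ld, %out
+  // the load %ld becomes a side-effect operand of %def (it reads memory)
+  // and the pair (%out, %def) becomes an outside operand of %def.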
+
+  SmallPtrSet<Instruction *, 8> Visited;
+  for (ScopStmt &Stmt : *this) {
+    for (MemoryAccess *MA : Stmt) {
+      if (MA->isExplicit() || MA->isRead() || MA->isPHI())
+        continue;
+
+      Instruction *AccessInst = MA->getAccessInstruction();
+      if (isa<TerminatorInst>(AccessInst))
+        AccessInst = cast<Instruction>(MA->getBaseAddr());
+
+      DEBUG(dbgs() << "Check operand tree of " << *AccessInst << "\n");
+
+      auto &NonTrivialOperands = NonTrivialOperandsMap[AccessInst];
+      auto &SideEffectOperands = NonTrivialOperands.first;
+      auto &OutsideOperands = NonTrivialOperands.second;
+
+      SmallPtrSet<Instruction *, 8> Worklist;
+      Worklist.insert(AccessInst);
+      Visited.clear();
+
+      while (!Worklist.empty()) {
+        Instruction *Inst = *Worklist.begin();
+        Worklist.erase(Inst);
+
+        if (!Visited.insert(Inst).second || !R.contains(Inst))
+          continue;
+
+        for (auto &InstOp : Inst->operands())
+          if (Instruction *InstOpInst = dyn_cast<Instruction>(InstOp)) {
+            if (R.contains(InstOpInst))
+              Worklist.insert(InstOpInst);
+            else
+              OutsideOperands.push_back(std::make_pair(InstOpInst, Inst));
+          }
+
+        if (Inst->mayHaveSideEffects() || Inst->mayReadFromMemory())
+          SideEffectOperands.insert(Inst);
+
+        if (isa<PHINode>(Inst) && !canSynthesize(Inst, &LI, SE, &R))
+          SideEffectOperands.insert(Inst);
+      }
+
+      DEBUG({
+        dbgs() << "\tSideEffectOperands: {\n";
+        for (auto *Op : SideEffectOperands)
+          dbgs() << "\t\t" << *Op << "\n";
+        dbgs() << "\t}\n";
+        dbgs() << "\tOutsideOperands: {\n";
+        for (auto &Op : OutsideOperands)
+          dbgs() << "\t\t" << *Op.first << "\n";
+        dbgs() << "\t}\n";
+      });
+    }
+  }
+
+  // In the second step we traverse all implicit read accesses, hence scalar
+  // uses in statements that do not define the scalar. However, at the moment
+  // we exclude PHIs to simplify the logic. For each use we will check if we
+  // can recompute the definition in the statement that contains the use (see
+  // canRecomputeInStmt()). If so we will:
+  //   o Add the definition to the dependent scalars set of the use statement.
+  //   o Add read accesses for all values defined prior to the SCoP if they
+  //     were present in the definition statement.
+  //   o Remove the use access from the use statement as it will be recomputed
+  //     and does not need to be communicated anymore.
+
+  for (ScopStmt &Stmt : *this) {
+    BasicBlock *StmtBB = Stmt.isBlockStmt() ? Stmt.getBasicBlock()
+                                            : Stmt.getRegion()->getEntry();
+    AccFuncSetType &AccList = AccFuncMap[StmtBB];
+
+    SmallVector<MemoryAccess *, 4> AdditionalAccesses;
+    SmallVector<MemoryAccess *, 4> ResolvedAccesses;
+
+    for (MemoryAccess *MA : Stmt) {
+      if (MA->isExplicit() || MA->isWrite() || MA->isPHI())
+        continue;
+
+      Instruction *DefInst = dyn_cast<Instruction>(MA->getAccessValue());
+      if (!DefInst)
+        continue;
+
+      ScopStmt *DefStmt = getStmtForBasicBlock(DefInst->getParent());
+      if (!DefStmt)
+        continue;
+
+      auto &NonTrivialOperands = NonTrivialOperandsMap[DefInst];
+      auto &SideEffectOperands = NonTrivialOperands.first;
+      if (!canRecomputeInStmt(Stmt, SideEffectOperands))
+        continue;
+
+      auto &OutsideOperands = NonTrivialOperands.second;
+      for (auto &OutsideOperandPair : OutsideOperands) {
+        Instruction *OutsideOperand = OutsideOperandPair.first;
+        Instruction *OutsideUser = OutsideOperandPair.second;
+        ScopStmt *UserStmt = getStmtForBasicBlock(OutsideUser->getParent());
+        assert(UserStmt);
+        auto *UseMAs = UserStmt->lookupAccessesFor(OutsideUser);
+        if (!UseMAs)
+          continue;
+
+        for (const MemoryAccess *UseMA : *UseMAs)
+          if (UseMA->getBaseAddr() == OutsideOperand)
+            AdditionalAccesses.push_back(UseMA->copy(AccList, &Stmt));
+      }
+
+      Stmt.addDependentScalar(DefInst);
+      ResolvedAccesses.push_back(MA);
+    }
+
+    ScalarsEliminated += ResolvedAccesses.size();
+    for (MemoryAccess *MA : ResolvedAccesses)
+      Stmt.removeMemoryAccess(MA, true);
+    for (MemoryAccess *MA : AdditionalAccesses)
+      Stmt.addAccess(MA, true);
+  }
+
+  // In the third and final step we iterate over the scalar definitions in
+  // the SCoP again. We will check if we removed the accesses for all users
+  // of the scalar in the SCoP. If so, we can safely remove the scalar write
+  // access, as all users will recompute the value. As we currently cannot
+  // simply use this logic to recompute values at the exit of the SCoP, we
+  // will not remove scalars that escape the SCoP.
+  //
+  // TODO: Introduce an exit ScopStmt that collects all escaping users so we
+  //       can recompute escaping values in this exit statement and remove
+  //       them from others.
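+  //
+  // For example, if %def is defined in Stmt0 and each use of %def inside
+  // the SCoP was resolved above, the scalar write in Stmt0 is dead and can
+  // be removed. If %def is additionally used after the SCoP (it escapes),
+  // the write has to remain.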
+
+  for (ScopStmt &Stmt : *this) {
+    SmallVector<MemoryAccess *, 4> ResolvedAccesses;
+    for (MemoryAccess *MA : Stmt) {
+      if (MA->isExplicit() || MA->isRead() || MA->isPHI())
+        continue;
+
+      Instruction *AccessInst = MA->getAccessInstruction();
+      if (isa<TerminatorInst>(AccessInst))
+        AccessInst = cast<Instruction>(MA->getBaseAddr());
+
+      if (!R.contains(AccessInst))
+        continue;
+
+      bool AllUsersRemoved = true;
+      for (auto *User : AccessInst->users()) {
+        auto *UserInst = cast<Instruction>(User);
+
+        auto *UserStmt = getStmtForBasicBlock(UserInst->getParent());
+        if (!UserStmt) {
+          AllUsersRemoved = false;
+          break;
+        }
+
+        auto *UserMAs = UserStmt->lookupAccessesFor(UserInst);
+        if (!UserMAs)
+          continue;
+
+        for (auto *UserMA : *UserMAs) {
+          if (UserMA->isExplicit() || UserMA->isWrite())
+            continue;
+
+          AllUsersRemoved = false;
+          break;
+        }
+      }
+
+      if (!AllUsersRemoved)
+        continue;
+
+      ResolvedAccesses.push_back(MA);
+    }
+
+    ScalarsEliminated += ResolvedAccesses.size();
+    for (MemoryAccess *MA : ResolvedAccesses)
+      Stmt.removeMemoryAccess(MA, true);
+  }
+}
+
 const ScopArrayInfo *
 Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType,
                                ArrayRef<const SCEV *> Sizes, bool IsPHI) {
@@ -3603,7 +3838,6 @@
 }
 
 void ScopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredID(IndependentBlocksID);
  AU.addRequired();
  AU.addRequired();
  AU.addRequired();
Index: lib/CMakeLists.txt
===================================================================
--- lib/CMakeLists.txt
+++ lib/CMakeLists.txt
@@ -49,7 +49,6 @@
   Transform/Canonicalization.cpp
   Transform/CodePreparation.cpp
   Transform/DeadCodeElimination.cpp
-  Transform/IndependentBlocks.cpp
   Transform/ScheduleOptimizer.cpp
   ${POLLY_HEADER_FILES}
   )
Index: lib/CodeGen/BlockGenerators.cpp
===================================================================
--- lib/CodeGen/BlockGenerators.cpp
+++ lib/CodeGen/BlockGenerators.cpp
@@ -59,14 +59,33 @@
       EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap),
       EscapeMap(EscapeMap), GlobalMap(GlobalMap) {}
 
+void BlockGenerator::recomputeDependentScalars(
+    ScopStmt &Stmt, ValueMapT &BBMap, LoopToScevMapT &LTS,
+    isl_id_to_ast_expr *NewAccesses) {
+
+  for (auto *Inst : Stmt.getDependentScalars())
+    if (!GlobalMap.count(Inst))
+      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses, true);
+}
+
 Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                              ValueMapT &BBMap,
-                                             LoopToScevMapT &LTS,
-                                             Loop *L) const {
+                                             LoopToScevMapT &LTS, Loop *L) {
   if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
+
+        // Recompute scalars needed for this SCEV.
+        const Region &R = Stmt.getParent()->getRegion();
+        SetVector<Value *> Values;
+        findValues(NewScev, Values);
+        for (Value *Val : Values) {
+          if (Instruction *Inst = dyn_cast<Instruction>(Val))
+            if (R.contains(Inst))
+              copyInstScalar(Stmt, Inst, BBMap, LTS, true);
+        }
+
        ValueMapT VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
@@ -80,7 +99,6 @@
              "Only instructions can be insert points for SCEVExpander");
        Value *Expanded = expandCodeFor(S, SE, DL, "polly", NewScev,
                                        Old->getType(), IP, &VTV);
-       BBMap[Old] = Expanded;
        return Expanded;
      }
 
@@ -90,7 +108,7 @@
 }
 
 Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                                   LoopToScevMapT &LTS, Loop *L) const {
+                                   LoopToScevMapT &LTS, Loop *L, bool TryOnly) {
   // We assume constants never change.
   // This avoids map lookups for many calls to this function.
   if (isa<Constant>(Old))
@@ -121,29 +139,44 @@
   if (!Stmt.getParent()->getRegion().contains(Inst->getParent()))
     return const_cast<Value *>(Old);
 
+  if (TryOnly)
+    return nullptr;
+
   // The scalar dependence is neither available nor SCEVCodegenable.
   llvm_unreachable("Unexpected scalar dependence in region!");
-  return nullptr;
 }
 
 void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
-                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
+                                    ValueMapT &BBMap, LoopToScevMapT &LTS,
+                                    bool Recompute) {
   // We do not generate debug intrinsics as we did not investigate how to
   // copy them correctly. At the current state, they just crash the code
   // generation as the meta-data operands are not correctly copied.
   if (isa<DbgInfoIntrinsic>(Inst))
     return;
 
+  const Region &R = Stmt.getParent()->getRegion();
   Instruction *NewInst = Inst->clone();
 
   // Replace old operands with the new ones.
   for (Value *OldOperand : Inst->operands()) {
-    Value *NewOperand =
-        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForInst(Inst));
+    Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, LTS,
+                                    getLoopForInst(Inst), Recompute);
+
+    if (Recompute) {
+      Instruction *NewOperandInst = dyn_cast_or_null<Instruction>(NewOperand);
+      if (!NewOperand || (NewOperandInst && R.contains(NewOperandInst))) {
+        if (Instruction *OldOperandInst = dyn_cast<Instruction>(OldOperand)) {
+          copyInstScalar(Stmt, OldOperandInst, BBMap, LTS, Recompute);
+          NewOperand = BBMap[OldOperand];
+        }
+      }
+    }
 
     if (!NewOperand) {
       assert(!isa<StoreInst>(NewInst) &&
              "Store instructions are always needed!");
+      assert(!Recompute && "Recompute copy should never fail");
       delete NewInst;
       return;
     }
 
@@ -230,10 +263,13 @@
 
 void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                      ValueMapT &BBMap, LoopToScevMapT &LTS,
-                                     isl_id_to_ast_expr *NewAccesses) {
+                                     isl_id_to_ast_expr *NewAccesses,
+                                     bool Recompute) {
 
-  // First check for possible scalar dependences for this instruction.
-  generateScalarLoads(Stmt, Inst, BBMap);
+  // First check for possible scalar dependences for this instruction if we
+  // are not recomputing a scalar.
+  if (!Recompute)
+    generateScalarLoads(Stmt, Inst, BBMap);
 
   // Terminator instructions control the control flow. They are explicitly
   // expressed in the clast and do not need to be copied.
@@ -270,7 +306,7 @@
   if (isIgnoredIntrinsic(Inst))
     return;
 
-  copyInstScalar(Stmt, Inst, BBMap, LTS);
+  copyInstScalar(Stmt, Inst, BBMap, LTS, Recompute);
 }
 
 void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
@@ -305,6 +341,8 @@
   Builder.SetInsertPoint(CopyBB->begin());
   EntryBB = &CopyBB->getParent()->getEntryBlock();
 
+  recomputeDependentScalars(Stmt, BBMap, LTS, NewAccesses);
+
   for (Instruction &Inst : *BB)
     copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
Index: lib/CodeGen/CodeGeneration.cpp
===================================================================
--- lib/CodeGen/CodeGeneration.cpp
+++ lib/CodeGen/CodeGeneration.cpp
@@ -192,7 +192,6 @@
     // region tree.
AU.addPreserved(); AU.addPreserved(); - AU.addPreservedID(IndependentBlocksID); } }; } Index: lib/Makefile =================================================================== --- lib/Makefile +++ lib/Makefile @@ -135,7 +135,6 @@ Transform/Canonicalization.cpp \ Transform/CodePreparation.cpp \ Transform/DeadCodeElimination.cpp \ - Transform/IndependentBlocks.cpp \ Transform/ScheduleOptimizer.cpp \ ${GPGPU_FILES} \ ${ISL_CODEGEN_FILES} \ Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -146,7 +146,6 @@ initializeCodePreparationPass(Registry); initializeDeadCodeElimPass(Registry); initializeDependenceInfoPass(Registry); - initializeIndependentBlocksPass(Registry); initializeJSONExporterPass(Registry); initializeJSONImporterPass(Registry); initializeIslAstInfoPass(Registry); Index: lib/Transform/IndependentBlocks.cpp =================================================================== --- lib/Transform/IndependentBlocks.cpp +++ /dev/null @@ -1,373 +0,0 @@ -//===------ IndependentBlocks.cpp - Create Independent Blocks in Regions --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Create independent blocks in the regions detected by ScopDetection. -// -//===----------------------------------------------------------------------===// -// -#include "polly/LinkAllPasses.h" -#include "polly/Options.h" -#include "polly/ScopDetection.h" -#include "polly/Support/ScopHelper.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Utils/Local.h" -#include - -using namespace polly; -using namespace llvm; - -#define DEBUG_TYPE "polly-independent" - -namespace { -struct IndependentBlocks : public FunctionPass { - RegionInfo *RI; - ScalarEvolution *SE; - ScopDetection *SD; - LoopInfo *LI; - - BasicBlock *AllocaBlock; - - static char ID; - - IndependentBlocks() : FunctionPass(ID) {} - - // Create new code for every instruction operator that can be expressed by a - // SCEV. Like this there are just two types of instructions left: - // - // 1. Instructions that only reference loop ivs or parameters outside the - // region. - // - // 2. Instructions that are not used for any memory modification. (These - // will be ignored later on.) - // - // Blocks containing only these kind of instructions are called independent - // blocks as they can be scheduled arbitrarily. - bool createIndependentBlocks(BasicBlock *BB, const Region *R); - bool createIndependentBlocks(const Region *R); - - // Elimination on the Scop to eliminate the scalar dependences come with - // trivially dead instructions. - bool eliminateDeadCode(const Region *R); - - //===--------------------------------------------------------------------===// - /// Non trivial scalar dependences checking functions. 
-  /// Non trivial scalar dependences occur when the def and use are located in
-  /// different BBs and we can not move them into the same one. This will
-  /// prevent use from schedule BBs arbitrarily.
-  ///
-  /// @brief This function checks if a scalar value that is part of the
-  ///        Scop is used outside of the Scop.
-  ///
-  /// @param Use The use of the instruction.
-  /// @param R   The maximum region in the Scop.
-  ///
-  /// @return Return true if the Use of an instruction and the instruction
-  ///         itself form a non trivial scalar dependence.
-  static bool isEscapeUse(const Value *Use, const Region *R);
-
-  //===--------------------------------------------------------------------===//
-  /// Operand tree moving functions.
-  /// Trivial scalar dependences can eliminate by move the def to the same BB
-  /// that containing use.
-  ///
-  /// @brief Check if the instruction can be moved to another place safely.
-  ///
-  /// @param Inst The instruction.
-  ///
-  /// @return Return true if the instruction can be moved safely, false
-  ///         otherwise.
-  static bool isSafeToMove(Instruction *Inst);
-
-  typedef std::map<Instruction *, Instruction *> ReplacedMapType;
-
-  /// @brief Move all safe to move instructions in the Operand Tree (DAG) to
-  ///        eliminate trivial scalar dependences.
-  ///
-  /// @param Inst        The root of the operand Tree.
-  /// @param R           The maximum region in the Scop.
-  /// @param ReplacedMap The map that mapping original instruction to the moved
-  ///                    instruction.
-  /// @param InsertPos   The insert position of the moved instructions.
-  void moveOperandTree(Instruction *Inst, const Region *R,
-                       ReplacedMapType &ReplacedMap, Instruction *InsertPos);
-
-  bool isIndependentBlock(const Region *R, BasicBlock *BB) const;
-  bool areAllBlocksIndependent(const Region *R) const;
-
-  bool runOnFunction(Function &F);
-  void verifyAnalysis() const;
-  void verifyScop(const Region *R) const;
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-};
-}
-
-bool IndependentBlocks::isSafeToMove(Instruction *Inst) {
-  if (Inst->mayReadFromMemory() || Inst->mayWriteToMemory())
-    return false;
-
-  return isSafeToSpeculativelyExecute(Inst);
-}
-
-void IndependentBlocks::moveOperandTree(Instruction *Inst, const Region *R,
-                                        ReplacedMapType &ReplacedMap,
-                                        Instruction *InsertPos) {
-  BasicBlock *CurBB = Inst->getParent();
-
-  // Depth first traverse the operand tree (or operand dag, because we will
-  // stop at PHINodes, so there are no cycle).
-  typedef Instruction::op_iterator ChildIt;
-  std::vector<std::pair<Instruction *, ChildIt>> WorkStack;
-
-  WorkStack.push_back(std::make_pair(Inst, Inst->op_begin()));
-  DenseSet<Instruction *> VisitedSet;
-
-  while (!WorkStack.empty()) {
-    Instruction *CurInst = WorkStack.back().first;
-    ChildIt It = WorkStack.back().second;
-    DEBUG(dbgs() << "Checking Operand of Node:\n" << *CurInst << "\n------>\n");
-    if (It == CurInst->op_end()) {
-      // Insert the new instructions in topological order.
-      if (!CurInst->getParent()) {
-        CurInst->insertBefore(InsertPos);
-        SE->forgetValue(CurInst);
-      }
-
-      WorkStack.pop_back();
-    } else {
-      // for each node N,
-      Instruction *Operand = dyn_cast<Instruction>(*It);
-      ++WorkStack.back().second;
-
-      // Can not move no instruction value.
-      if (Operand == 0)
-        continue;
-
-      DEBUG(dbgs() << "For Operand:\n" << *Operand << "\n--->");
-
-      // If the Scop Region does not contain N, skip it and all its operands and
-      // continue: because we reach a "parameter".
-      // FIXME: we must keep the predicate instruction inside the Scop,
-      // otherwise it will be translated to a load instruction, and we can not
-      // handle load as affine predicate at this moment.
-      if (!R->contains(Operand) && !isa<TerminatorInst>(CurInst)) {
-        DEBUG(dbgs() << "Out of region.\n");
-        continue;
-      }
-
-      if (canSynthesize(Operand, LI, SE, R)) {
-        DEBUG(dbgs() << "is IV.\n");
-        continue;
-      }
-
-      // We can not move the operand, a non trivial scalar dependence found!
-      if (!isSafeToMove(Operand)) {
-        DEBUG(dbgs() << "Can not move!\n");
-        continue;
-      }
-
-      // Do not need to move instruction if it is contained in the same BB with
-      // the root instruction.
-      if (Operand->getParent() == CurBB) {
-        DEBUG(dbgs() << "No need to move.\n");
-        // Try to move its operand, but do not visit an instuction twice.
-        if (VisitedSet.insert(Operand).second)
-          WorkStack.push_back(std::make_pair(Operand, Operand->op_begin()));
-        continue;
-      }
-
-      // Now we need to move Operand to CurBB.
-      // Check if we already moved it.
-      ReplacedMapType::iterator At = ReplacedMap.find(Operand);
-      if (At != ReplacedMap.end()) {
-        DEBUG(dbgs() << "Moved.\n");
-        Instruction *MovedOp = At->second;
-        It->set(MovedOp);
-        SE->forgetValue(MovedOp);
-      } else {
-        // Note that NewOp is not inserted in any BB now, we will insert it when
-        // it popped form the work stack, so it will be inserted in topological
-        // order.
-        Instruction *NewOp = Operand->clone();
-        NewOp->setName(Operand->getName() + ".moved.to." + CurBB->getName());
-        DEBUG(dbgs() << "Move to " << *NewOp << "\n");
-        It->set(NewOp);
-        ReplacedMap.insert(std::make_pair(Operand, NewOp));
-        SE->forgetValue(Operand);
-
-        // Process its operands, but do not visit an instuction twice.
-        if (VisitedSet.insert(NewOp).second)
-          WorkStack.push_back(std::make_pair(NewOp, NewOp->op_begin()));
-      }
-    }
-  }
-
-  SE->forgetValue(Inst);
-}
-
-bool IndependentBlocks::createIndependentBlocks(BasicBlock *BB,
-                                                const Region *R) {
-  std::vector<Instruction *> WorkList;
-  for (Instruction &Inst : *BB)
-    if (!isSafeToMove(&Inst) && !canSynthesize(&Inst, LI, SE, R))
-      WorkList.push_back(&Inst);
-
-  ReplacedMapType ReplacedMap;
-  Instruction *InsertPos = BB->getFirstNonPHIOrDbg();
-
-  for (Instruction *Inst : WorkList)
-    if (!isa<PHINode>(Inst))
-      moveOperandTree(Inst, R, ReplacedMap, InsertPos);
-
-  // The BB was changed if we replaced any operand.
-  return !ReplacedMap.empty();
-}
-
-bool IndependentBlocks::createIndependentBlocks(const Region *R) {
-  bool Changed = false;
-
-  for (BasicBlock *BB : R->blocks())
-    Changed |= createIndependentBlocks(BB, R);
-
-  return Changed;
-}
-
-bool IndependentBlocks::eliminateDeadCode(const Region *R) {
-  std::vector<Instruction *> WorkList;
-
-  // Find all trivially dead instructions.
-  for (BasicBlock *BB : R->blocks())
-    for (Instruction &Inst : *BB)
-      if (!isIgnoredIntrinsic(&Inst) && isInstructionTriviallyDead(&Inst))
-        WorkList.push_back(&Inst);
-
-  if (WorkList.empty())
-    return false;
-
-  // Delete them so the cross BB scalar dependences come with them will
-  // also be eliminated.
-  while (!WorkList.empty()) {
-    RecursivelyDeleteTriviallyDeadInstructions(WorkList.back());
-    WorkList.pop_back();
-  }
-
-  return true;
-}
-
-bool IndependentBlocks::isEscapeUse(const Value *Use, const Region *R) {
-  // Non-instruction user will never escape.
-  if (!isa<Instruction>(Use))
-    return false;
-
-  return !R->contains(cast<Instruction>(Use));
-}
-
-bool IndependentBlocks::isIndependentBlock(const Region *R,
-                                           BasicBlock *BB) const {
-  for (Instruction &Inst : *BB) {
-    if (canSynthesize(&Inst, LI, SE, R))
-      continue;
-    if (isIgnoredIntrinsic(&Inst))
-      continue;
-
-    // A value inside the Scop is referenced outside.
-    for (User *U : Inst.users()) {
-      if (isEscapeUse(U, R)) {
-        DEBUG(dbgs() << "Instruction not independent:\n");
-        DEBUG(dbgs() << "Instruction used outside the Scop!\n");
-        DEBUG(Inst.print(dbgs()));
-        DEBUG(dbgs() << "\n");
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-bool IndependentBlocks::areAllBlocksIndependent(const Region *R) const {
-  for (BasicBlock *BB : R->blocks())
-    if (!isIndependentBlock(R, BB))
-      return false;
-
-  return true;
-}
-
-void IndependentBlocks::getAnalysisUsage(AnalysisUsage &AU) const {
-  // FIXME: If we set preserves cfg, the cfg only passes do not need to
-  // be "addPreserved"?
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addRequired();
-  AU.addPreserved();
-  AU.addRequired();
-  AU.addPreserved();
-  AU.addRequired();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addRequired();
-  AU.addPreserved();
-}
-
-bool IndependentBlocks::runOnFunction(llvm::Function &F) {
-
-  bool Changed = false;
-
-  RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  SD = &getAnalysis<ScopDetection>();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
-  AllocaBlock = &F.getEntryBlock();
-
-  DEBUG(dbgs() << "Run IndepBlock on " << F.getName() << '\n');
-
-  for (const Region *R : *SD) {
-    Changed |= createIndependentBlocks(R);
-    Changed |= eliminateDeadCode(R);
-  }
-
-  verifyAnalysis();
-
-  return Changed;
-}
-
-void IndependentBlocks::verifyAnalysis() const {}
-
-void IndependentBlocks::verifyScop(const Region *R) const {
-  assert(areAllBlocksIndependent(R) && "Cannot generate independent blocks");
-}
-
-char IndependentBlocks::ID = 0;
-char &polly::IndependentBlocksID = IndependentBlocks::ID;
-
-Pass *polly::createIndependentBlocksPass() { return new IndependentBlocks(); }
-
-INITIALIZE_PASS_BEGIN(IndependentBlocks, "polly-independent",
-                      "Polly - Create independent blocks", false, false);
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
-INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
-INITIALIZE_PASS_DEPENDENCY(ScopDetection);
-INITIALIZE_PASS_END(IndependentBlocks, "polly-independent",
-                    "Polly - Create independent blocks", false, false)
Index: test/IndependentBlocks/inter_bb_scalar_dep.ll
===================================================================
--- test/IndependentBlocks/inter_bb_scalar_dep.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS
-
-; void f(long A[], int N, int *init_ptr) {
-;   long i, j;
-;
-;   for (i = 0; i < N; ++i) {
-;     init = *init_ptr;
-;     for (i = 0; i < N; ++i) {
-;       A[i] = init + 2;
-;     }
-;   }
-; }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-
-define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
-entry:
-
-; SCALARACCESS-NOT: alloca
-  br label %for.i
-
-for.i:
-  %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
-  %indvar.i.next = add nsw i64 %indvar.i, 1
-  br label %entry.next
-
-entry.next:
-  %init = load i64, i64* %init_ptr
-; SCALARACCESS-NOT:
store - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init_plus_two = add i64 %init, 2 -; SCALARACCESS: %init_plus_two = add i64 %init, 2 - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_plus_two, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll =================================================================== --- test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; for (i = 0; i < N; ++i) { -; init2 = *init_ptr; -; A[i] = init + init2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; SCALARACCESS-NOT: alloca - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - %init = load i64, i64* %init_ptr -; SCALARACCESS-NOT: store - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init_2 = load i64, i64* %init_ptr - %init_sum = add i64 %init, %init_2 - -; The SCEV of %init_sum is (%init + %init_2). It is referring to both an -; UnknownValue in the same and in a different basic block. We want only the -; reference to the different basic block to be replaced. 
- -; SCALARACCESS: %init_2 = load i64, i64* %init_ptr -; SCALARACCESS: %init_sum = add i64 %init, %init_2 - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_sum, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/intra_bb_scalar_dep.ll =================================================================== --- test/IndependentBlocks/intra_bb_scalar_dep.ll +++ /dev/null @@ -1,50 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; A[i] = init + 2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; CHECK: entry -; CHECK: br label %for.i - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init = load i64, i64* %init_ptr - %init_plus_two = add i64 %init, 2 -; The scalar evolution of %init_plus_two is (2 + %init). So we have a -; non-trivial scalar evolution referring to a value in the same basic block. -; We want to ensure that this scalar is not translated into a memory copy. - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_plus_two, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/phi_outside_scop.ll =================================================================== --- test/IndependentBlocks/phi_outside_scop.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALAR -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -define void @phi_nodes_outside() { -entry: - br label %for.i.1 - -for.i.1: - %i.1 = phi i32 [ %i.1.next, %for.i.1 ], [ 0, %entry ] - %i.1.next = add nsw i32 %i.1, 1 - br i1 false, label %for.i.1 , label %for.i.2.preheader - -for.i.2.preheader: - br label %for.i.2 - -for.i.2: -; The value of %i.1.next is used outside of the scop in a PHI node. 
- %i.2 = phi i32 [ %i.2.next , %for.i.2 ], [ %i.1.next, %for.i.2.preheader ] - %i.2.next = add nsw i32 %i.2, 1 - fence seq_cst - br i1 false, label %for.i.2, label %cleanup - -cleanup: - ret void -} - -; SCALAR-NOT: alloca - -; SCALAR: for.i.2.preheader: -; SCALAR-NOT: load - -; SCALAR: for.i.2: -; SCALAR: %i.2 = phi i32 [ %i.2.next, %for.i.2 ], [ %i.1.next, %for.i.2.preheader ] Index: test/IndependentBlocks/scalar_to_array.ll =================================================================== --- test/IndependentBlocks/scalar_to_array.ll +++ /dev/null @@ -1,222 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS -; RAUN: opt %loadPolly -basicaa -polly-independent < %s -S | FileCheck %s -check-prefix=SCALARACCESS - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 8 - -define i32 @empty() nounwind { -entry: - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - - -; SCALARACCESS-LABEL: @array_access() -define i32 @array_access() nounwind { -entry: - fence seq_cst - br label %for.cond -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %float = uitofp i64 %indvar to float - store float %float, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: %float = uitofp i64 %indvar to float -; SCALARACCESS: store float %float, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; SCALARACCESS-LABEL: @intra_scop_dep() -define i32 @intra_scop_dep() nounwind { -entry: - fence seq_cst - br label %for.cond - -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca -; SCALARACCESS: fence - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body.a, label %return - -for.body.a: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %scalar = load float, float* %arrayidx - br label %for.body.b - -; SCALARACCESS: for.body.a: -; SCALARACCESS: %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar -; SCALARACCESS: %scalar = load float, float* %arrayidx -; SCALARACCESS-NOT: store -; SCALARACCESS: br label %for.body.b - -for.body.b: - %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %float = uitofp i64 %indvar to float - %sum = fadd float %scalar, %float - store float %sum, float* %arrayidx2 - br label %for.inc - -; SCALARACCESS: for.body.b: -; SCALARACCESS: %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar -; SCALARACCESS: %float = uitofp i64 %indvar to float -; SCALARACCESS-NOT: load -; SCALARACCESS: %sum = fadd float %scalar, %float -; SCALARACCESS: store float %sum, float* %arrayidx2 -; SCALARACCESS: br label %for.inc - 
-for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; It is not possible to have a scop which accesses a scalar element that is -; a global variable. All global variables are pointers containing possibly -; a single element. - -; SCALARACCESS-LABEL: @use_after_scop() -define i32 @use_after_scop() nounwind { -entry: - fence seq_cst - br label %for.head - -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca -; SCALARACCESS: fence - -for.head: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - br label %for.body - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %scalar = load float, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: %scalar = load float, float* %arrayidx -; SCALARACCESS-NOT: store float %scalar - -for.inc: - %indvar.next = add i64 %indvar, 1 - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.head, label %for.after - -for.after: - fence seq_cst - %return_value = fptosi float %scalar to i32 - br label %return - -; SCALARACCESS: for.after: -; SCALARACCESS: fence seq_cst -; SCALARACCESS: %return_value = fptosi float %scalar to i32 - -return: - ret i32 %return_value -} - -; We currently do not transform scalar references, that have only read accesses -; in the scop. There are two reasons for this: -; -; o We don't introduce additional memory references which may yield to compile -; time overhead. -; o For integer values, such a translation may block the use of scalar -; evolution on those values. -; -; SCALARACCESS-LABEL: @before_scop() -define i32 @before_scop() nounwind { -entry: - br label %preheader - -preheader: - %scalar = fadd float 4.0, 5.0 - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - store float %scalar, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: store float %scalar, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; Currently not working -; SCALARACCESS-LABEL: @param_before_scop( -define i32 @param_before_scop(float %scalar) nounwind { -entry: - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - store float %scalar, float* %arrayidx - br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} Index: test/IndependentBlocks/scev-invalidated.ll =================================================================== --- test/IndependentBlocks/scev-invalidated.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt %loadPolly -polly-independent < %s -target datalayout ="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -define void @arc_either() { -entry: - %ang2.2.reg2mem = alloca i64 - br i1 undef, label %return, label %if.then6 - -if.then6: - %rem7 = srem i64 undef, 1474560 - br i1 false, label %if.else, label %return - -if.else: - 
%add16 = add nsw i64 %rem7, 1474560 - %rem7.add16 = select i1 undef, i64 %rem7, i64 %add16 - store i64 %rem7.add16, i64* %ang2.2.reg2mem - br label %return - -return: - ret void -} Index: test/Isl/CodeGen/20110312-Fail-without-basicaa.ll =================================================================== --- test/Isl/CodeGen/20110312-Fail-without-basicaa.ll +++ /dev/null @@ -1,26 +0,0 @@ -; This should be run without alias analysis enabled. -;RUN: opt %loadPolly -polly-independent < %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" - -define i32 @main() nounwind { -entry: - %t.02.reg2mem = alloca float - br label %entry.split - -entry.split: ; preds = %entry - store float 0.000000e+00, float* %t.02.reg2mem - br label %for.body - -for.body: ; preds = %for.body, %entry.split - %j.01 = phi i32 [ 0, %entry.split ], [ %inc1, %for.body ] - %t.02.reload = load float, float* %t.02.reg2mem - %inc = fadd float %t.02.reload, 1.000000e+00 - %inc1 = add nsw i32 %j.01, 1 - %exitcond = icmp eq i32 %inc1, 5000001 - store float %inc, float* %t.02.reg2mem - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - %conv = fptosi float %inc to i32 - ret i32 %conv -} Index: test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll =================================================================== --- test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll +++ test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll @@ -7,8 +7,8 @@ ; for (int i = 1; i < 1000; i++) ; A[i] += /* split bb */ A[0]; ; } -; A[0] tmp (unused) A -; CHECK: %polly.par.userContext = alloca { float, float*, float* } +; A[0] A +; CHECK: %polly.par.userContext = alloca { float, float* } ; ; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds ; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load Index: test/Isl/CodeGen/eliminate-multiple-scalar-fp-reads.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/eliminate-multiple-scalar-fp-reads.ll @@ -0,0 +1,90 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; SCOP-NOT: Scalar: 1 +; SCOP-NOT: ReadAccess +; +; Verify the original region is untouched but all computation is moved to the +; only place it is needed in the generated region. 
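+;
+; With the reads recomputed, no access in the SCoP is modeled as a scalar
+; ([Scalar: 1]) and no read access remains, which is what the SCOP-NOT lines
+; above verify; the chain of additions is materialized again in
+; polly.stmt.for.body.f right before the store.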
+; +; CHECK: for.body.f: +; CHECK-NEXT: %idxprom = sext i32 %i.0 to i64 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %A, i64 %idxprom +; CHECK-NEXT: store float %add5, float* %arrayidx +; +; CHECK: polly.stmt.for.body.f: +; CHECK: %0 = trunc i64 %polly.indvar to i32 +; CHECK: %1 = shl i32 %0, 1 +; CHECK: %p_conv = sitofp i32 %1 to float +; CHECK: %p_add = fadd float %p_conv, %p_conv +; CHECK: %p_add3 = fadd float %p_conv, %p_add +; CHECK: %p_add1 = fadd float %p_add, %p_conv +; CHECK: %p_add4 = fadd float %p_add3, %p_add1 +; CHECK: %p_add2 = fadd float %p_conv, %p_conv +; CHECK: %p_add5 = fadd float %p_add4, %p_add2 +; CHECK: %scevgep = getelementptr float, float* %A, i64 %polly.indvar +; CHECK: store float %p_add5, float* %scevgep +; +; void f(float *A) { +; for (int i = 0; i < 1000; i++) { +; float a = i * 2; +; /* split BB */ +; float b = a + a; +; /* split BB */ +; float c = b + a; +; /* split BB */ +; float d = a + a; +; /* split BB */ +; float e = a + b + c + d; +; /* split BB */ +; A[i] = e; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, 1000 + br i1 %cmp, label %for.body.a, label %for.end + +for.body.a: + %mul = mul nsw i32 %i.0, 2 + %conv = sitofp i32 %mul to float + br label %for.body.b + +for.body.b: + %add = fadd float %conv, %conv + br label %for.body.c + +for.body.c: + %add1 = fadd float %add, %conv + br label %for.body.d + +for.body.d: + %add2 = fadd float %conv, %conv + br label %for.body.e + +for.body.e: + %add3 = fadd float %conv, %add + %add4 = fadd float %add3, %add1 + %add5 = fadd float %add4, %add2 + br label %for.body.f + +for.body.f: + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds float, float* %A, i64 %idxprom + store float %add5, float* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/eliminate-multiple-scalar-reads.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/eliminate-multiple-scalar-reads.ll @@ -0,0 +1,82 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; SCOP-NOT: Scalar: 1 +; SCOP-NOT: ReadAccess +; +; Verify the original region is untouched but all computation is moved to the +; only place it is needed in the generated region. 
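+;
+; Note that the recomputed integer chain constant-folds: with a = 2 * i we
+; get e = a + 2a + 3a + 2a = 16 * i, which is why the generated statement
+; below contains only a truncate, a shift (shl ... 4), and the store.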
+; +; CHECK: for.body.f: +; CHECK-NEXT: %idxprom6 = sext i32 %i.0 to i64 +; CHECK-NEXT: %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %idxprom6 +; CHECK-NEXT: store i32 %add5, i32* %arrayidx7, align 4 +; +; CHECK: polly.stmt.for.body.f: +; CHECK: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK: %0 = trunc i64 %polly.indvar to i32 +; CHECK: %1 = shl i32 %0, 4 +; CHECK: store i32 %1, i32* %scevgep +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int a = i * 2; +; /* split BB */ +; int b = a + a; +; /* split BB */ +; int c = b + a; +; /* split BB */ +; int d = a + a; +; /* split BB */ +; int e = a + b + c + d; +; /* split BB */ +; A[i] = e; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, 1000 + br i1 %cmp, label %for.body.a, label %for.end + +for.body.a: ; preds = %for.cond + %tmp = mul nsw i32 %i.0, 2 + br label %for.body.b + +for.body.b: + %add = add nsw i32 %tmp, %tmp + br label %for.body.c + +for.body.c: + %add1 = add nsw i32 %add, %tmp + br label %for.body.d + +for.body.d: + %add2 = add nsw i32 %tmp, %tmp + br label %for.body.e + +for.body.e: + %add3 = add nsw i32 %tmp, %add + %add4 = add nsw i32 %add3, %add1 + %add5 = add nsw i32 %add4, %add2 + br label %for.body.f + +for.body.f: + %idxprom6 = sext i32 %i.0 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %idxprom6 + store i32 %add5, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/eliminate-scalars-with-outside-load.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/eliminate-scalars-with-outside-load.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN +; +; Verify that we will virtually move %mul but also the read of %tmp to the +; for.body.split block. 
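+;
+; Because %tmp is loaded before the SCoP, the statement that recomputes %mul
+; still needs a read access for MemRef_tmp; it is the access copied over
+; from Stmt_for_body (via MemoryAccess::copy).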
+; +; TODO: Remove read only statements +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_tmp[] }; +; CHECK-NOT: Access +; CHECK: Stmt_for_body_split +; CHECK-NOT: MemRef_mul +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_tmp[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_A[i0] }; +; +; CODEGEN: polly.stmt.for.body.split: +; CODEGEN-NEXT: %p_mul1 = fmul float %tmp, 2.000000e+00 +; CODEGEN-NEXT: %scevgep = getelementptr float, float* %A, i64 %polly.indvar +; CODEGEN-NEXT: store float %p_mul1, +; +; void f(float *A) { +; float x = A[-1]; +; for (int i = 0; i < 1000; i++) { +; float a = x * 2; +; /* split BB */ +; A[i] = a; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A) { +entry: + %arrayidx = getelementptr inbounds float, float* %A, i64 -1 + %tmp = load float, float* %arrayidx, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %mul = fmul float %tmp, 2.000000e+00 + br label %for.body.split + +for.body.split: ; preds = %for.cond + %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv + store float %mul, float* %arrayidx1, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll @@ -14,12 +14,14 @@ ; CHECK-LABEL: polly.stmt.loop: ; CHECK-NEXT: %polly.subregion.iv = phi i32 [ 0, %polly.stmt.loop.entry ] -; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val11 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val22 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: store float %p_val0, float* %merge.phiops -; CHECK-NEXT: store float %p_val1, float* %val1.s2a -; CHECK-NEXT: store float %p_val2, float* %val2.s2a +; CHECK-NEXT: store float %p_val11, float* %val1.s2a +; CHECK-NEXT: store float %p_val22, float* %val2.s2a ; FIXME -> The last two writes are not really needed and can be dropped if the ; incoming block of the PHI and the value that is used share the same @@ -29,13 +31,17 @@ br i1 %cond1, label %branch2, label %backedge ; CHECK-LABEL: polly.stmt.branch1: -; CHECK-NEXT: store float %p_val1, float* %merge.phiops +; CHECK-NEXT: %p_val13 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val24 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: store float %p_val13, float* %merge.phiops branch2: br label %backedge ; CHECK-LABEL: polly.stmt.branch2: -; CHECK-NEXT: store float %p_val2, float* %merge.phiops +; CHECK-NEXT: %p_val15 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val26 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: store float %p_val26, float* %merge.phiops backedge: %merge = phi float [%val0, 
   %merge = phi float [%val0, %loop], [%val1, %branch1], [%val2, %branch2]
Index: test/Isl/CodeGen/srem-in-other-bb.ll
===================================================================
--- test/Isl/CodeGen/srem-in-other-bb.ll
+++ test/Isl/CodeGen/srem-in-other-bb.ll
@@ -1,5 +1,4 @@
-; RUN: opt %loadPolly -polly-codegen -S \
-; RUN: < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
 ;
 ; void pos(float *A, long n) {
 ;   for (long i = 0; i < 100; i++)
@@ -7,8 +6,8 @@
 ; }
 ;
 ; CHECK: polly.stmt.bb3:
-; CHECK: %p_tmp.moved.to.bb3 = srem i64 %n, 42
-; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %p_tmp.moved.to.bb3
+; CHECK: %[[rem:[._a-zA-Z0-9]*]] = srem i64 %n, 42
+; CHECK: getelementptr inbounds float, float* %A, i64 %[[rem]]
 
 define void @pos(float* %A, i64 %n) {
 bb:
Index: test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll
@@ -0,0 +1,56 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN
+;
+; This is a negative test. We should move the load to the split block
+; and remove all scalar accesses; however, at the moment we only move
+; instructions that are trivially safe to move. All three checks should
+; be negated at some point. This also verifies that we currently do not
+; try to move only part of the scalar operand chain, i.e., the %add instruction.
+;
+; CHECK: Scalar: 1
+; CHECK: Scalar: 1
+; CHECK-NOT: Reduction: +
+;
+; These checks should stay, as they verify that we did not modify the original region:
+;
+; CODEGEN: for.body.split:
+; CODEGEN-NEXT: %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+; CODEGEN-NEXT: store i32 %add, i32* %arrayidx2, align 4
+;
+; void f(int *A) {
+;   for (int i = 0; i < 1000; i++) {
+;     int x = A[i] + 3;
+;     /* split BB */
+;     A[i] = x;
+;   }
+; }
+;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 3
+  br label %for.body.split
+
+for.body.split:
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.split
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll
@@ -0,0 +1,48 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; This is a negative test. We should move the load to the split block
+; and remove all scalar accesses; however, at the moment we only move
+; instructions that are trivially safe to move. All three checks should
+; be negated at some point.
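+;
+; (Editor's note, an assumption about the current heuristic rather than
+; documented behavior: the load is presumably not "trivially safe" to move
+; because sinking it into the split block changes the point at which memory
+; is read, which is only sound if no aliasing write can execute in between;
+; proving that requires alias analysis, unlike the purely arithmetic
+; instructions recomputed in the other tests.)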
+;
+; CHECK: Scalar: 1
+; CHECK: Scalar: 1
+; CHECK-NOT: Reduction: +
+;
+; void f(int *A) {
+;   for (int i = 0; i < 1000; i++) {
+;     int x = A[i];
+;     /* split BB */
+;     A[i] = x + 3;
+;   }
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp = load i32, i32* %arrayidx, align 4
+  br label %for.body.split
+
+for.body.split:
+  %add = add nsw i32 %tmp, 3
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.split
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/independent-blocks-never-stop-on-big-scop.ll
===================================================================
--- test/ScopInfo/independent-blocks-never-stop-on-big-scop.ll
+++ /dev/null
@@ -1,199 +0,0 @@
-; RUN: opt %loadPolly -polly-independent < %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-define i32 @main() nounwind uwtable readnone {
-  %arr = alloca [100 x i32], align 16
-  br label %1
-
-;