Index: polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
===================================================================
--- polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
+++ polly/trunk/include/polly/CodeGen/IslNodeBuilder.h
@@ -16,6 +16,7 @@
 #include "polly/CodeGen/BlockGenerators.h"
 #include "polly/CodeGen/IslExprBuilder.h"
 #include "polly/CodeGen/LoopGenerators.h"
+#include "polly/ScopInfo.h"
 #include "isl/ctx.h"
 #include "isl/union_map.h"
@@ -222,6 +223,13 @@
   Value *preloadInvariantLoad(const MemoryAccess &MA,
                               __isl_take isl_set *Domain);

+  /// @brief Preload the invariant access equivalence class @p IAClass.
+  ///
+  /// This function will preload the representing load from @p IAClass and
+  /// map all members of @p IAClass to that preloaded value, potentially cast
+  /// to the required type.
+  void preloadInvariantEquivClass(const InvariantEquivClassTy &IAClass);
+
   void createForVector(__isl_take isl_ast_node *For, int VectorWidth);
   void createForSequential(__isl_take isl_ast_node *For);
Index: polly/trunk/include/polly/ScopInfo.h
===================================================================
--- polly/trunk/include/polly/ScopInfo.h
+++ polly/trunk/include/polly/ScopInfo.h
@@ -677,17 +677,19 @@
 /// @brief Ordered list type to hold accesses.
 using MemoryAccessList = std::forward_list<MemoryAccess *>;

-/// @brief Type for invariant memory accesses and their domain context.
-using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>;
-
-/// @brief Type for an ordered list of invariant accesses.
-using InvariantAccessListTy = std::forward_list<InvariantAccessTy>;
-
-/// @brief Type for a class of equivalent invariant memory accesses.
-using InvariantEquivClassTy = std::pair<const SCEV *, InvariantAccessListTy>;
+/// @brief Type for equivalent invariant accesses and their domain context.
+///
+/// The first element is the SCEV for the pointer/location that identifies this
+/// equivalence class. The second is a list of memory accesses to that location
+/// that are now treated as invariant and hoisted during code generation. The
+/// last element is the execution context under which the invariant memory
+/// location is accessed, hence the union of all domain contexts for the memory
+/// accesses in the list.
+using InvariantEquivClassTy =
+    std::tuple<const SCEV *, MemoryAccessList, isl_set *>;

-/// @brief Type for multiple invariant memory accesses and their domain context.
-using InvariantAccessesTy = SmallVector<InvariantEquivClassTy, 8>;
+/// @brief Type for invariant access equivalence classes.
+using InvariantEquivClassesTy = SmallVector<InvariantEquivClassTy, 8>;

 ///===----------------------------------------------------------------------===//
 /// @brief Statement of the Scop
@@ -916,12 +918,11 @@
   /// @brief Add @p Access to this statement's list of accesses.
   void addAccess(MemoryAccess *Access);

-  /// @brief Move the memory access in @p InvMAs to @p InvariantEquivClasses.
+  /// @brief Remove the memory accesses in @p InvMAs.
   ///
   /// Note that scalar accesses that are caused by any access in @p InvMAs will
   /// be eliminated too.
-  void hoistMemoryAccesses(MemoryAccessList &InvMAs,
-                           InvariantAccessesTy &InvariantEquivClasses);
+  void removeMemoryAccesses(MemoryAccessList &InvMAs);

   typedef MemoryAccessVec::iterator iterator;
   typedef MemoryAccessVec::const_iterator const_iterator;
@@ -1144,8 +1145,12 @@
   /// group to ensure the SCoP is executed in an alias free environment.
   MinMaxVectorPairVectorTy MinMaxAliasGroups;

+  /// @brief Mapping from invariant loads to the representing invariant load of
+  ///        their equivalence class.
+  ValueToValueMap InvEquivClassVMap;
+
   /// @brief List of invariant accesses.
-  InvariantAccessesTy InvariantEquivClasses;
+  InvariantEquivClassesTy InvariantEquivClasses;

   /// @brief Scop constructor; invoked from ScopInfo::buildScop.
   Scop(Region &R, AccFuncMapType &AccFuncMap, ScopDetection &SD,
@@ -1229,6 +1234,9 @@
   /// Required inv. loads: LB[0], LB[1], (V, if it may alias with A or LB)
   void hoistInvariantLoads();

+  /// @brief Add invariant loads listed in @p InvMAs with the domain of @p Stmt.
+  void addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs);
+
   void buildContext();

@@ -1255,7 +1263,7 @@
   /// @param S The SCEV to normalize.
   ///
   /// @return The representing SCEV for invariant loads or @p S if none.
-  const SCEV *getRepresentingInvariantLoadSCEV(const SCEV *S) const;
+  const SCEV *getRepresentingInvariantLoadSCEV(const SCEV *S);

   /// @brief Create a new SCoP statement for either @p BB or @p R.
   ///
@@ -1349,7 +1357,7 @@
   /// @param Parameter A SCEV that was recognized as a Parameter.
   ///
   /// @return The corresponding isl_id or NULL otherwise.
-  isl_id *getIdForParam(const SCEV *Parameter) const;
+  isl_id *getIdForParam(const SCEV *Parameter);

   /// @name Parameter Iterators
   ///
@@ -1375,8 +1383,11 @@
   /// @return The maximum depth of the loop.
   inline unsigned getMaxLoopDepth() const { return MaxLoopDepth; }

+  /// @brief Return the invariant equivalence class for @p Val if any.
+  const InvariantEquivClassTy *lookupInvariantEquivClass(Value *Val) const;
+
   /// @brief Return the set of invariant accesses.
-  const InvariantAccessesTy &getInvariantAccesses() const {
+  const InvariantEquivClassesTy &getInvariantAccesses() const {
     return InvariantEquivClasses;
   }

Index: polly/trunk/lib/Analysis/ScopInfo.cpp
===================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp
+++ polly/trunk/lib/Analysis/ScopInfo.cpp
@@ -1384,8 +1384,7 @@

 void ScopStmt::dump() const { print(dbgs()); }

-void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
-                                   InvariantAccessesTy &InvariantEquivClasses) {
+void ScopStmt::removeMemoryAccesses(MemoryAccessList &InvMAs) {

   // Remove all memory accesses in @p InvMAs from this statement together
   // with all scalar accesses that were caused by them. The tricky iteration
@@ -1410,79 +1409,6 @@
       InstructionToAccess.erase(MA->getAccessInstruction());
     delete &MAL;
   }
-
-  // Get the context under which this statement, hence the memory accesses, are
-  // executed.
-  isl_set *DomainCtx = isl_set_params(getDomain());
-  DomainCtx = isl_set_remove_redundancies(DomainCtx);
-  DomainCtx = isl_set_detect_equalities(DomainCtx);
-  DomainCtx = isl_set_coalesce(DomainCtx);
-
-  Scop &S = *getParent();
-  ScalarEvolution &SE = *S.getSE();
-
-  // Project out all parameters that relate to loads in this statement that
-  // we will hoist. Otherwise we would have cyclic dependences on the
-  // constraints under which the hoisted loads are executed and we could not
-  // determine an order in which to preload them. This happens because not only
-  // lower bounds are part of the domain but also upper bounds.
-  for (MemoryAccess *MA : InvMAs) {
-    Instruction *AccInst = MA->getAccessInstruction();
-    if (SE.isSCEVable(AccInst->getType())) {
-      isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
-      if (ParamId) {
-        int Dim = isl_set_find_dim_by_id(DomainCtx, isl_dim_param, ParamId);
-        DomainCtx = isl_set_eliminate(DomainCtx, isl_dim_param, Dim, 1);
-      }
-      isl_id_free(ParamId);
-    }
-  }
-
-  for (MemoryAccess *MA : InvMAs) {
-
-    // Check for another invariant access that accesses the same location as
-    // MA and if found consolidate them. Otherwise create a new equivalence
-    // class at the end of InvariantEquivClasses.
-    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
-    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
-    bool Consolidated = false;
-
-    for (auto &IAClass : InvariantEquivClasses) {
-      const SCEV *ClassPointerSCEV = IAClass.first;
-      if (PointerSCEV != ClassPointerSCEV)
-        continue;
-
-      Consolidated = true;
-
-      // We created empty equivalence classes for required invariant loads
-      // in the beginning and might encounter one of them here. If so, this
-      // MA will be the first in that equivalence class.
-      auto &ClassList = IAClass.second;
-      if (ClassList.empty()) {
-        ClassList.push_front(std::make_pair(MA, isl_set_copy(DomainCtx)));
-        break;
-      }
-
-      // If the equivalence class for MA is not empty we unify the execution
-      // context and add MA to the list of accesses that are in this class.
-      isl_set *IAClassDomainCtx = IAClass.second.front().second;
-      IAClassDomainCtx =
-          isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx));
-      ClassList.push_front(std::make_pair(MA, IAClassDomainCtx));
-      break;
-    }
-
-    if (Consolidated)
-      continue;
-
-    // If we did not consolidate MA, thus did not find an equivalence class
-    // that for it, we create a new one.
-    InvariantAccessTy IA = std::make_pair(MA, isl_set_copy(DomainCtx));
-    InvariantEquivClasses.emplace_back(InvariantEquivClassTy(
-        std::make_pair(PointerSCEV, InvariantAccessListTy({IA}))));
-  }
-
-  isl_set_free(DomainCtx);
 }

 //===----------------------------------------------------------------------===//
@@ -1494,25 +1420,8 @@
   Context = NewContext;
 }

-const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) const {
-  const SCEVUnknown *SU = dyn_cast_or_null<SCEVUnknown>(S);
-  if (!SU)
-    return S;
-
-  LoadInst *LInst = dyn_cast<LoadInst>(SU->getValue());
-  if (!LInst)
-    return S;
-
-  // Try to find an equivalence class for the load, if found return
-  // the SCEV for the representing element, otherwise return S.
-  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
-  for (const InvariantEquivClassTy &IAClass : InvariantEquivClasses) {
-    const SCEV *ClassPointerSCEV = IAClass.first;
-    if (ClassPointerSCEV == PointerSCEV)
-      return ClassPointerSCEV;
-  }
-
-  return S;
+const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) {
+  return SCEVParameterRewriter::rewrite(S, *SE, InvEquivClassVMap);
 }

 void Scop::addParams(std::vector<const SCEV *> NewParameters) {
@@ -1532,7 +1441,7 @@
   }
 }

-__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) const {
+__isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) {
   // Normalize the SCEV to get the representing element for an invariant load.
   Parameter = getRepresentingInvariantLoadSCEV(Parameter);

@@ -1614,17 +1523,17 @@
 }

 void Scop::buildInvariantEquivalenceClasses() {
+  DenseMap<const SCEV *, LoadInst *> EquivClasses;
+
   const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion());
-  SmallPtrSet<const SCEV *, 4> ClassPointerSet;
   for (LoadInst *LInst : RIL) {
     const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
-    // Skip the load if we already have a equivalence class for the pointer.
-    if (!ClassPointerSet.insert(PointerSCEV).second)
-      continue;
-
-    InvariantEquivClasses.emplace_back(InvariantEquivClassTy(
-        std::make_pair(PointerSCEV, InvariantAccessListTy())));
+    LoadInst *&ClassRep = EquivClasses[PointerSCEV];
+    if (!ClassRep)
+      ClassRep = LInst;
+    else
+      InvEquivClassVMap[LInst] = ClassRep;
   }
 }

@@ -2504,8 +2413,7 @@
   }

   for (const auto &IAClass : InvariantEquivClasses)
-    if (!IAClass.second.empty())
-      isl_set_free(IAClass.second.front().second);
+    isl_set_free(std::get<2>(IAClass));
 }

 void Scop::updateAccessDimensionality() {
@@ -2540,6 +2448,84 @@
   }
 }

+const InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) const {
+  LoadInst *LInst = dyn_cast<LoadInst>(Val);
+  if (!LInst)
+    return nullptr;
+
+  if (Value *Rep = InvEquivClassVMap.lookup(LInst))
+    LInst = cast<LoadInst>(Rep);
+
+  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
+  for (auto &IAClass : InvariantEquivClasses)
+    if (PointerSCEV == std::get<0>(IAClass))
+      return &IAClass;
+
+  return nullptr;
+}
+
+void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) {
+
+  // Get the context under which the statement is executed.
+  isl_set *DomainCtx = isl_set_params(Stmt.getDomain());
+  DomainCtx = isl_set_remove_redundancies(DomainCtx);
+  DomainCtx = isl_set_detect_equalities(DomainCtx);
+  DomainCtx = isl_set_coalesce(DomainCtx);
+
+  // Project out all parameters that relate to loads in the statement. Otherwise
+  // we could have cyclic dependences on the constraints under which the
+  // hoisted loads are executed and we could not determine an order in which to
+  // pre-load them. This happens because not only lower bounds are part of the
+  // domain but also upper bounds.
+  for (MemoryAccess *MA : InvMAs) {
+    Instruction *AccInst = MA->getAccessInstruction();
+    if (SE->isSCEVable(AccInst->getType())) {
+      isl_id *ParamId = getIdForParam(SE->getSCEV(AccInst));
+      if (ParamId) {
+        int Dim = isl_set_find_dim_by_id(DomainCtx, isl_dim_param, ParamId);
+        DomainCtx = isl_set_eliminate(DomainCtx, isl_dim_param, Dim, 1);
+      }
+      isl_id_free(ParamId);
+    }
+  }
+
+  for (MemoryAccess *MA : InvMAs) {
+    // Check for another invariant access that accesses the same location as
+    // MA and if found consolidate them. Otherwise create a new equivalence
+    // class at the end of InvariantEquivClasses.
+    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
+    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
+
+    bool Consolidated = false;
+    for (auto &IAClass : InvariantEquivClasses) {
+      if (PointerSCEV != std::get<0>(IAClass))
+        continue;
+
+      Consolidated = true;
+
+      // Add MA to the list of accesses that are in this class.
+      auto &MAs = std::get<1>(IAClass);
+      MAs.push_front(MA);
+
+      // Unify the execution context of the class and this statement.
+      isl_set *&IAClassDomainCtx = std::get<2>(IAClass);
+      IAClassDomainCtx = isl_set_coalesce(
+          isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx)));
+      break;
+    }
+
+    if (Consolidated)
+      continue;
+
+    // If we did not consolidate MA, thus did not find an equivalence class
+    // for it, we create a new one.
+    InvariantEquivClasses.emplace_back(PointerSCEV, MemoryAccessList{MA},
+                                       isl_set_copy(DomainCtx));
+  }
+
+  isl_set_free(DomainCtx);
+}
+
 void Scop::hoistInvariantLoads() {
   isl_union_map *Writes = getWrites();
   for (ScopStmt &Stmt : *this) {
@@ -2594,7 +2580,8 @@
     InvMAs.reverse();

     // Transfer the memory access from the statement to the SCoP.
-    Stmt.hoistMemoryAccesses(InvMAs, InvariantEquivClasses);
+    Stmt.removeMemoryAccesses(InvMAs);
+    addInvariantLoads(Stmt, InvMAs);

     isl_set_free(Domain);
   }
@@ -2617,67 +2604,6 @@
       return;
     }
   }
-
-  // We want invariant accesses to be sorted in a "natural order" because there
-  // might be dependences between invariant loads. These can be caused by
-  // indirect loads but also because an invariant load is only conditionally
-  // executed and the condition is dependent on another invariant load. As we
-  // want to do code generation in a straight forward way, e.g., preload the
-  // accesses in the list one after another, we sort them such that the
-  // preloaded values needed in the conditions will always be in front. Before
-  // we already ordered the accesses such that indirect loads can be resolved,
-  // thus we use a stable sort here.
-
-  auto compareInvariantAccesses = [this](
-      const InvariantEquivClassTy &IAClass0,
-      const InvariantEquivClassTy &IAClass1) {
-    const InvariantAccessTy &IA0 = IAClass0.second.front();
-    const InvariantAccessTy &IA1 = IAClass1.second.front();
-
-    Instruction *AI0 = IA0.first->getAccessInstruction();
-    Instruction *AI1 = IA1.first->getAccessInstruction();
-
-    const SCEV *S0 =
-        SE->isSCEVable(AI0->getType()) ? SE->getSCEV(AI0) : nullptr;
-    const SCEV *S1 =
-        SE->isSCEVable(AI1->getType()) ? SE->getSCEV(AI1) : nullptr;
-
-    isl_id *Id0 = getIdForParam(S0);
-    isl_id *Id1 = getIdForParam(S1);
-
-    if (Id0 && !Id1) {
-      isl_id_free(Id0);
-      isl_id_free(Id1);
-      return true;
-    }
-
-    if (!Id0) {
-      isl_id_free(Id0);
-      isl_id_free(Id1);
-      return false;
-    }
-
-    assert(Id0 && Id1);
-
-    isl_set *Dom0 = IA0.second;
-    isl_set *Dom1 = IA1.second;
-
-    int Dim0 = isl_set_find_dim_by_id(Dom0, isl_dim_param, Id0);
-
-    bool Involves1Id0 = isl_set_involves_dims(Dom1, isl_dim_param, Dim0, 1);
-    assert(!Involves1Id0 ||
-           !isl_set_involves_dims(
-               Dom0, isl_dim_param,
-               isl_set_find_dim_by_id(Dom0, isl_dim_param, Id1), 1));
-
-    isl_id_free(Id0);
-    isl_id_free(Id1);
-
-    return Involves1Id0;
-  };
-
-  std::stable_sort(InvariantEquivClasses.begin(), InvariantEquivClasses.end(),
-                   compareInvariantAccesses);
 }

 const ScopArrayInfo *
@@ -2862,12 +2788,12 @@
   OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
   OS.indent(4) << "Invariant Accesses: {\n";
   for (const auto &IAClass : InvariantEquivClasses) {
-    if (IAClass.second.empty()) {
-      OS.indent(12) << "Class Pointer: " << IAClass.first << "\n";
+    const auto &MAs = std::get<1>(IAClass);
+    if (MAs.empty()) {
+      OS.indent(12) << "Class Pointer: " << *std::get<0>(IAClass) << "\n";
     } else {
-      IAClass.second.front().first->print(OS);
-      OS.indent(12) << "Execution Context: " << IAClass.second.front().second
-                    << "\n";
+      MAs.front()->print(OS);
+      OS.indent(12) << "Execution Context: " << std::get<2>(IAClass) << "\n";
     }
   }
   OS.indent(4) << "}\n";
Index: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
===================================================================
--- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
+++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp
@@ -818,7 +818,19 @@
 void IslNodeBuilder::materializeValue(isl_id *Id) {
   // If the Id is already mapped, skip it.
   if (!IDToValue.count(Id)) {
-    auto V = generateSCEV((const SCEV *)isl_id_get_user(Id));
+    auto *ParamSCEV = (const SCEV *)isl_id_get_user(Id);
+
+    // Parameters could refer to invariant loads that need to be
+    // preloaded before we can generate code for the parameter. Thus,
+    // check if any value referred to in ParamSCEV is an invariant load
+    // and if so make sure its equivalence class is preloaded.
+    SetVector<Value *> Values;
+    findValues(ParamSCEV, Values);
+    for (auto *Val : Values)
+      if (const auto *IAClass = S.lookupInvariantEquivClass(Val))
+        preloadInvariantEquivClass(*IAClass);
+
+    auto *V = generateSCEV(ParamSCEV);
     IDToValue[Id] = V;
   }

@@ -910,53 +922,60 @@
   return PreloadVal;
 }

-void IslNodeBuilder::preloadInvariantLoads() {
-
-  const auto &InvariantEquivClasses = S.getInvariantAccesses();
-  if (InvariantEquivClasses.empty())
-    return;
-
-  const Region &R = S.getRegion();
-  BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
-
-  BasicBlock *PreLoadBB =
-      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
-  PreLoadBB->setName("polly.preload.begin");
-  Builder.SetInsertPoint(PreLoadBB->begin());
-
-  // For each equivalence class of invariant loads we pre-load the representing
+void IslNodeBuilder::preloadInvariantEquivClass(
+    const InvariantEquivClassTy &IAClass) {
+  // For an equivalence class of invariant loads we pre-load the representing
   // element with the unified execution context. However, we have to map all
   // elements of the class to the one preloaded load as they are referenced
   // during the code generation and therefor need to be mapped.
-  for (const auto &IAClass : InvariantEquivClasses) {
+  const MemoryAccessList &MAs = std::get<1>(IAClass);
+  assert(!MAs.empty());
+  MemoryAccess *MA = MAs.front();
+  assert(MA->isExplicit() && MA->isRead());
+
+  // If the access function was already mapped, the preload of this equivalence
+  // class was triggered earlier already and doesn't need to be done again.
+  if (ValueMap.count(MA->getAccessInstruction()))
+    return;

-    MemoryAccess *MA = IAClass.second.front().first;
-    assert(!MA->isImplicit());
+  Instruction *AccInst = MA->getAccessInstruction();
+  Type *AccInstTy = AccInst->getType();

-    isl_set *Domain = isl_set_copy(IAClass.second.front().second);
-    Instruction *AccInst = MA->getAccessInstruction();
-    Value *PreloadVal = preloadInvariantLoad(*MA, Domain);
-    for (const InvariantAccessTy &IA : IAClass.second) {
-      Instruction *AccInst = IA.first->getAccessInstruction();
-      ValueMap[AccInst] =
-          Builder.CreateBitOrPointerCast(PreloadVal, AccInst->getType());
-    }
+  isl_set *Domain = isl_set_copy(std::get<2>(IAClass));
+  Value *PreloadVal = preloadInvariantLoad(*MA, Domain);
+  assert(PreloadVal->getType() == AccInst->getType());
+  for (const MemoryAccess *MA : MAs) {
+    Instruction *MAAccInst = MA->getAccessInstruction();
+    ValueMap[MAAccInst] =
+        Builder.CreateBitOrPointerCast(PreloadVal, MAAccInst->getType());
+  }
+
+  if (SE.isSCEVable(AccInstTy)) {
+    isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
+    if (ParamId)
+      IDToValue[ParamId] = PreloadVal;
+    isl_id_free(ParamId);
+  }
+
+  auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
+  for (auto *DerivedSAI : SAI->getDerivedSAIs()) {
+    Value *BasePtr = DerivedSAI->getBasePtr();
+    BasePtr = Builder.CreateBitOrPointerCast(PreloadVal, BasePtr->getType());
+    DerivedSAI->setBasePtr(BasePtr);
+  }

-    if (SE.isSCEVable(AccInst->getType())) {
-      isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
-      if (ParamId)
-        IDToValue[ParamId] = PreloadVal;
-      isl_id_free(ParamId);
-    }
+  BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
+  auto *Alloca = new AllocaInst(AccInstTy, AccInst->getName() + ".preload.s2a");
+  Alloca->insertBefore(EntryBB->getFirstInsertionPt());
+  Builder.CreateStore(PreloadVal, Alloca);

-    auto *SAI = S.getScopArrayInfo(MA->getBaseAddr());
-    for (auto *DerivedSAI : SAI->getDerivedSAIs())
-      DerivedSAI->setBasePtr(PreloadVal);
+  const Region &R = S.getRegion();
+  for (const MemoryAccess *MA : MAs) {

-    // Use the escape system to get the correct value to users outside
-    // the SCoP.
+    Instruction *MAAccInst = MA->getAccessInstruction();
+    // Use the escape system to get the correct value to users outside the SCoP.
     BlockGenerator::EscapeUserVectorTy EscapeUsers;
-    for (auto *U : AccInst->users())
+    for (auto *U : MAAccInst->users())
       if (Instruction *UI = dyn_cast<Instruction>(U))
         if (!R.contains(UI))
           EscapeUsers.push_back(UI);
@@ -964,15 +983,26 @@
     if (EscapeUsers.empty())
       continue;

-    auto *Ty = AccInst->getType();
-    auto *Alloca = new AllocaInst(Ty, AccInst->getName() + ".preload.s2a");
-    Alloca->insertBefore(EntryBB->getFirstInsertionPt());
-    Builder.CreateStore(PreloadVal, Alloca);
-
-    EscapeMap[AccInst] = std::make_pair(Alloca, std::move(EscapeUsers));
+    EscapeMap[MA->getAccessInstruction()] =
+        std::make_pair(Alloca, std::move(EscapeUsers));
   }
 }

+void IslNodeBuilder::preloadInvariantLoads() {
+
+  const auto &InvariantEquivClasses = S.getInvariantAccesses();
+  if (InvariantEquivClasses.empty())
+    return;
+
+  BasicBlock *PreLoadBB =
+      SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI);
+  PreLoadBB->setName("polly.preload.begin");
+  Builder.SetInsertPoint(PreLoadBB->begin());
+
+  for (const auto &IAClass : InvariantEquivClasses)
+    preloadInvariantEquivClass(IAClass);
+}
+
 void IslNodeBuilder::addParameters(__isl_take isl_set *Context) {
   // Materialize values for the parameters of the SCoP.
Index: polly/trunk/test/Isl/CodeGen/inv-load-lnt-crash-wrong-order-2.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/inv-load-lnt-crash-wrong-order-2.ll +++ polly/trunk/test/Isl/CodeGen/inv-load-lnt-crash-wrong-order-2.ll @@ -0,0 +1,91 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; This crashed our codegen at some point, verify it runs through +; +; CHECK: polly.start +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.EqState.41.74.107.272.602.635.701.734.767.899.998.1229.2449.2482.2647.2680.2779.2911.3010.3036 = type { %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035*, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018*, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035* } +%struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018 = type { i32, %struct.Production.29.62.95.260.590.623.689.722.755.887.986.1217.2437.2470.2635.2668.2767.2899.2998.3014*, i32, i32, i32, i32, %struct.anon.0.30.63.96.261.591.624.690.723.756.888.987.1218.2438.2471.2636.2669.2768.2900.2999.3015, %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021*, %struct.Code.31.64.97.262.592.625.691.724.757.889.988.1219.2439.2472.2637.2670.2769.2901.3000.3016, %struct.Code.31.64.97.262.592.625.691.724.757.889.988.1219.2439.2472.2637.2670.2769.2901.3000.3016, %struct.anon.1.32.65.98.263.593.626.692.725.758.890.989.1220.2440.2473.2638.2671.2770.2902.3001.3017, i32, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018* } +%struct.Production.29.62.95.260.590.623.689.722.755.887.986.1217.2437.2470.2635.2668.2767.2899.2998.3014 = type { i8*, i32, %struct.anon.9.42.75.240.570.603.669.702.735.867.966.1197.2417.2450.2615.2648.2747.2879.2978.3011, i32, i8, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018*, [8 x %struct.Production.29.62.95.260.590.623.689.722.755.887.986.1217.2437.2470.2635.2668.2767.2899.2998.3014*], [8 x %struct.Declaration.13.46.79.244.574.607.673.706.739.871.970.1201.2421.2454.2619.2652.2751.2883.2982.3012*], %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035*, %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021*, %struct.Term.18.51.84.249.579.612.678.711.744.876.975.1206.2426.2459.2624.2657.2756.2888.2987.3013*, %struct.Production.29.62.95.260.590.623.689.722.755.887.986.1217.2437.2470.2635.2668.2767.2899.2998.3014* } +%struct.anon.9.42.75.240.570.603.669.702.735.867.966.1197.2417.2450.2615.2648.2747.2879.2978.3011 = type { i32, i32, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018**, [3 x %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018*] } +%struct.Declaration.13.46.79.244.574.607.673.706.739.871.970.1201.2421.2454.2619.2652.2751.2883.2982.3012 = type { %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021*, i32, i32 } +%struct.Term.18.51.84.249.579.612.678.711.744.876.975.1206.2426.2459.2624.2657.2756.2888.2987.3013 = type { i32, i32, i32, i32, i32, i8*, i32, i8, 
%struct.Production.29.62.95.260.590.623.689.722.755.887.986.1217.2437.2470.2635.2668.2767.2899.2998.3014* } +%struct.anon.0.30.63.96.261.591.624.690.723.756.888.987.1218.2438.2471.2636.2669.2768.2900.2999.3015 = type { i32, i32, %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021**, [3 x %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021*] } +%struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021 = type { i32, i32, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018*, %union.anon.11.44.77.242.572.605.671.704.737.869.968.1199.2419.2452.2617.2650.2749.2881.2980.3020 } +%union.anon.11.44.77.242.572.605.671.704.737.869.968.1199.2419.2452.2617.2650.2749.2881.2980.3020 = type { %struct.Unresolved.10.43.76.241.571.604.670.703.736.868.967.1198.2418.2451.2616.2649.2748.2880.2979.3019 } +%struct.Unresolved.10.43.76.241.571.604.670.703.736.868.967.1198.2418.2451.2616.2649.2748.2880.2979.3019 = type { i8*, i32 } +%struct.Code.31.64.97.262.592.625.691.724.757.889.988.1219.2439.2472.2637.2670.2769.2901.3000.3016 = type { i8*, i32 } +%struct.anon.1.32.65.98.263.593.626.692.725.758.890.989.1220.2440.2473.2638.2671.2770.2902.3001.3017 = type { i32, i32, %struct.Code.31.64.97.262.592.625.691.724.757.889.988.1219.2439.2472.2637.2670.2769.2901.3000.3016**, [3 x %struct.Code.31.64.97.262.592.625.691.724.757.889.988.1219.2439.2472.2637.2670.2769.2901.3000.3016*] } +%struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035 = type { i32, i64, %struct.anon.2.14.47.80.245.575.608.674.707.740.872.971.1202.2422.2455.2620.2653.2752.2884.2983.3022, %struct.anon.3.15.48.81.246.576.609.675.708.741.873.972.1203.2423.2456.2621.2654.2753.2885.2984.3023, %struct.VecGoto.17.50.83.248.578.611.677.710.743.875.974.1205.2425.2458.2623.2656.2755.2887.2986.3025, %struct.VecAction.20.53.86.251.581.614.680.713.746.878.977.1208.2428.2461.2626.2659.2758.2890.2989.3027, %struct.VecAction.20.53.86.251.581.614.680.713.746.878.977.1208.2428.2461.2626.2659.2758.2890.2989.3027, %struct.VecHint.22.55.88.253.583.616.682.715.748.880.979.1210.2430.2463.2628.2661.2760.2892.2991.3029, %struct.VecHint.22.55.88.253.583.616.682.715.748.880.979.1210.2430.2463.2628.2661.2760.2892.2991.3029, %struct.Scanner.27.60.93.258.588.621.687.720.753.885.984.1215.2435.2468.2633.2666.2765.2897.2996.3034, i8, i8*, i32, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035*, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035*, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018*, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018* } +%struct.anon.2.14.47.80.245.575.608.674.707.740.872.971.1202.2422.2455.2620.2653.2752.2884.2983.3022 = type { i32, i32, %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021**, [3 x %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021*] } +%struct.anon.3.15.48.81.246.576.609.675.708.741.873.972.1203.2423.2456.2621.2654.2753.2885.2984.3023 = type { i32, i32, %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021**, [3 x 
%struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021*] } +%struct.VecGoto.17.50.83.248.578.611.677.710.743.875.974.1205.2425.2458.2623.2656.2755.2887.2986.3025 = type { i32, i32, %struct.Goto.16.49.82.247.577.610.676.709.742.874.973.1204.2424.2457.2622.2655.2754.2886.2985.3024**, [3 x %struct.Goto.16.49.82.247.577.610.676.709.742.874.973.1204.2424.2457.2622.2655.2754.2886.2985.3024*] } +%struct.Goto.16.49.82.247.577.610.676.709.742.874.973.1204.2424.2457.2622.2655.2754.2886.2985.3024 = type { %struct.Elem.12.45.78.243.573.606.672.705.738.870.969.1200.2420.2453.2618.2651.2750.2882.2981.3021*, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035* } +%struct.VecAction.20.53.86.251.581.614.680.713.746.878.977.1208.2428.2461.2626.2659.2758.2890.2989.3027 = type { i32, i32, %struct.Action.19.52.85.250.580.613.679.712.745.877.976.1207.2427.2460.2625.2658.2757.2889.2988.3026**, [3 x %struct.Action.19.52.85.250.580.613.679.712.745.877.976.1207.2427.2460.2625.2658.2757.2889.2988.3026*] } +%struct.Action.19.52.85.250.580.613.679.712.745.877.976.1207.2427.2460.2625.2658.2757.2889.2988.3026 = type { i32, %struct.Term.18.51.84.249.579.612.678.711.744.876.975.1206.2426.2459.2624.2657.2756.2888.2987.3013*, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018*, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035*, i32, i8* } +%struct.VecHint.22.55.88.253.583.616.682.715.748.880.979.1210.2430.2463.2628.2661.2760.2892.2991.3029 = type { i32, i32, %struct.Hint.21.54.87.252.582.615.681.714.747.879.978.1209.2429.2462.2627.2660.2759.2891.2990.3028**, [3 x %struct.Hint.21.54.87.252.582.615.681.714.747.879.978.1209.2429.2462.2627.2660.2759.2891.2990.3028*] } +%struct.Hint.21.54.87.252.582.615.681.714.747.879.978.1209.2429.2462.2627.2660.2759.2891.2990.3028 = type { i32, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035*, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018* } +%struct.Scanner.27.60.93.258.588.621.687.720.753.885.984.1215.2435.2468.2633.2666.2765.2897.2996.3034 = type { %struct.VecScanState.25.58.91.256.586.619.685.718.751.883.982.1213.2433.2466.2631.2664.2763.2895.2994.3032, %struct.VecScanStateTransition.26.59.92.257.587.620.686.719.752.884.983.1214.2434.2467.2632.2665.2764.2896.2995.3033 } +%struct.VecScanState.25.58.91.256.586.619.685.718.751.883.982.1213.2433.2466.2631.2664.2763.2895.2994.3032 = type { i32, i32, %struct.ScanState.24.57.90.255.585.618.684.717.750.882.981.1212.2432.2465.2630.2663.2762.2894.2993.3031**, [3 x %struct.ScanState.24.57.90.255.585.618.684.717.750.882.981.1212.2432.2465.2630.2663.2762.2894.2993.3031*] } +%struct.ScanState.24.57.90.255.585.618.684.717.750.882.981.1212.2432.2465.2630.2663.2762.2894.2993.3031 = type { i32, [256 x %struct.ScanState.24.57.90.255.585.618.684.717.750.882.981.1212.2432.2465.2630.2663.2762.2894.2993.3031*], %struct.VecAction.20.53.86.251.581.614.680.713.746.878.977.1208.2428.2461.2626.2659.2758.2890.2989.3027, %struct.VecAction.20.53.86.251.581.614.680.713.746.878.977.1208.2428.2461.2626.2659.2758.2890.2989.3027, [256 x %struct.ScanStateTransition.23.56.89.254.584.617.683.716.749.881.980.1211.2431.2464.2629.2662.2761.2893.2992.3030*] } +%struct.ScanStateTransition.23.56.89.254.584.617.683.716.749.881.980.1211.2431.2464.2629.2662.2761.2893.2992.3030 = type { i32, 
%struct.VecAction.20.53.86.251.581.614.680.713.746.878.977.1208.2428.2461.2626.2659.2758.2890.2989.3027, %struct.VecAction.20.53.86.251.581.614.680.713.746.878.977.1208.2428.2461.2626.2659.2758.2890.2989.3027 } +%struct.VecScanStateTransition.26.59.92.257.587.620.686.719.752.884.983.1214.2434.2467.2632.2665.2764.2896.2995.3033 = type { i32, i32, %struct.ScanStateTransition.23.56.89.254.584.617.683.716.749.881.980.1211.2431.2464.2629.2662.2761.2893.2992.3030**, [3 x %struct.ScanStateTransition.23.56.89.254.584.617.683.716.749.881.980.1211.2431.2464.2629.2662.2761.2893.2992.3030*] } + +; Function Attrs: nounwind +declare noalias i8* @malloc() #0 + +; Function Attrs: nounwind uwtable +define void @build_eq() #1 { +entry: + %call = tail call noalias i8* @malloc() #2 + %0 = bitcast i8* %call to %struct.EqState.41.74.107.272.602.635.701.734.767.899.998.1229.2449.2482.2647.2680.2779.2911.3010.3036* + br label %for.cond.preheader + +for.cond.preheader: ; preds = %for.cond.preheader, %entry + br i1 undef, label %for.cond.260.preheader, label %for.cond.preheader + +for.cond.260.preheader: ; preds = %for.cond.preheader + br i1 undef, label %for.cond.316.preheader, label %for.body.265 + +for.cond.316.preheader: ; preds = %for.cond.260.preheader + br i1 undef, label %for.cond.400.preheader, label %for.body.321 + +for.body.265: ; preds = %for.cond.260.preheader + unreachable + +for.cond.400.preheader: ; preds = %for.inc.397, %for.cond.316.preheader + ret void + +for.body.321: ; preds = %for.inc.397, %for.cond.316.preheader + %1 = load %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035*, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035** undef, align 8 + %eq329 = getelementptr inbounds %struct.EqState.41.74.107.272.602.635.701.734.767.899.998.1229.2449.2482.2647.2680.2779.2911.3010.3036, %struct.EqState.41.74.107.272.602.635.701.734.767.899.998.1229.2449.2482.2647.2680.2779.2911.3010.3036* %0, i64 0, i32 0 + br i1 undef, label %for.inc.397, label %land.lhs.true.331 + +land.lhs.true.331: ; preds = %for.body.321 + br i1 undef, label %for.inc.397, label %if.then.334 + +if.then.334: ; preds = %land.lhs.true.331 + %2 = load %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018*, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018** undef, align 8 + br i1 undef, label %for.inc.397, label %land.lhs.true.369 + +land.lhs.true.369: ; preds = %if.then.334 + %n380 = getelementptr inbounds %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018, %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018* %2, i64 0, i32 6, i32 0 + %3 = load i32, i32* %n380, align 8 + br i1 true, label %if.then.383, label %for.inc.397 + +if.then.383: ; preds = %land.lhs.true.369 + %reduces_with387 = getelementptr inbounds %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035* %1, i64 0, i32 15 + %4 = bitcast %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018** %reduces_with387 to i64* + %5 = load %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035*, 
%struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035** %eq329, align 8 + %index389 = getelementptr inbounds %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035, %struct.State.28.61.94.259.589.622.688.721.754.886.985.1216.2436.2469.2634.2667.2766.2898.2997.3035* %5, i64 0, i32 0 + %6 = load i32, i32* %index389, align 8 + store i32 0, i32* %index389, align 8 + %diff_rule392 = getelementptr inbounds %struct.EqState.41.74.107.272.602.635.701.734.767.899.998.1229.2449.2482.2647.2680.2779.2911.3010.3036, %struct.EqState.41.74.107.272.602.635.701.734.767.899.998.1229.2449.2482.2647.2680.2779.2911.3010.3036* %0, i64 0, i32 1 + %7 = bitcast %struct.Rule.33.66.99.264.594.627.693.726.759.891.990.1221.2441.2474.2639.2672.2771.2903.3002.3018** %diff_rule392 to i64* + br label %for.inc.397 + +for.inc.397: ; preds = %if.then.383, %land.lhs.true.369, %if.then.334, %land.lhs.true.331, %for.body.321 + br i1 undef, label %for.body.321, label %for.cond.400.preheader +} Index: polly/trunk/test/Isl/CodeGen/inv-load-lnt-crash-wrong-order.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/inv-load-lnt-crash-wrong-order.ll +++ polly/trunk/test/Isl/CodeGen/inv-load-lnt-crash-wrong-order.ll @@ -0,0 +1,90 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; This crashed our codegen at some point, verify it runs through +; +; CHECK: polly.start +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.EqState.41.74.107.338.503.866.932.965.998.1064.2052.2151.2184.2606 = type { %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605*, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588*, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605* } +%struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588 = type { i32, %struct.Production.29.62.95.326.491.854.920.953.986.1052.2040.2139.2172.2584*, i32, i32, i32, i32, %struct.anon.0.30.63.96.327.492.855.921.954.987.1053.2041.2140.2173.2585, %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591*, %struct.Code.31.64.97.328.493.856.922.955.988.1054.2042.2141.2174.2586, %struct.Code.31.64.97.328.493.856.922.955.988.1054.2042.2141.2174.2586, %struct.anon.1.32.65.98.329.494.857.923.956.989.1055.2043.2142.2175.2587, i32, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588* } +%struct.Production.29.62.95.326.491.854.920.953.986.1052.2040.2139.2172.2584 = type { i8*, i32, %struct.anon.9.42.75.306.471.834.900.933.966.1032.2020.2119.2152.2581, i32, i8, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588*, [8 x %struct.Production.29.62.95.326.491.854.920.953.986.1052.2040.2139.2172.2584*], [8 x %struct.Declaration.13.46.79.310.475.838.904.937.970.1036.2024.2123.2156.2582*], %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605*, %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591*, %struct.Term.18.51.84.315.480.843.909.942.975.1041.2029.2128.2161.2583*, %struct.Production.29.62.95.326.491.854.920.953.986.1052.2040.2139.2172.2584* } +%struct.anon.9.42.75.306.471.834.900.933.966.1032.2020.2119.2152.2581 = type { i32, i32, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588**, [3 x %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588*] } +%struct.Declaration.13.46.79.310.475.838.904.937.970.1036.2024.2123.2156.2582 = 
type { %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591*, i32, i32 } +%struct.Term.18.51.84.315.480.843.909.942.975.1041.2029.2128.2161.2583 = type { i32, i32, i32, i32, i32, i8*, i32, i8, %struct.Production.29.62.95.326.491.854.920.953.986.1052.2040.2139.2172.2584* } +%struct.anon.0.30.63.96.327.492.855.921.954.987.1053.2041.2140.2173.2585 = type { i32, i32, %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591**, [3 x %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591*] } +%struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591 = type { i32, i32, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588*, %union.anon.11.44.77.308.473.836.902.935.968.1034.2022.2121.2154.2590 } +%union.anon.11.44.77.308.473.836.902.935.968.1034.2022.2121.2154.2590 = type { %struct.Unresolved.10.43.76.307.472.835.901.934.967.1033.2021.2120.2153.2589 } +%struct.Unresolved.10.43.76.307.472.835.901.934.967.1033.2021.2120.2153.2589 = type { i8*, i32 } +%struct.Code.31.64.97.328.493.856.922.955.988.1054.2042.2141.2174.2586 = type { i8*, i32 } +%struct.anon.1.32.65.98.329.494.857.923.956.989.1055.2043.2142.2175.2587 = type { i32, i32, %struct.Code.31.64.97.328.493.856.922.955.988.1054.2042.2141.2174.2586**, [3 x %struct.Code.31.64.97.328.493.856.922.955.988.1054.2042.2141.2174.2586*] } +%struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605 = type { i32, i64, %struct.anon.2.14.47.80.311.476.839.905.938.971.1037.2025.2124.2157.2592, %struct.anon.3.15.48.81.312.477.840.906.939.972.1038.2026.2125.2158.2593, %struct.VecGoto.17.50.83.314.479.842.908.941.974.1040.2028.2127.2160.2595, %struct.VecAction.20.53.86.317.482.845.911.944.977.1043.2031.2130.2163.2597, %struct.VecAction.20.53.86.317.482.845.911.944.977.1043.2031.2130.2163.2597, %struct.VecHint.22.55.88.319.484.847.913.946.979.1045.2033.2132.2165.2599, %struct.VecHint.22.55.88.319.484.847.913.946.979.1045.2033.2132.2165.2599, %struct.Scanner.27.60.93.324.489.852.918.951.984.1050.2038.2137.2170.2604, i8, i8*, i32, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605*, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605*, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588*, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588* } +%struct.anon.2.14.47.80.311.476.839.905.938.971.1037.2025.2124.2157.2592 = type { i32, i32, %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591**, [3 x %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591*] } +%struct.anon.3.15.48.81.312.477.840.906.939.972.1038.2026.2125.2158.2593 = type { i32, i32, %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591**, [3 x %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591*] } +%struct.VecGoto.17.50.83.314.479.842.908.941.974.1040.2028.2127.2160.2595 = type { i32, i32, %struct.Goto.16.49.82.313.478.841.907.940.973.1039.2027.2126.2159.2594**, [3 x %struct.Goto.16.49.82.313.478.841.907.940.973.1039.2027.2126.2159.2594*] } +%struct.Goto.16.49.82.313.478.841.907.940.973.1039.2027.2126.2159.2594 = type { %struct.Elem.12.45.78.309.474.837.903.936.969.1035.2023.2122.2155.2591*, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605* } +%struct.VecAction.20.53.86.317.482.845.911.944.977.1043.2031.2130.2163.2597 = type { i32, i32, %struct.Action.19.52.85.316.481.844.910.943.976.1042.2030.2129.2162.2596**, [3 x 
%struct.Action.19.52.85.316.481.844.910.943.976.1042.2030.2129.2162.2596*] } +%struct.Action.19.52.85.316.481.844.910.943.976.1042.2030.2129.2162.2596 = type { i32, %struct.Term.18.51.84.315.480.843.909.942.975.1041.2029.2128.2161.2583*, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588*, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605*, i32, i8* } +%struct.VecHint.22.55.88.319.484.847.913.946.979.1045.2033.2132.2165.2599 = type { i32, i32, %struct.Hint.21.54.87.318.483.846.912.945.978.1044.2032.2131.2164.2598**, [3 x %struct.Hint.21.54.87.318.483.846.912.945.978.1044.2032.2131.2164.2598*] } +%struct.Hint.21.54.87.318.483.846.912.945.978.1044.2032.2131.2164.2598 = type { i32, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605*, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588* } +%struct.Scanner.27.60.93.324.489.852.918.951.984.1050.2038.2137.2170.2604 = type { %struct.VecScanState.25.58.91.322.487.850.916.949.982.1048.2036.2135.2168.2602, %struct.VecScanStateTransition.26.59.92.323.488.851.917.950.983.1049.2037.2136.2169.2603 } +%struct.VecScanState.25.58.91.322.487.850.916.949.982.1048.2036.2135.2168.2602 = type { i32, i32, %struct.ScanState.24.57.90.321.486.849.915.948.981.1047.2035.2134.2167.2601**, [3 x %struct.ScanState.24.57.90.321.486.849.915.948.981.1047.2035.2134.2167.2601*] } +%struct.ScanState.24.57.90.321.486.849.915.948.981.1047.2035.2134.2167.2601 = type { i32, [256 x %struct.ScanState.24.57.90.321.486.849.915.948.981.1047.2035.2134.2167.2601*], %struct.VecAction.20.53.86.317.482.845.911.944.977.1043.2031.2130.2163.2597, %struct.VecAction.20.53.86.317.482.845.911.944.977.1043.2031.2130.2163.2597, [256 x %struct.ScanStateTransition.23.56.89.320.485.848.914.947.980.1046.2034.2133.2166.2600*] } +%struct.ScanStateTransition.23.56.89.320.485.848.914.947.980.1046.2034.2133.2166.2600 = type { i32, %struct.VecAction.20.53.86.317.482.845.911.944.977.1043.2031.2130.2163.2597, %struct.VecAction.20.53.86.317.482.845.911.944.977.1043.2031.2130.2163.2597 } +%struct.VecScanStateTransition.26.59.92.323.488.851.917.950.983.1049.2037.2136.2169.2603 = type { i32, i32, %struct.ScanStateTransition.23.56.89.320.485.848.914.947.980.1046.2034.2133.2166.2600**, [3 x %struct.ScanStateTransition.23.56.89.320.485.848.914.947.980.1046.2034.2133.2166.2600*] } + +; Function Attrs: nounwind +declare noalias i8* @malloc() #0 + +; Function Attrs: nounwind uwtable +define void @build_eq() #1 { +entry: + %call = tail call noalias i8* @malloc() #2 + %0 = bitcast i8* %call to %struct.EqState.41.74.107.338.503.866.932.965.998.1064.2052.2151.2184.2606* + br label %for.cond.preheader + +for.cond.preheader: ; preds = %for.cond.preheader, %entry + br i1 undef, label %for.cond.316.preheader, label %for.cond.preheader + +for.cond.316.preheader: ; preds = %for.cond.preheader + br i1 undef, label %for.cond.400.preheader, label %for.body.321 + +for.cond.400.preheader: ; preds = %for.inc.397, %for.cond.316.preheader + br i1 undef, label %for.end.423, label %for.body.405 + +for.body.321: ; preds = %for.inc.397, %for.cond.316.preheader + %eq329 = getelementptr inbounds %struct.EqState.41.74.107.338.503.866.932.965.998.1064.2052.2151.2184.2606, %struct.EqState.41.74.107.338.503.866.932.965.998.1064.2052.2151.2184.2606* %0, i64 0, i32 0 + br i1 undef, label %for.inc.397, label %land.lhs.true.331 + +land.lhs.true.331: ; preds = %for.body.321 + br i1 undef, label %for.inc.397, label %if.then.334 + +if.then.334: ; preds = %land.lhs.true.331 + 
%1 = load %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605*, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605** %eq329, align 8 + %2 = load %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588*, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588** undef, align 8 + br i1 undef, label %for.inc.397, label %land.lhs.true.369 + +land.lhs.true.369: ; preds = %if.then.334 + %n380 = getelementptr inbounds %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588, %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588* %2, i64 0, i32 6, i32 0 + %3 = load i32, i32* %n380, align 8 + %cmp381 = icmp eq i32 %3, 2 + br i1 %cmp381, label %if.then.383, label %for.inc.397 + +if.then.383: ; preds = %land.lhs.true.369 + %reduces_to385 = getelementptr inbounds %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605* %1, i64 0, i32 14 + store %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605* undef, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605** %reduces_to385, align 8 + %diff_rule386 = getelementptr inbounds %struct.EqState.41.74.107.338.503.866.932.965.998.1064.2052.2151.2184.2606, %struct.EqState.41.74.107.338.503.866.932.965.998.1064.2052.2151.2184.2606* %0, i64 0, i32 1 + %4 = bitcast %struct.Rule.33.66.99.330.495.858.924.957.990.1056.2044.2143.2176.2588** %diff_rule386 to i64* + %5 = load i64, i64* %4, align 8 + %6 = load %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605*, %struct.State.28.61.94.325.490.853.919.952.985.1051.2039.2138.2171.2605** %eq329, align 8 + br label %for.inc.397 + +for.inc.397: ; preds = %if.then.383, %land.lhs.true.369, %if.then.334, %land.lhs.true.331, %for.body.321 + br i1 undef, label %for.body.321, label %for.cond.400.preheader + +for.body.405: ; preds = %for.cond.400.preheader + unreachable + +for.end.423: ; preds = %for.cond.400.preheader + ret void +} Index: polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type.ll +++ polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type.ll @@ -1,5 +1,5 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -polly-codegen -S < %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN ; ; struct { ; int a; Index: polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll +++ polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_escaping.ll @@ -1,5 +1,5 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -polly-codegen -S < %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN ; ; struct { ; int a; Index: polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll +++ 
polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer.ll @@ -1,5 +1,5 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -polly-codegen -S < %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN ; ; int U; ; void f(int *A) { Index: polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll +++ polly/trunk/test/ScopInfo/invariant_load_access_classes_different_base_type_same_pointer_escaping.ll @@ -0,0 +1,84 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN +; +; int U; +; int f(int *A) { +; int i = 0, x, y; +; do { +; x = (*(int *)&U); +; y = (int)(*(float *)&U); +; A[i] = x + y; +; } while (i++ < 100); +; return x + y; +; } +; +; CHECK: Invariant Accesses: { +; CHECK-NOT: ReadAccess +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_do_body[i0] -> MemRef_U[0] }; +; CHECK: Execution Context: { : } +; CHECK-NOT: ReadAccess +; CHECK: } +; +; CHECK: Statements { +; CHECK: Stmt_do_body +; CHECK: Domain := +; CHECK: { Stmt_do_body[i0] : i0 <= 100 and i0 >= 0 }; +; CHECK: Schedule := +; CHECK: { Stmt_do_body[i0] -> [i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_do_body[i0] -> MemRef_A[i0] }; +; CHECK: } +; +; CODEGEN: entry: +; CODEGEN: %U.f.preload.s2a = alloca float +; CODEGEN: br label %polly.split_new_and_old +; +; CODEGEN: polly.preload.begin: +; CODEGEN: %U.load = load i32, i32* @U +; CODEGEN: %0 = bitcast i32 %U.load to float +; CODEGEN: %1 = bitcast float %0 to i32 +; CODEGEN: store float %0, float* %U.f.preload.s2a +; +; CODEGEN: polly.merge_new_and_old: +; CODEGEN-DAG: %U.f.merge = phi float [ %U.f.final_reload, %polly.loop_exit ], [ %U.f, %do.cond ] +; CODEGEN-DAG: %U.i.merge = phi i32 [ %7, %polly.loop_exit ], [ %U.i, %do.cond ] +; +; CODEGEN: polly.loop_exit: +; CODEGEN-DAG: %U.f.final_reload = load float, float* %U.f.preload.s2a +; CODEGEN-DAG: %U.i.final_reload = load float, float* %U.f.preload.s2a +; CODEGEN-DAG: %7 = bitcast float %U.i.final_reload to i32 +; +; CODEGEN: polly.stmt.do.body: +; CODEGEN: %p_conv = fptosi float %0 to i32 +; CODEGEN: %p_add = add nsw i32 %1, %p_conv +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@U = common global i32 0, align 4 + +define i32 @f(i32* %A) { +entry: + br label %do.body + +do.body: ; preds = %do.cond, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %entry ] + %U.i = load i32, i32* @U, align 4 + %U.cast = bitcast i32 *@U to float* + %U.f = load float, float* %U.cast, align 4 + %conv = fptosi float %U.f to i32 + %add = add nsw i32 %U.i, %conv + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %add, i32* %arrayidx, align 4 + br label %do.cond + +do.cond: ; preds = %do.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 101 + br i1 %exitcond, label %do.body, label %do.end + +do.end: ; preds = %do.cond + %conv2 = fptosi float %U.f to i32 + %add2 = add nsw i32 %U.i, %conv2 + ret i32 %add2 +} Index: polly/trunk/test/ScopInfo/invariant_load_zext_parameter-2.ll =================================================================== --- 
polly/trunk/test/ScopInfo/invariant_load_zext_parameter-2.ll +++ polly/trunk/test/ScopInfo/invariant_load_zext_parameter-2.ll @@ -0,0 +1,112 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; Stress test for the code generation of invariant accesses. +; +; void f(int *I0, int *I1, int *I2, int *V, long p0, long p1, long p2, long p3) { +; *V = *I1; +; for (int i = 0; i < 1000; i++) { +; long n0 = p0 * *I1 + p1 * *I1; +; V[i] = I0[n0]; +; long m0 = p0 * (I2[0]); +; long m1 = p1 * (I2[1]); +; long m2 = p2 * (I2[2]); +; long m3 = p3 * (I2[3]); +; int j = 0; +; do { +; if (j > 0) { +; V[i] += I1[m0 + m2]; +; V[i] += I1[n0]; +; } +; } while (j++ < m1 + m3 * n0); +; } +; } +; +; CHECK: p0: ((sext i32 %tmp6 to i64) * %p1) +; CHECK: p1: ((sext i32 %tmp8 to i64) * (sext i32 %tmp3 to i64) * (%p0 + %p1) * %p3) +; CHECK: p2: ((sext i32 %tmp3 to i64) * (%p0 + %p1)) +; CHECK: p3: ((sext i32 %tmp5 to i64) * %p0) +; CHECK: p4: ((sext i32 %tmp7 to i64) * %p2) +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %I0, i32* %I1, i32* %I2, i32* %V, i64 %p0, i64 %p1, i64 %p2, i64 %p3) { +entry: + %tmp = load i32, i32* %I1, align 4 + store i32 %tmp, i32* %V, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv1, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = load i32, i32* %I1, align 4 + %conv = sext i32 %tmp3 to i64 + %mul = mul nsw i64 %conv, %p0 + %conv1 = sext i32 %tmp3 to i64 + %mul2 = mul nsw i64 %conv1, %p1 + %add = add nsw i64 %mul, %mul2 + %arrayidx = getelementptr inbounds i32, i32* %I0, i64 %add + %tmp4 = load i32, i32* %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv1 + store i32 %tmp4, i32* %arrayidx3, align 4 + %tmp5 = load i32, i32* %I2, align 4 + %conv5 = sext i32 %tmp5 to i64 + %mul6 = mul nsw i64 %conv5, %p0 + %arrayidx7 = getelementptr inbounds i32, i32* %I2, i64 1 + %tmp6 = load i32, i32* %arrayidx7, align 4 + %conv8 = sext i32 %tmp6 to i64 + %mul9 = mul nsw i64 %conv8, %p1 + %arrayidx10 = getelementptr inbounds i32, i32* %I2, i64 2 + %tmp7 = load i32, i32* %arrayidx10, align 4 + %conv11 = sext i32 %tmp7 to i64 + %mul12 = mul nsw i64 %conv11, %p2 + %arrayidx13 = getelementptr inbounds i32, i32* %I2, i64 3 + %tmp8 = load i32, i32* %arrayidx13, align 4 + %conv14 = sext i32 %tmp8 to i64 + %mul15 = mul nsw i64 %conv14, %p3 + br label %do.body + +do.body: ; preds = %do.cond, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %for.body ] + %cmp16 = icmp sgt i64 %indvars.iv, 0 + br i1 %cmp16, label %if.then, label %if.end + +if.then: ; preds = %do.body + %add18 = add nsw i64 %mul6, %mul12 + %arrayidx19 = getelementptr inbounds i32, i32* %I1, i64 %add18 + %tmp9 = load i32, i32* %arrayidx19, align 4 + %arrayidx21 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv1 + %tmp10 = load i32, i32* %arrayidx21, align 4 + %add22 = add nsw i32 %tmp10, %tmp9 + store i32 %add22, i32* %arrayidx21, align 4 + %arrayidx23 = getelementptr inbounds i32, i32* %I1, i64 %add + %tmp11 = load i32, i32* %arrayidx23, align 4 + %arrayidx25 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv1 + %tmp12 = load i32, i32* %arrayidx25, align 4 + %add26 = add nsw i32 %tmp12, %tmp11 + store i32 %add26, i32* %arrayidx25, align 4 + br label %if.end + +if.end: ; preds = %if.then, %do.body + br 
+  br label %do.cond
+
+do.cond:                                          ; preds = %if.end
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %mul28 = mul nsw i64 %mul15, %add
+  %add29 = add nsw i64 %mul9, %mul28
+  %cmp30 = icmp slt i64 %indvars.iv, %add29
+  br i1 %cmp30, label %do.body, label %do.end
+
+do.end:                                           ; preds = %do.cond
+  br label %for.inc
+
+for.inc:                                          ; preds = %do.end
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: polly/trunk/test/ScopInfo/invariant_load_zext_parameter.ll
===================================================================
--- polly/trunk/test/ScopInfo/invariant_load_zext_parameter.ll
+++ polly/trunk/test/ScopInfo/invariant_load_zext_parameter.ll
@@ -0,0 +1,67 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN
+;
+;    void f(int *I0, int *I1, int *V) {
+;      for (int i = 0; i < 1000; i++) {
+;        if ((long)(*I0) == 0)
+;          V[i] += *I1;
+;      }
+;    }
+;
+; Check that we generate valid code even though the invariant load is only a
+; part of a parameter, and that it is replaced by the preloaded value, e.g., to
+; evaluate the execution context of the invariant access to I1.
+;
+; CHECK: p0: (zext i32 %loadI0 to i64)
+;
+; CODEGEN: polly.preload.begin:
+; CODEGEN-NEXT: %polly.access.I0 = getelementptr i32, i32* %I0, i64 0
+; CODEGEN-NEXT: %polly.access.I0.load = load i32, i32* %polly.access.I0
+; CODEGEN-NEXT: store i32 %polly.access.I0.load, i32* %loadI0.preload.s2a
+; CODEGEN-NEXT: %0 = zext i32 %polly.access.I0.load to i64
+; CODEGEN-NEXT: %1 = icmp eq i64 %0, 0
+; CODEGEN-NEXT: br label %polly.preload.cond
+;
+; CODEGEN: polly.preload.cond:
+; CODEGEN-NEXT: br i1 %1, label %polly.preload.exec, label %polly.preload.merge
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %I0, i32* %I1, i32* %V) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %loadI1a = load i32, i32* %I0, align 4
+  %arrayidx = getelementptr inbounds i32, i32* %V, i64 %indvars.iv
+  %loadI1a1 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %loadI1a1, %loadI1a
+  store i32 %add, i32* %arrayidx, align 4
+  %loadI0 = load i32, i32* %I0, align 4
+  %loadI0ext = zext i32 %loadI0 to i64
+  %cmp1 = icmp eq i64 %loadI0ext, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %loadI1b = load i32, i32* %I1, align 4
+  %arrayidx4 = getelementptr inbounds i32, i32* %V, i64 %indvars.iv
+  %loadI1a4 = load i32, i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %loadI1a4, %loadI1b
+  store i32 %add5, i32* %arrayidx4, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: polly/trunk/test/ScopInfo/invariant_loads_complicated_dependences.ll
===================================================================
--- polly/trunk/test/ScopInfo/invariant_loads_complicated_dependences.ll
+++ polly/trunk/test/ScopInfo/invariant_loads_complicated_dependences.ll
@@ -2,10 +2,7 @@
 ;
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [LB, UB] -> { Stmt_for_body[i0] -> MemRef_LB[0] };
-; CHECK-NEXT: Execution Context: [LB, UB] -> { : }
-; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [LB, UB] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] };
+; CHECK-NEXT: [LB, UB] -> { Stmt_for_body[i0] -> MemRef_LBptr[0] };
 ; CHECK-NEXT: Execution Context: [LB, UB] -> { : }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: [LB, UB] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] };
@@ -13,6 +10,9 @@
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: [LB, UB] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] };
 ; CHECK-NEXT: Execution Context: [LB, UB] -> { : LB <= 5 }
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [LB, UB] -> { Stmt_do_cond[i0, i1] -> MemRef_UBptr[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : }
 ; CHECK-NEXT: }
 ;
 ; void f(int *restrict A, int *restrict V, int *restrict U, int *restrict UB,
@@ -30,7 +30,7 @@
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-define void @f(i32* noalias %A, i32* noalias %V, i32* noalias %U, i32* noalias %UB, i32* noalias %LB) {
+define void @f(i32* noalias %A, i32* noalias %V, i32* noalias %U, i32* noalias %UBptr, i32* noalias %LBptr) {
 entry:
   br label %for.cond
 
@@ -40,11 +40,11 @@
   br i1 %exitcond, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %tmp = load i32, i32* %LB, align 4
+  %LB = load i32, i32* %LBptr, align 4
   br label %do.body
 
 do.body:                                          ; preds = %do.cond, %for.body
-  %j.0 = phi i32 [ %tmp, %for.body ], [ %inc, %do.cond ]
+  %j.0 = phi i32 [ %LB, %for.body ], [ %inc, %do.cond ]
   %cmp1 = icmp sgt i32 %j.0, 5
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -69,8 +69,8 @@
 
 do.cond:                                          ; preds = %if.end
   %inc = add nsw i32 %j.0, 1
-  %tmp5 = load i32, i32* %UB, align 4
-  %cmp5 = icmp slt i32 %j.0, %tmp5
+  %UB = load i32, i32* %UBptr, align 4
+  %cmp5 = icmp slt i32 %j.0, %UB
   br i1 %cmp5, label %do.body, label %do.end
 
 do.end:                                           ; preds = %do.cond
Index: polly/trunk/test/ScopInfo/invariant_loop_bounds.ll
===================================================================
--- polly/trunk/test/ScopInfo/invariant_loop_bounds.ll
+++ polly/trunk/test/ScopInfo/invariant_loop_bounds.ll
@@ -3,28 +3,28 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[2]
-; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : }
+; CHECK-NEXT: Execution Context: [bounds2, bounds1, bounds0] -> { : }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[1]
-; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_0 >= 1 }
+; CHECK-NEXT: Execution Context: [bounds2, bounds1, bounds0] -> { : bounds2 >= 1 }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_1 >= 1 and p_0 >= 1 }
+; CHECK-NEXT: Execution Context: [bounds2, bounds1, bounds0] -> { : bounds1 >= 1 and bounds2 >= 1 }
 ; CHECK-NEXT: }
 ;
-; CHECK: p0: (8 + @bounds)
-; CHECK: p1: (4 + @bounds)
-; CHECK: p2: @bounds
+; CHECK: p0: %bounds2
+; CHECK: p1: %bounds1
+; CHECK: p2: %bounds0
 ; CHECK: Statements {
 ; CHECK: Stmt_for_body_6
 ; CHECK: Domain :=
-; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + p_0 and i1 >= 0 and i1 <= -1 + p_1 and i2 >= 0 and i2 <= -1 + bounds };
+; CHECK: [bounds2, bounds1, bounds0] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + bounds2 and i1 >= 0 and i1 <= -1 + bounds1 and i2 >= 0 and i2 <= -1 + bounds0 };
 ; CHECK: Schedule :=
-; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK: [bounds2, bounds1, bounds0] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
 ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: [bounds2, bounds1, bounds0] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
 ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: [bounds2, bounds1, bounds0] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
 ; CHECK: }
 ;
 ; int bounds[3];
@@ -49,8 +49,8 @@
 
 for.cond:                                         ; preds = %for.inc.16, %entry
   %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
-  %tmp = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 2), align 4
-  %tmp7 = sext i32 %tmp to i64
+  %bounds2 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 2), align 4
+  %tmp7 = sext i32 %bounds2 to i64
   %cmp = icmp slt i64 %indvars.iv5, %tmp7
   br i1 %cmp, label %for.body, label %for.end.18
 
@@ -59,8 +59,8 @@
 
 for.cond.1:                                       ; preds = %for.inc.13, %for.body
   %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
-  %tmp8 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 1), align 4
-  %tmp9 = sext i32 %tmp8 to i64
+  %bounds1 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 1), align 4
+  %tmp9 = sext i32 %bounds1 to i64
   %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
   br i1 %cmp2, label %for.body.3, label %for.end.15
 
@@ -69,8 +69,8 @@
 
 for.cond.4:                                       ; preds = %for.inc, %for.body.3
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
-  %tmp10 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 0), align 4
-  %tmp11 = sext i32 %tmp10 to i64
+  %bounds0 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp11 = sext i32 %bounds0 to i64
   %cmp5 = icmp slt i64 %indvars.iv, %tmp11
   br i1 %cmp5, label %for.body.6, label %for.end
Index: polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
===================================================================
--- polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
+++ polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
@@ -8,21 +8,21 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [bounds] -> { : }
+; CHECK-NEXT: Execution Context: [bounds0l0] -> { : }
 ; CHECK-NEXT: }
 ;
-; CHECK: p0: @bounds
+; CHECK: p0: %bounds0l0
 ; CHECK-NOT: p1
 ; CHECK: Statements {
 ; CHECK: Stmt_for_body_6
 ; CHECK: Domain :=
-; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds };
+; CHECK: [bounds0l0] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + bounds0l0 and i1 >= 0 and i1 <= -1 + bounds0l0 and i2 >= 0 and i2 <= -1 + bounds0l0 };
 ; CHECK: Schedule :=
-; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK: [bounds0l0] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
 ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: [bounds0l0] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
 ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: [bounds0l0] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
 ; CHECK: }
 ;
 ; int bounds[1];
@@ -47,8 +47,8 @@
 
 for.cond:                                         ; preds = %for.inc.16, %entry
   %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
-  %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
-  %tmp7 = sext i32 %tmp to i64
+  %bounds0l0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp7 = sext i32 %bounds0l0 to i64
   %cmp = icmp slt i64 %indvars.iv5, %tmp7
   br i1 %cmp, label %for.body, label %for.end.18
 
@@ -57,8 +57,8 @@
 
 for.cond.1:                                       ; preds = %for.inc.13, %for.body
   %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
-  %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
-  %tmp9 = sext i32 %tmp8 to i64
+  %bounds0l1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp9 = sext i32 %bounds0l1 to i64
   %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
   br i1 %cmp2, label %for.body.3, label %for.end.15
 
@@ -67,8 +67,8 @@
 
 for.cond.4:                                       ; preds = %for.inc, %for.body.3
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
-  %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
-  %tmp11 = sext i32 %tmp10 to i64
+  %bounds0l2 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp11 = sext i32 %bounds0l2 to i64
   %cmp5 = icmp slt i64 %indvars.iv, %tmp11
   br i1 %cmp5, label %for.body.6, label %for.end
Index: polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
===================================================================
--- polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
+++ polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
@@ -8,22 +8,22 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [bounds, p] -> { : }
+; CHECK-NEXT: Execution Context: [bounds0l0, p] -> { : }
 ; CHECK-NEXT: }
 ;
-; CHECK: p0: @bounds
+; CHECK: p0: %bounds0l0
 ; CHECK: p1: %p
 ; CHECK-NOT: p2:
 ; CHECK: Statements {
 ; CHECK: Stmt_for_body_6
 ; CHECK: Domain :=
-; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] : p = 0 and i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds };
+; CHECK: [bounds0l0, p] -> { Stmt_for_body_6[i0, i1, i2] : p = 0 and i0 >= 0 and i0 <= -1 + bounds0l0 and i1 >= 0 and i1 <= -1 + bounds0l0 and i2 >= 0 and i2 <= -1 + bounds0l0 };
 ; CHECK: Schedule :=
-; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK: [bounds0l0, p] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
 ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: [bounds0l0, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
 ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: [bounds0l0, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
 ; CHECK: }
 ;
 ; int bounds[1];
@@ -49,8 +49,8 @@
 
 for.cond:                                         ; preds = %for.inc.16, %entry
   %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
-  %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
-  %tmp7 = sext i32 %tmp to i64
+  %bounds0l0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp7 = sext i32 %bounds0l0 to i64
   %cmp = icmp slt i64 %indvars.iv5, %tmp7
   br i1 %cmp, label %for.body, label %for.end.18
 
@@ -60,8 +60,8 @@
 
 for.cond.1:                                       ; preds = %for.inc.13, %for.body
   %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
-  %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
-  %tmp9 = sext i32 %tmp8 to i64
+  %bounds0l1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp9 = sext i32 %bounds0l1 to i64
   %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
   br i1 %cmp2, label %for.body.3, label %for.end.15
 
@@ -70,8 +70,8 @@
 
 for.cond.4:                                       ; preds = %for.inc, %for.body.3
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
-  %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
-  %tmp11 = sext i32 %tmp10 to i64
+  %bounds0l2 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp11 = sext i32 %bounds0l2 to i64
   %cmp5 = icmp slt i64 %indvars.iv, %tmp11
   br i1 %cmp5, label %for.body.6, label %for.end
Index: polly/trunk/test/ScopInfo/multidim_parameter_addrec_product.ll
===================================================================
--- polly/trunk/test/ScopInfo/multidim_parameter_addrec_product.ll
+++ polly/trunk/test/ScopInfo/multidim_parameter_addrec_product.ll
@@ -8,14 +8,14 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [p] -> { Stmt_bb5[i0, i1] -> MemRef_p[0] };
-; CHECK-NEXT: Execution Context: [p] -> { : }
+; CHECK-NEXT: [pval] -> { Stmt_bb5[i0, i1] -> MemRef_p[0] };
+; CHECK-NEXT: Execution Context: [pval] -> { : }
 ; CHECK-NEXT: }
 
 ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [p] -> { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
+; CHECK: [pval] -> { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
 ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: [p] -> { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
+; CHECK: [pval] -> { Stmt_bb5[i0, i1] -> MemRef_A[i0, i1] };
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: polly/trunk/test/ScopInfo/required-invariant-loop-bounds.ll
===================================================================
--- polly/trunk/test/ScopInfo/required-invariant-loop-bounds.ll
+++ polly/trunk/test/ScopInfo/required-invariant-loop-bounds.ll
@@ -3,10 +3,10 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : }
+; CHECK-NEXT: Execution Context: [bounds0, bounds1] -> { : }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[1]
-; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : bounds >= 0 }
+; CHECK-NEXT: Execution Context: [bounds0, bounds1] -> { : bounds0 >= 0 }
 ; CHECK: }
 ;
 ; double A[1000][1000];
@@ -30,8 +30,8 @@
 
 for.cond:                                         ; preds = %for.inc.6, %entry
   %i.0 = phi i64 [ 0, %entry ], [ %inc7, %for.inc.6 ]
-  %tmp = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @bounds, i64 0, i64 0), align 16
-  %cmp = icmp sgt i64 %i.0, %tmp
+  %bounds0 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @bounds, i64 0, i64 0), align 16
+  %cmp = icmp sgt i64 %i.0, %bounds0
   br i1 %cmp, label %for.end.8, label %for.body
 
 for.body:                                         ; preds = %for.cond
@@ -39,8 +39,8 @@
 
 for.cond.1:                                       ; preds = %for.inc, %for.body
   %j.0 = phi i64 [ 0, %for.body ], [ %inc, %for.inc ]
-  %tmp1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @bounds, i64 0, i64 1), align 8
-  %cmp2 = icmp sgt i64 %j.0, %tmp1
+  %bounds1 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @bounds, i64 0, i64 1), align 8
+  %cmp2 = icmp sgt i64 %j.0, %bounds1
   br i1 %cmp2, label %for.end, label %for.body.3
 
 for.body.3:                                       ; preds = %for.cond.1