Index: polly/trunk/include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- polly/trunk/include/polly/CodeGen/IslNodeBuilder.h +++ polly/trunk/include/polly/CodeGen/IslNodeBuilder.h @@ -42,6 +42,9 @@ void addParameters(__isl_take isl_set *Context); void create(__isl_take isl_ast_node *Node); + /// @brief Preload all memory loads that are invariant. + void preloadInvariantLoads(); + /// @brief Finalize code generation for the SCoP @p S. /// /// @see BlockGenerator::finalizeSCoP(Scop &S) @@ -190,6 +193,21 @@ /// @param Mark The node we generate code for. virtual void createMark(__isl_take isl_ast_node *Marker); virtual void createFor(__isl_take isl_ast_node *For); + + /// @brief Preload the memory load access @p MA. + /// + /// If @p MA is not always executed it will be conditionally loaded and + /// merged with zero (the null value) of the same type. Hence, if @p MA is executed only + /// under condition C then the preload code will look like this: + /// + /// MA_preload = zero; + /// if (C) + /// MA_preload = load MA; + /// use MA_preload + Value *preloadInvariantLoad(const MemoryAccess &MA, + __isl_take isl_set *Domain, + __isl_keep isl_ast_build *Build); + void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -127,6 +127,9 @@ /// @brief Destructor to free the isl id of the base pointer. ~ScopArrayInfo(); + /// @brief Set the base pointer to @p BP. + void setBasePtr(Value *BP) { BasePtr = BP; } + /// @brief Return the base pointer. Value *getBasePtr() const { return BasePtr; } @@ -690,6 +693,15 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, MemoryAccess::ReductionType RT); +/// @brief Ordered list type to hold accesses. 
+using MemoryAccessList = std::forward_list; + +/// @brief Type for invariant memory accesses and their domain context. +using InvariantAccessTy = std::pair; + +/// @brief Type for multiple invariant memory accesses and their domain context. +using InvariantAccessesTy = SmallVector; + ///===----------------------------------------------------------------------===// /// @brief Statement of the Scop /// @@ -700,9 +712,6 @@ /// At the moment every statement represents a single basic block of LLVM-IR. class ScopStmt { public: - /// @brief List to hold all (scalar) memory accesses mapped to an instruction. - using MemoryAccessList = std::forward_list; - ScopStmt(const ScopStmt &) = delete; const ScopStmt &operator=(const ScopStmt &) = delete; @@ -880,6 +889,9 @@ /// @brief Return true if this statement represents a whole region. bool isRegionStmt() const { return R != nullptr; } + /// @brief Return true if this statement does not contain any accesses. + bool isEmpty() const { return MemAccs.empty(); } + /// @brief Return the (scalar) memory accesses for @p Inst. const MemoryAccessList &getAccessesFor(const Instruction *Inst) const { MemoryAccessList *MAL = lookupAccessesFor(Inst); @@ -913,6 +925,13 @@ BB = Block; } + /// @brief Move the memory access in @p InvMAs to @p TargetList. + /// + /// Note that scalar accesses that are caused by any access in @p InvMAs will + /// be eliminated too. + void hoistMemoryAccesses(MemoryAccessList &InvMAs, + InvariantAccessesTy &TargetList); + typedef MemoryAccessVec::iterator iterator; typedef MemoryAccessVec::const_iterator const_iterator; @@ -1023,7 +1042,7 @@ /// Max loop depth. unsigned MaxLoopDepth; - typedef std::deque StmtSet; + typedef std::list StmtSet; /// The statements in this Scop. StmtSet Stmts; @@ -1130,6 +1149,9 @@ /// group to ensure the SCoP is executed in an alias free environment. MinMaxVectorPairVectorTy MinMaxAliasGroups; + /// @brief List of invariant accesses. 
+ InvariantAccessesTy InvariantAccesses; + /// @brief Scop constructor; invoked from ScopInfo::buildScop. Scop(Region &R, AccFuncMapType &AccFuncMap, ScalarEvolution &SE, DominatorTree &DT, isl_ctx *ctx, unsigned MaxLoopDepth); @@ -1183,6 +1205,15 @@ /// @brief Add parameter constraints to @p C that imply a non-empty domain. __isl_give isl_set *addNonEmptyDomainConstraints(__isl_take isl_set *C) const; + /// @brief Simplify the SCoP representation + /// + /// At the moment we perform the following simplifications: + /// - removal of empty statements (due to invariant load hoisting) + void simplifySCoP(); + + /// @brief Hoist all invariant memory loads. + void hoistInvariantLoads(); + /// @brief Build the Context of the Scop. void buildContext(); @@ -1313,6 +1344,11 @@ /// @return The maximum depth of the loop. inline unsigned getMaxLoopDepth() const { return MaxLoopDepth; } + /// @brief Return the set of invariant accesses. + const InvariantAccessesTy &getInvariantAccesses() const { + return InvariantAccesses; + } + /// @brief Mark the SCoP as optimized by the scheduler. void markAsOptimized() { IsOptimized = true; } Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -1350,6 +1350,46 @@ void ScopStmt::dump() const { print(dbgs()); } +void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs, + InvariantAccessesTy &TargetList) { + + // Remove all memory accesses in @p InvMAs from this statement together + // with all scalar accesses that were caused by them. The tricky iteration + // order used is needed because MemAccs is a vector and the order in + // which the accesses of each memory access list (MAL) are stored in this + // vector is reversed. 
+ for (MemoryAccess *MA : InvMAs) { + auto &MAL = *lookupAccessesFor(MA->getAccessInstruction()); + MAL.reverse(); + + auto MALIt = MAL.begin(); + auto MALEnd = MAL.end(); + auto MemAccsIt = MemAccs.begin(); + while (MALIt != MALEnd) { + while (*MemAccsIt != *MALIt) + MemAccsIt++; + + MALIt++; + MemAccs.erase(MemAccsIt); + } + + InstructionToAccess.erase(MA->getAccessInstruction()); + delete &MAL; + } + + // Get the context under which this statement, hence the memory accesses, are + // executed. + isl_set *DomainCtx = isl_set_params(getDomain()); + DomainCtx = isl_set_remove_redundancies(DomainCtx); + DomainCtx = isl_set_detect_equalities(DomainCtx); + DomainCtx = isl_set_coalesce(DomainCtx); + + for (MemoryAccess *MA : InvMAs) + TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx))); + + isl_set_free(DomainCtx); +} + //===----------------------------------------------------------------------===// /// Scop class implement @@ -2268,6 +2308,9 @@ buildBoundaryContext(); simplifyContexts(); buildAliasChecks(AA); + + hoistInvariantLoads(); + simplifySCoP(); } Scop::~Scop() { @@ -2290,6 +2333,9 @@ isl_pw_multi_aff_free(MMA.second); } } + + for (const auto &IA : InvariantAccesses) + isl_set_free(IA.second); } void Scop::updateAccessDimensionality() { @@ -2298,6 +2344,81 @@ Access->updateDimensionality(); } +void Scop::simplifySCoP() { + + for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) { + ScopStmt &Stmt = *StmtIt; + + if (!StmtIt->isEmpty()) { + StmtIt++; + continue; + } + + if (Stmt.isRegionStmt()) + for (BasicBlock *BB : Stmt.getRegion()->blocks()) + StmtMap.erase(BB); + else + StmtMap.erase(Stmt.getBasicBlock()); + + StmtIt = Stmts.erase(StmtIt); + } +} + +void Scop::hoistInvariantLoads() { + isl_union_map *Writes = getWrites(); + for (ScopStmt &Stmt : *this) { + + // TODO: Loads that are not loop carried, hence are in a statement with + // zero iterators, are by construction invariant, though we + // currently "hoist" them 
anyway. + + isl_set *Domain = Stmt.getDomain(); + MemoryAccessList InvMAs; + + for (MemoryAccess *MA : Stmt) { + if (MA->isImplicit() || MA->isWrite() || !MA->isAffine()) + continue; + + isl_map *AccessRelation = MA->getAccessRelation(); + if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0, + Stmt.getNumIterators())) { + isl_map_free(AccessRelation); + continue; + } + + AccessRelation = + isl_map_intersect_domain(AccessRelation, isl_set_copy(Domain)); + isl_set *AccessRange = isl_map_range(AccessRelation); + + isl_union_map *Written = isl_union_map_intersect_range( + isl_union_map_copy(Writes), isl_union_set_from_set(AccessRange)); + bool IsWritten = !isl_union_map_is_empty(Written); + isl_union_map_free(Written); + + if (IsWritten) + continue; + + InvMAs.push_front(MA); + } + + // We inserted invariant accesses always in the front but need them to be + // sorted in a "natural order". The statements are already sorted in reverse + // post order and that suffices for the accesses too. The reason we require + // an order in the first place is the dependences between invariant loads + // that can be caused by indirect loads. + InvMAs.reverse(); + + // Transfer the memory access from the statement to the SCoP. 
+ Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses); + + isl_set_free(Domain); + } + isl_union_map_free(Writes); + + if (!InvariantAccesses.empty()) + IsOptimized = true; +} + const ScopArrayInfo * Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType, ArrayRef Sizes, bool IsPHI) { @@ -2478,6 +2599,12 @@ << "\n"; OS.indent(4) << "Region: " << getNameStr() << "\n"; OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n"; + OS.indent(4) << "Invariant Accesses: {\n"; + for (const auto &IA : InvariantAccesses) { + IA.first->print(OS); + OS.indent(12) << "Execution Context: " << IA.second << "\n"; + } + OS.indent(4) << "}\n"; printContext(OS.indent(4)); printArrayInfo(OS.indent(4)); printAliasAssumptions(OS); Index: polly/trunk/lib/CodeGen/BlockGenerators.cpp =================================================================== --- polly/trunk/lib/CodeGen/BlockGenerators.cpp +++ polly/trunk/lib/CodeGen/BlockGenerators.cpp @@ -108,6 +108,8 @@ return const_cast(Old); if (Value *New = GlobalMap.lookup(Old)) { + if (Value *NewRemapped = GlobalMap.lookup(New)) + New = NewRemapped; if (Old->getType()->getScalarSizeInBits() < New->getType()->getScalarSizeInBits()) New = Builder.CreateTruncOrBitCast(New, Old->getType()); @@ -226,6 +228,9 @@ Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load, ValueMapT &BBMap, LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses) { + if (Value *PreloadLoad = GlobalMap.lookup(Load)) + return PreloadLoad; + const Value *Pointer = Load->getPointerOperand(); Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap, LTS, NewAccesses); @@ -762,6 +767,12 @@ void VectorBlockGenerator::generateLoad( ScopStmt &Stmt, const LoadInst *Load, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) { + if (Value *PreloadLoad = GlobalMap.lookup(Load)) { + VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad, + Load->getName() + "_p"); + return; 
+ } + if (!VectorType::isValidElementType(Load->getType())) { for (int i = 0; i < getVectorWidth(); i++) ScalarMaps[i][Load] = Index: polly/trunk/lib/CodeGen/CodeGeneration.cpp =================================================================== --- polly/trunk/lib/CodeGen/CodeGeneration.cpp +++ polly/trunk/lib/CodeGen/CodeGeneration.cpp @@ -146,8 +146,9 @@ auto SplitBlock = StartBlock->getSinglePredecessor(); Builder.SetInsertPoint(SplitBlock->getTerminator()); NodeBuilder.addParameters(S.getContext()); + NodeBuilder.preloadInvariantLoads(); Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); - SplitBlock->getTerminator()->setOperand(0, RTC); + Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); Builder.SetInsertPoint(StartBlock->begin()); NodeBuilder.create(AstRoot); Index: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp +++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp @@ -814,6 +814,123 @@ llvm_unreachable("Unknown isl_ast_node type"); } +/// @brief Create the actual preload memory access for @p MA. 
+static inline Value *createPreloadLoad(Scop &S, const MemoryAccess &MA, + isl_ast_build *Build, + IslExprBuilder &ExprBuilder) { + isl_set *AccessRange = isl_map_range(MA.getAccessRelation()); + isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); + PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext()); + isl_ast_expr *Access = + isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); + return ExprBuilder.create(Access); +} + +Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, + isl_set *Domain, + isl_ast_build *Build) { + + isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); + bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); + isl_set_free(Universe); + + if (AlwaysExecuted) { + isl_set_free(Domain); + return createPreloadLoad(S, MA, Build, ExprBuilder); + } else { + + isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); + + Value *Cond = ExprBuilder.create(DomainCond); + if (!Cond->getType()->isIntegerTy(1)) + Cond = Builder.CreateIsNotNull(Cond); + + BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), + Builder.GetInsertPoint(), &DT, &LI); + CondBB->setName("polly.preload.cond"); + + BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), &DT, &LI); + MergeBB->setName("polly.preload.merge"); + + Function *F = Builder.GetInsertBlock()->getParent(); + LLVMContext &Context = F->getContext(); + BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); + + DT.addNewBlock(ExecBB, CondBB); + if (Loop *L = LI.getLoopFor(CondBB)) + L->addBasicBlockToLoop(ExecBB, LI); + + auto *CondBBTerminator = CondBB->getTerminator(); + Builder.SetInsertPoint(CondBBTerminator); + Builder.CreateCondBr(Cond, ExecBB, MergeBB); + CondBBTerminator->eraseFromParent(); + + Builder.SetInsertPoint(ExecBB); + Builder.CreateBr(MergeBB); + + Builder.SetInsertPoint(ExecBB->getTerminator()); + Instruction *AccInst = MA.getAccessInstruction(); + Type *AccInstTy = AccInst->getType(); + Value 
*PreAccInst = createPreloadLoad(S, MA, Build, ExprBuilder); + + Builder.SetInsertPoint(MergeBB->getTerminator()); + auto *MergePHI = Builder.CreatePHI( + AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); + MergePHI->addIncoming(PreAccInst, ExecBB); + MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); + + return MergePHI; + } +} + +void IslNodeBuilder::preloadInvariantLoads() { + + const auto &InvAccList = S.getInvariantAccesses(); + if (InvAccList.empty()) + return; + + const Region &R = S.getRegion(); + + BasicBlock *PreLoadBB = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); + PreLoadBB->setName("polly.preload.begin"); + Builder.SetInsertPoint(PreLoadBB->begin()); + + isl_ast_build *Build = + isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); + + for (const auto &IA : InvAccList) { + MemoryAccess *MA = IA.first; + assert(!MA->isImplicit()); + + isl_set *Domain = isl_set_copy(IA.second); + Instruction *AccInst = MA->getAccessInstruction(); + Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build); + ValueMap[AccInst] = PreloadVal; + + if (SE.isSCEVable(AccInst->getType())) { + isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)); + if (ParamId) + IDToValue[ParamId] = PreloadVal; + isl_id_free(ParamId); + } + + SmallVector Users; + for (auto *U : AccInst->users()) + if (Instruction *UI = dyn_cast(U)) + if (!R.contains(UI)) + Users.push_back(UI); + for (auto *U : Users) + U->replaceUsesOfWith(AccInst, PreloadVal); + + auto *SAI = S.getScopArrayInfo(MA->getBaseAddr()); + for (auto *DerivedSAI : SAI->getDerivedSAIs()) + DerivedSAI->setBasePtr(PreloadVal); + } + + isl_ast_build_free(Build); +} + void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { for (unsigned i = 0; i < isl_set_dim(Context, isl_dim_param); ++i) { Index: polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_2.ll =================================================================== --- 
polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_2.ll +++ polly/trunk/test/Isl/CodeGen/aliasing_parametric_simple_2.ll @@ -6,6 +6,7 @@ ; } ; ; CHECK: sext i32 %c to i64 +; CHECK: sext i32 %c to i64 ; CHECK: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 ; CHECK: %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15 ; CHECK: %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64 @@ -23,7 +24,7 @@ ; CHECK: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]] ; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] -; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %1, %[[NoAlias]] +; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %3, %[[NoAlias]] ; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/Isl/CodeGen/exprModDiv.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/exprModDiv.ll +++ polly/trunk/test/Isl/CodeGen/exprModDiv.ll @@ -6,7 +6,7 @@ ; ; void exprModDiv(float *A, float *B, float *C, long N, long p) { ; for (long i = 0; i < N; i++) -; C[i] += A[i] + B[i] + A[p] + B[p]; +; C[i] += A[i] + B[i] + A[i] + B[i + p]; ; } ; ; @@ -32,21 +32,21 @@ ; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d ; A[p + 127 * floord(-p - 1, 127) + 127] -; CHECK: %20 = sub nsw i64 0, %p -; CHECK: %21 = sub nsw i64 %20, 1 -; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127 +; CHECK: %17 = sub nsw i64 0, %p +; CHECK: %18 = sub nsw i64 %17, 1 +; CHECK: %pexp.fdiv_q.0 = sub i64 %18, 127 ; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1 -; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0 -; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21 +; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %18, 0 +; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %18 ; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127 -; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4 -; CHECK: %23 = add nsw i64 %p, %22 -; CHECK: %24 = add nsw i64 %23, 127 -; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24 +; CHECK: %19 = mul nsw i64 127, %pexp.fdiv_q.4 +; CHECK: %20 = add nsw i64 %p, %19 +; CHECK: %21 = add nsw i64 %20, 127 +; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %21 ; A[p / 127] ; CHECK: %pexp.div = sdiv exact i64 %p, 127 -; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div +; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div ; A[i % 128] ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 @@ -58,17 +58,17 @@ ; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d ; A[p + 128 * floord(-p - 1, 128) + 128] -; POW2: %20 = sub nsw i64 0, %p -; POW2: %21 = sub nsw i64 %20, 1 -; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7 -; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr -; POW2: %23 = add nsw i64 %p, %22 -; POW2: %24 = add nsw i64 %23, 128 -; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24 +; POW2: %17 = sub nsw i64 0, %p +; POW2: %18 = sub nsw i64 %17, 1 +; POW2: %polly.fdiv_q.shr = ashr i64 %18, 7 +; POW2: %19 = mul nsw i64 128, %polly.fdiv_q.shr +; POW2: %20 = add nsw i64 %p, %19 +; POW2: %21 = add nsw i64 %20, 128 +; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %21 ; A[p / 128] ; POW2: %pexp.div = sdiv exact i64 %p, 128 -; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div +; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -87,10 +87,11 @@ %arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0 %tmp1 = load float, float* %arrayidx1, align 4 %add = fadd float %tmp, %tmp1 - %arrayidx2 = getelementptr inbounds float, float* %A, i64 %p + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0 %tmp2 = load float, float* %arrayidx2, align 4 %add3 = fadd float %add, %tmp2 - %arrayidx4 = getelementptr inbounds float, float* %B, i64 %p + %padd = add nsw i64 %p, %i.0 + %arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd %tmp3 = load float, float* %arrayidx4, align 4 %add5 = fadd float %add3, %tmp3 %arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0 Index: polly/trunk/test/Isl/CodeGen/invariant_load.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/invariant_load.ll +++ polly/trunk/test/Isl/CodeGen/invariant_load.ll @@ -0,0 +1,39 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s +; +; CHECK-LABEL: polly.preload.begin: +; 
CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0 +; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B +; +; CHECK-LABEL: polly.stmt.bb2: +; CHECK-NEXT: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK-NEXT: store i32 %polly.access.B.load, i32* %scevgep, align 4 +; +; void f(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i] = *B; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %B, align 4 + %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} Index: polly/trunk/test/Isl/CodeGen/non-affine-phi-node-expansion.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/non-affine-phi-node-expansion.ll +++ polly/trunk/test/Isl/CodeGen/non-affine-phi-node-expansion.ll @@ -4,6 +4,11 @@ %struct.wombat = type {[4 x i32]} +; CHECK: polly.preload.begin: +; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0 +; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B +; CHECK-NOT: %polly.access.B.load = load i32, i32* %polly.access.B + ; CHECK: polly.stmt.bb3.entry: ; preds = %polly.start ; CHECK: br label %polly.stmt.bb3 @@ -14,8 +19,7 @@ ; CHECK: br label %polly.stmt.bb13.exit ; CHECK: polly.stmt.bb5: ; preds = %polly.stmt.bb3 -; CHECK: %tmp7_p_scalar_ = load i32, i32* %B, !alias.scope !0, !noalias !2 -; CHECK: store i32 %tmp7_p_scalar_, i32* %polly.access.cast.arg1, !alias.scope !3, !noalias !4 +; CHECK: store i32 %polly.access.B.load, 
i32* %polly.access.cast.arg2 ; CHECK: br label %polly.stmt.bb13.exit ; Function Attrs: nounwind uwtable Index: polly/trunk/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll +++ polly/trunk/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll @@ -1,62 +0,0 @@ -; RUN: opt %loadPolly -disable-basicaa -polly-detect-unprofitable -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s -; -; This caused an lnt crash at some point, just verify it will run through and -; produce the PHI node in the exit we are looking for. -; -; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %.merge = phi %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* [ %.final_reload, %polly.stmt.for.end.298 ], [ %13, %for.end.298 ] -; -%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823*, %struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*, 
%struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 } -%struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823 = type { i32, i32, [100 x %struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*], i32, float, float, float } -%struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822 = type { i32, i32, i32, i32, i32, i32, %struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818*, %struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820*, 
%struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] } -%struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818 = type { %struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816*, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 } -%struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816 = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 } -%struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 } -%struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820 = type { [3 x [11 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [9 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [10 x 
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [6 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819] } -%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819 = type { i16, i8, i64 } -%struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821 = type { [2 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x 
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]] } -%struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824 = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } 
-%struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825 = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825* } - -@img = external global %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, align 8 - -; Function Attrs: nounwind uwtable -define void @intrapred_luma() #0 { -entry: - %PredPel = alloca [13 x i16], align 16 - br label %for.body - -for.body: ; preds = %for.body, %entry - br i1 undef, label %for.body, label %for.body.262 - -for.body.262: ; preds = %for.body - %0 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8 - br label %for.body.280 - -for.body.280: ; preds = %for.body.280, %for.body.262 - %indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ] - %arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1 - %arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66 - %1 = load i16, i16* %arrayidx283, align 2 - %arrayidx289 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, 
%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66 - store i16 %1, i16* %arrayidx289, align 2 - %indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1 - br i1 false, label %for.body.280, label %for.end.298 - -for.end.298: ; preds = %for.body.280 - %2 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8 - br label %for.body.310 - -for.body.310: ; preds = %for.body.310, %for.end.298 - %indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ] - %arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 9 - %arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv - %3 = load i16, i16* %arrayidx313, align 2 - %arrayidx322 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1 - store i16 %3, i16* %arrayidx322, align 2 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br i1 false, label %for.body.310, label %for.end.328 - -for.end.328: ; preds = %for.body.310 - ret void -} Index: polly/trunk/test/Isl/CodeGen/simple_vec_call.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/simple_vec_call.ll +++ 
polly/trunk/test/Isl/CodeGen/simple_vec_call.ll @@ -24,16 +24,10 @@ ret void } -; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0 -; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1 -; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2 -; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3 -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) [[NUW]] +; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW:#[0-9]+]] +; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] ; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0 ; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1 ; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2 Index: polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll +++ polly/trunk/test/Isl/CodeGen/simple_vec_call_2.ll @@ -24,19 +24,13 @@ ret void } -; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0 -; CHECK: 
%1 = extractelement <4 x float> %value_p_splat, i32 1 -; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2 -; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3 -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) [[NUW]] -; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0 -; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1 -; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2 -; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3 -; CHECK: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align +; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW:#[0-9]+]] +; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: %0 = insertelement <4 x float**> undef, float** %p_result, i32 0 +; CHECK: %1 = insertelement <4 x float**> %0, float** %p_result1, i32 1 +; CHECK: %2 = insertelement <4 x float**> %1, float** %p_result2, i32 2 +; CHECK: %3 = insertelement <4 x float**> %2, float** %p_result3, i32 3 +; CHECK: store <4 x float**> %3, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align ; CHECK: attributes [[NUW]] = { nounwind } Index: polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll +++ polly/trunk/test/Isl/CodeGen/simple_vec_cast.ll @@ -28,8 +28,10 @@ ret void } -; CHECK: %tmp_p_splat_one = load <1 x float>, <1 x float>* bitcast 
([1024 x float]* @A to <1 x float>*), align 8, !alias.scope !0, !noalias !2 -; CHECK: %tmp_p_splat = shufflevector <1 x float> %tmp_p_splat_one, <1 x float> %tmp_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = fpext <4 x float> %tmp_p_splat to <4 x double> -; CHECK: store <4 x double> %0, <4 x double>* bitcast ([1024 x double]* @B to <4 x double>*), align 8, !alias.scope !3, !noalias !4 +; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) +; CHECK: polly.stmt.bb2: ; preds = %polly.start +; CHECK: %tmp_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0 +; CHECK: %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: %0 = fpext <4 x float> %tmp_p.splat to <4 x double> +; CHECK: store <4 x double> %0, <4 x double>* Index: polly/trunk/test/Isl/CodeGen/simple_vec_const.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/simple_vec_const.ll +++ polly/trunk/test/Isl/CodeGen/simple_vec_const.ll @@ -52,5 +52,8 @@ } -; CHECK: load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*) -; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer +; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) + +; CHECK: polly.stmt.: ; preds = %polly.start +; CHECK: %_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0 +; CHECK: %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer Index: polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll +++ polly/trunk/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll @@ -22,6 +22,9 @@ return: ret void } -; CHECK: %value_p_splat_one = load <1 x float**>, <1 x float**>* bitcast 
([1024 x float**]* @A to <1 x float**>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8 +; CHECK: %.load = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i32 0, i32 0) + +; CHECK-NOT: load <1 x float**> +; CHECK: %value_p.splatinsert = insertelement <4 x float**> undef, float** %.load, i32 0 +; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> undef, <4 x i32> zeroinitializer +; CHECK: store <4 x float**> %value_p.splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8 Index: polly/trunk/test/Isl/CodeGen/two-scops-in-row.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/two-scops-in-row.ll +++ polly/trunk/test/Isl/CodeGen/two-scops-in-row.ll @@ -21,6 +21,7 @@ for.0: %Scalar0.val = load i32, i32* %Scalar0 + store i32 1, i32* %Scalar0 br i1 false, label %for.0, label %for.1.preheader for.1.preheader: Index: polly/trunk/test/ScopInfo/inter_bb_scalar_dep.ll =================================================================== --- polly/trunk/test/ScopInfo/inter_bb_scalar_dep.ll +++ polly/trunk/test/ScopInfo/inter_bb_scalar_dep.ll @@ -14,6 +14,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; Function Attrs: nounwind +; CHECK: Invariant +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] + define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -25,11 +29,7 @@ entry.next: ; preds = %for.i %init = load i64, i64* %init_ptr -; CHECK-LABEL: Stmt_entry_next -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; 
CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; +; CHECK-NOT: Stmt_entry_next br label %for.j for.j: ; preds = %for.j, %entry.next Index: polly/trunk/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll =================================================================== --- polly/trunk/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll +++ polly/trunk/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll @@ -14,7 +14,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -; Function Attrs: nounwind +; CHECK: Invariant Accesses: { +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: MemRef_init_ptr[0] +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: MemRef_init_ptr[0] +; CHECK: } define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -26,23 +31,17 @@ entry.next: ; preds = %for.i %init = load i64, i64* %init_ptr -; CHECK-LABEL: Stmt_entry_next -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; +; CHECK-NOT: Stmt_entry_next br label %for.j for.j: ; preds = %for.j, %entry.next %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] %init_2 = load i64, i64* %init_ptr %init_sum = add i64 %init, %init_2 -; CHECK-LABEL: Stmt_for_j +; CHECK: Stmt_for_j ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init[] }; -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; 
CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; %scevgep = getelementptr i64, i64* %A, i64 %indvar.j store i64 %init_sum, i64* %scevgep Index: polly/trunk/test/ScopInfo/intra_bb_scalar_dep.ll =================================================================== --- polly/trunk/test/ScopInfo/intra_bb_scalar_dep.ll +++ polly/trunk/test/ScopInfo/intra_bb_scalar_dep.ll @@ -14,6 +14,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; Function Attrs: nounwind +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -32,11 +35,12 @@ %init_plus_two = add i64 %init, 2 %scevgep = getelementptr i64, i64* %A, i64 %indvar.j store i64 %init_plus_two, i64* %scevgep -; CHECK-LABEL: Stmt_for_j -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_j +; CHECK-NOT: ReadAccess +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; +; CHECK-NEXT: } %indvar.j.next = add nsw i64 %indvar.j, 1 %exitcond.j = icmp eq i64 %indvar.j.next, %N br i1 %exitcond.j, label %for.i.end, label %for.j Index: polly/trunk/test/ScopInfo/invariant_load.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load.ll +++ polly/trunk/test/ScopInfo/invariant_load.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s 
+; +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_B[0] }; +; +; void f(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i] = *B; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %B, align 4 + %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} Index: polly/trunk/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll +++ polly/trunk/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-detect-unprofitable -analyze < %s | FileCheck %s +; +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_bb5[i0] -> MemRef_BP[0] }; +; CHECK-NEXT: Execution Context: [N] -> { : N >= 514 } +; +; void f(int *BP, int *A, int N) { +; for (int i = 0; i < N; i++) +; if (i > 512) +; A[i] = *BP; +; else +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %BP, i32* %A, i32 %N) { +bb: + %tmp = sext i32 %N to i64 + br label %bb1 + +bb1: ; preds = %bb11, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ] + %tmp2 = icmp slt i64 %indvars.iv, %tmp + br i1 %tmp2, label %bb3, label %bb12 + +bb3: ; preds = %bb1 + %tmp4 = 
icmp sgt i64 %indvars.iv, 512 + br i1 %tmp4, label %bb5, label %bb8 + +bb5: ; preds = %bb3 + %tmp9a = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %inv = load i32, i32 *%BP + store i32 %inv, i32* %tmp9a, align 4 + br label %bb10 + +bb8: ; preds = %bb3 + %tmp9b = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp9b, align 4 + br label %bb10 + +bb10: ; preds = %bb8, %bb5 + br label %bb11 + +bb11: ; preds = %bb10 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb12: ; preds = %bb1 + ret void +} Index: polly/trunk/test/ScopInfo/tempscop-printing.ll =================================================================== --- polly/trunk/test/ScopInfo/tempscop-printing.ll +++ polly/trunk/test/ScopInfo/tempscop-printing.ll @@ -14,6 +14,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; CHECK-LABEL: Function: f +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] +; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 } + define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { entry: br label %for.i @@ -24,12 +28,8 @@ br label %entry.next entry.next: -; CHECK: Stmt_entry_next +; CHECK-NOT: Stmt_entry_next %init = load i64, i64* %init_ptr -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; br label %for.j for.j: @@ -55,6 +55,9 @@ } ; CHECK-LABEL: Function: g +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] +; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 } define void @g(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { entry: br label %for.i @@ -65,12 +68,8 @@ br label %entry.next 
entry.next: -; CHECK: Stmt_entry_next +; CHECK-NOT: Stmt_entry_next %init = load i64, i64* %init_ptr -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; br label %for.j for.j: